Commit 1c6f579

Fix tool_choice=required to return after tool execution
When tool_choice is 'required', the user's intent is to force exactly one tool call. After the tool executes, return immediately with the function call and result; don't continue to call the model again.

This fixes integration tests that were failing with empty text responses: with tool_choice=required, the model would keep returning function calls instead of text.

Also adds regression tests for:
- conversation_id propagation between tool iterations (from PR #3664)
- tool_choice=required returns after tool execution
1 parent 98754f7 commit 1c6f579
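In caller terms, the intended behavior after this change looks roughly like the sketch below. It is a minimal illustration pieced together from the tests in this commit; the get_weather tool, the demo coroutine, and the client object are placeholders (any chat client built on the framework's function-invocation layer), not part of the diff.

from agent_framework import tool

@tool(name="get_weather", approval_mode="never_require")
def get_weather(city: str) -> str:
    # Hypothetical tool, used only for illustration.
    return f"It is sunny in {city}."

async def demo(client) -> None:
    # tool_choice="required" forces exactly one tool call; after this commit the
    # invocation loop returns right after that call executes instead of asking
    # the model for a follow-up text answer.
    response = await client.get_response(
        "What's the weather in Seattle?",
        options={"tool_choice": "required", "tools": [get_weather]},
    )
    assert response.messages[0].contents[0].type == "function_call"
    assert response.messages[1].contents[0].type == "function_result"
    # Expected: no trailing model-generated text in the response.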

File tree

3 files changed: +173 −3 lines changed


python/packages/azure-ai/tests/test_azure_ai_client.py

Lines changed: 17 additions & 3 deletions
@@ -1391,12 +1391,26 @@ async def test_integration_options(

     assert response is not None
     assert isinstance(response, ChatResponse)
-    assert response.text is not None, f"No text in response for option '{option_name}'"
-    assert len(response.text) > 0, f"Empty response for option '{option_name}'"
+
+    # For tool_choice="required", we return after tool execution without a model text response
+    is_required_tool_choice = option_name == "tool_choice" and (
+        option_value == "required" or (isinstance(option_value, dict) and option_value.get("mode") == "required")
+    )
+
+    if is_required_tool_choice:
+        # Response should have function call and function result, but no text from model
+        assert len(response.messages) >= 2, f"Expected function call + result for {option_name}"
+        has_function_call = any(c.type == "function_call" for msg in response.messages for c in msg.contents)
+        has_function_result = any(c.type == "function_result" for msg in response.messages for c in msg.contents)
+        assert has_function_call, f"No function call in response for {option_name}"
+        assert has_function_result, f"No function result in response for {option_name}"
+    else:
+        assert response.text is not None, f"No text in response for option '{option_name}'"
+        assert len(response.text) > 0, f"Empty response for option '{option_name}'"

     # Validate based on option type
     if needs_validation:
-        if option_name.startswith("tool_choice"):
+        if option_name.startswith("tool_choice") and not is_required_tool_choice:
             # Should have called the weather function
             text = response.text.lower()
             assert "sunny" in text or "seattle" in text, f"Tool not invoked for {option_name}"

python/packages/core/agent_framework/_tools.py

Lines changed: 10 additions & 0 deletions
@@ -2194,6 +2194,11 @@ async def _get_response() -> ChatResponse:
                     break
                 errors_in_a_row = result["errors_in_a_row"]

+                # When tool_choice is 'required', return after tool execution
+                # The user's intent is to force exactly one tool call and get the result
+                if mutable_options.get("tool_choice") == "required":
+                    return response
+
                 if response.conversation_id is not None:
                     # For conversation-based APIs, the server already has the function call message.
                     # Only send the new function result message (added by _handle_function_call_results).
@@ -2300,6 +2305,11 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
                 if result["action"] != "continue":
                     return

+                # When tool_choice is 'required', return after tool execution
+                # The user's intent is to force exactly one tool call and get the result
+                if mutable_options.get("tool_choice") == "required":
+                    return
+
                 if response.conversation_id is not None:
                     # For conversation-based APIs, the server already has the function call message.
                     # Only send the new function result message (the last one added by _handle_function_call_results).
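Conceptually, both hunks add the same early exit to the function-invocation loop. The following is a self-contained, simplified sketch of that control flow, not the framework's actual code; run_with_tools, call_model, and execute_tool are illustrative stand-ins.

def run_with_tools(call_model, execute_tool, messages, options, max_iterations=5):
    """Simplified sketch of a function-invocation loop with the new early return."""
    for _ in range(max_iterations):
        contents = call_model(messages, options)  # one model round-trip
        calls = [c for c in contents if c["type"] == "function_call"]
        if not calls:
            return messages + contents  # plain text answer: nothing to execute
        results = [execute_tool(c) for c in calls]  # run the requested tool(s)
        messages = messages + contents + results
        # New behavior: tool_choice="required" means the caller forced exactly one
        # tool call, so hand back the call + result without another model round-trip.
        if options.get("tool_choice") == "required":
            return messages
    return messages


# Tiny demo: a fake model that always requests a tool call would normally loop
# max_iterations times; with tool_choice="required" it stops after one call.
def fake_model(messages, options):
    return [{"type": "function_call", "name": "get_time", "args": {}}]

def fake_tool(call):
    return {"type": "function_result", "name": call["name"], "value": "12:00"}

print(run_with_tools(fake_model, fake_tool, [], {"tool_choice": "required"}))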

python/packages/core/tests/core/test_function_invocation_logic.py

Lines changed: 146 additions & 0 deletions
@@ -2626,3 +2626,149 @@ def test_func(arg1: str) -> str:
     assert conversation_ids_received[1] == "stream_conv_after_first", (
         "streaming: conversation_id should be updated in options after receiving new conversation_id from API"
     )
+
+
+async def test_tool_choice_required_returns_after_tool_execution():
+    """Test that tool_choice='required' returns after tool execution without another model call.
+
+    When tool_choice is 'required', the user's intent is to force exactly one tool call.
+    After the tool executes, we should return the response with the function call and result,
+    not continue to call the model again.
+    """
+    from collections.abc import AsyncIterable, MutableSequence, Sequence
+    from typing import Any
+    from unittest.mock import patch
+
+    from agent_framework import (
+        BaseChatClient,
+        ChatMessage,
+        ChatResponse,
+        ChatResponseUpdate,
+        Content,
+        ResponseStream,
+        Role,
+        tool,
+    )
+    from agent_framework._middleware import ChatMiddlewareLayer
+    from agent_framework._tools import FunctionInvocationLayer
+
+    class TrackingChatClient(
+        ChatMiddlewareLayer,
+        FunctionInvocationLayer,
+        BaseChatClient,
+    ):
+        def __init__(self) -> None:
+            super().__init__(function_middleware=[])
+            self.run_responses: list[ChatResponse] = []
+            self.streaming_responses: list[list[ChatResponseUpdate]] = []
+            self.call_count: int = 0
+
+        def _inner_get_response(
+            self,
+            *,
+            messages: MutableSequence[ChatMessage],
+            stream: bool,
+            options: dict[str, Any],
+            **kwargs: Any,
+        ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
+            if stream:
+                return self._get_streaming_response(messages=messages, options=options, **kwargs)
+
+            async def _get() -> ChatResponse:
+                self.call_count += 1
+                if not self.run_responses:
+                    return ChatResponse(messages=ChatMessage(role="assistant", text="done"))
+                return self.run_responses.pop(0)
+
+            return _get()
+
+        def _get_streaming_response(
+            self,
+            *,
+            messages: MutableSequence[ChatMessage],
+            options: dict[str, Any],
+            **kwargs: Any,
+        ) -> ResponseStream[ChatResponseUpdate, ChatResponse]:
+            async def _stream() -> AsyncIterable[ChatResponseUpdate]:
+                self.call_count += 1
+                if not self.streaming_responses:
+                    yield ChatResponseUpdate(text="done", role="assistant", is_finished=True)
+                    return
+                response = self.streaming_responses.pop(0)
+                for update in response:
+                    yield update
+
+            def _finalize(updates: Sequence[ChatResponseUpdate]) -> ChatResponse:
+                return ChatResponse.from_chat_response_updates(updates)
+
+            return ResponseStream(_stream(), finalizer=_finalize)
+
+    @tool(name="test_func", approval_mode="never_require")
+    def test_func(arg1: str) -> str:
+        return f"Result {arg1}"
+
+    # Test non-streaming: should only call model once, then return with function call + result
+    with patch("agent_framework._tools.DEFAULT_MAX_ITERATIONS", 5):
+        client = TrackingChatClient()
+
+        client.run_responses = [
+            ChatResponse(
+                messages=ChatMessage(
+                    role="assistant",
+                    contents=[Content.from_function_call(call_id="call_1", name="test_func", arguments='{"arg1": "v1"}')],
+                ),
+            ),
+            # This second response should NOT be consumed
+            ChatResponse(
+                messages=ChatMessage(role="assistant", text="this should not be reached"),
+            ),
+        ]
+
+        response = await client.get_response(
+            "hello",
+            options={"tool_choice": "required", "tools": [test_func]},
+        )
+
+        # Should only call model once - after tool execution, return immediately
+        assert client.call_count == 1
+        # Response should contain function call and function result
+        assert len(response.messages) == 2
+        assert response.messages[0].role == Role.ASSISTANT
+        assert response.messages[0].contents[0].type == "function_call"
+        assert response.messages[1].role == Role.TOOL
+        assert response.messages[1].contents[0].type == "function_result"
+        # Second response should still be in queue (not consumed)
+        assert len(client.run_responses) == 1
+
+    # Test streaming version too
+    with patch("agent_framework._tools.DEFAULT_MAX_ITERATIONS", 5):
+        streaming_client = TrackingChatClient()
+
+        streaming_client.streaming_responses = [
+            [
+                ChatResponseUpdate(
+                    contents=[Content.from_function_call(call_id="call_2", name="test_func", arguments='{"arg1": "v2"}')],
+                    role="assistant",
+                ),
+            ],
+            # This second response should NOT be consumed
+            [
+                ChatResponseUpdate(text="this should not be reached", role="assistant", is_finished=True),
+            ],
+        ]
+
+        response_stream = streaming_client.get_response(
+            "hello",
+            stream=True,
+            options={"tool_choice": "required", "tools": [test_func]},
+        )
+        updates = []
+        async for update in response_stream:
+            updates.append(update)
+
+        # Should only call model once
+        assert streaming_client.call_count == 1
+        # Should have function call update and function result update
+        assert len(updates) == 2
+        # Second streaming response should still be in queue (not consumed)
+        assert len(streaming_client.streaming_responses) == 1
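One way to run just this new regression test locally (assuming the core package's test dependencies are installed) would be:

pytest python/packages/core/tests/core/test_function_invocation_logic.py -k tool_choice_required_returns_after_tool_execution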
