diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py
index f7f6af7ef..f5ad9e408 100644
--- a/src/agents/run_internal/items.py
+++ b/src/agents/run_internal/items.py
@@ -23,6 +23,7 @@
     "apply_patch_call": "apply_patch_call_output",
     "computer_call": "computer_call_output",
     "local_shell_call": "local_shell_call_output",
+    "mcp_call": "mcp_call_output",
 }
 
 __all__ = [
@@ -68,7 +69,8 @@ def drop_orphan_function_calls(items: list[TResponseInputItem]) -> list[TRespons
         if output_type is None:
             filtered.append(entry)
             continue
-        call_id = entry.get("call_id")
+        # Prefer call_id; fall back to id, which is the identifier mcp_call items use.
+        call_id = entry.get("call_id") or entry.get("id")
         if isinstance(call_id, str) and call_id in completed_call_ids.get(output_type, set()):
             filtered.append(entry)
     return filtered
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
index f2739756d..b2fde61cd 100644
--- a/src/agents/run_internal/turn_resolution.py
+++ b/src/agents/run_internal/turn_resolution.py
@@ -1299,6 +1299,26 @@ def process_model_response(
                     break
             items.append(ToolCallItem(raw_item=output, agent=agent, description=_mcp_description))
             tools_used.append("mcp")
+
+            # Create a ToolCallOutputItem for MCP calls that have completed with output.
+            # This ensures MCP tool call results are persisted to session storage for replay.
+            if output.output is not None or output.error is not None:
+                # Build the mcp_call_output payload; call_id carries the mcp_call's id so
+                # the output can be linked to its call for deduplication and orphan filtering.
+                # A non-empty error wins over output; normalize to "" so both fields agree.
+                mcp_output_content = output.error or output.output or ""
+                mcp_call_output: dict[str, Any] = {
+                    "type": "mcp_call_output",
+                    "call_id": output.id,
+                    "output": mcp_output_content,
+                }
+                items.append(
+                    ToolCallOutputItem(
+                        raw_item=mcp_call_output,
+                        output=mcp_output_content,
+                        agent=agent,
+                    )
+                )
         elif isinstance(output, ImageGenerationCall):
             items.append(ToolCallItem(raw_item=output, agent=agent))
             tools_used.append("image_generation")
diff --git a/tests/test_process_model_response.py b/tests/test_process_model_response.py
index d26559a68..937b037e9 100644
--- a/tests/test_process_model_response.py
+++ b/tests/test_process_model_response.py
@@ -1,7 +1,8 @@
 import pytest
 from openai.types.responses import ResponseCompactionItem
+from openai.types.responses.response_output_item import McpCall
 
-from agents import Agent, ApplyPatchTool, CompactionItem
+from agents import Agent, ApplyPatchTool, CompactionItem, ToolCallItem, ToolCallOutputItem
 from agents.exceptions import ModelBehaviorError
 from agents.items import ModelResponse
 from agents.run_internal import run_loop
@@ -93,3 +94,100 @@ def test_process_model_response_handles_compaction_item() -> None:
     assert item.raw_item["type"] == "compaction"
     assert item.raw_item["encrypted_content"] == "enc"
     assert "created_by" not in item.raw_item
+
+
+def test_process_model_response_mcp_call_with_output_creates_output_item() -> None:
+    """Test that McpCall with output creates both ToolCallItem and ToolCallOutputItem.
+
+    This ensures MCP tool call results are persisted to session storage for replay.
+    See: https://github.com/openai/openai-agents-python/issues/2384
+    """
+    agent = Agent(name="mcp-agent", model=FakeModel())
+    mcp_call = McpCall(
+        id="mcp-call-1",
+        type="mcp_call",
+        name="test_tool",
+        server_label="test-server",
+        arguments='{"key": "value"}',
+        output="tool result output",
+        status="completed",
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([mcp_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    # Should have 2 items: ToolCallItem for the call and ToolCallOutputItem for the output
+    assert len(processed.new_items) == 2
+
+    # First item should be the ToolCallItem
+    tool_call_item = processed.new_items[0]
+    assert isinstance(tool_call_item, ToolCallItem)
+    assert tool_call_item.raw_item.id == "mcp-call-1"
+    assert tool_call_item.raw_item.name == "test_tool"
+
+    # Second item should be the ToolCallOutputItem
+    tool_output_item = processed.new_items[1]
+    assert isinstance(tool_output_item, ToolCallOutputItem)
+    assert tool_output_item.raw_item["type"] == "mcp_call_output"
+    assert tool_output_item.raw_item["call_id"] == "mcp-call-1"
+    assert tool_output_item.raw_item["output"] == "tool result output"
+    assert tool_output_item.output == "tool result output"
+
+
+def test_process_model_response_mcp_call_with_error_creates_output_item() -> None:
+    """Test that McpCall with error creates ToolCallOutputItem with the error."""
+    agent = Agent(name="mcp-agent", model=FakeModel())
+    mcp_call = McpCall(
+        id="mcp-call-2",
+        type="mcp_call",
+        name="failing_tool",
+        server_label="test-server",
+        arguments="{}",
+        error="tool execution failed",
+        status="failed",
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([mcp_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 2
+
+    tool_output_item = processed.new_items[1]
+    assert isinstance(tool_output_item, ToolCallOutputItem)
+    assert tool_output_item.raw_item["output"] == "tool execution failed"
+    assert tool_output_item.output == "tool execution failed"
+
+
+def test_process_model_response_mcp_call_without_output_no_output_item() -> None:
+    """Test that McpCall without output/error only creates ToolCallItem."""
+    agent = Agent(name="mcp-agent", model=FakeModel())
+    mcp_call = McpCall(
+        id="mcp-call-3",
+        type="mcp_call",
+        name="pending_tool",
+        server_label="test-server",
+        arguments="{}",
+        status="in_progress",
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([mcp_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    # Should only have 1 item: ToolCallItem (no output yet)
+    assert len(processed.new_items) == 1
+    assert isinstance(processed.new_items[0], ToolCallItem)