From 61a2dfd257e403975d16cc3b3f7e88a3981756c8 Mon Sep 17 00:00:00 2001
From: SAAKSHI GUPTA <saakshigupta2002@gmail.com>
Date: Sat, 31 Jan 2026 21:16:14 +1030
Subject: [PATCH] fix: persist MCP tool call outputs to session storage

When the model returned McpCall items that carried output data, the SDK
created a ToolCallItem for the call but no ToolCallOutputItem for the
result. As a consequence, MCP tool call results were missing from session
persistence, which made session replay incomplete.

Changes:
- Create ToolCallOutputItem for McpCall items that have output or error
- Add mcp_call -> mcp_call_output mapping to _TOOL_CALL_TO_OUTPUT_TYPE
- Update drop_orphan_function_calls to fall back to the 'id' field when
  'call_id' is absent (mcp_call items use 'id' instead of 'call_id')
- Add tests for McpCall items with output, with an error, and with neither

Fixes #2384
---
 src/agents/run_internal/items.py           |   4 +-
 src/agents/run_internal/turn_resolution.py |  20 +++++
 tests/test_process_model_response.py       | 100 ++++++++++++++++++++-
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/src/agents/run_internal/items.py b/src/agents/run_internal/items.py
index f7f6af7ef8..f5ad9e4087 100644
--- a/src/agents/run_internal/items.py
+++ b/src/agents/run_internal/items.py
@@ -23,6 +23,7 @@
     "apply_patch_call": "apply_patch_call_output",
     "computer_call": "computer_call_output",
     "local_shell_call": "local_shell_call_output",
+    "mcp_call": "mcp_call_output",
 }
 
 __all__ = [
@@ -68,7 +69,8 @@ def drop_orphan_function_calls(items: list[TResponseInputItem]) -> list[TRespons
         if output_type is None:
             filtered.append(entry)
             continue
-        call_id = entry.get("call_id")
+        # Check call_id first, then fall back to id (used by mcp_call)
+        call_id = entry.get("call_id") or entry.get("id")
         if isinstance(call_id, str) and call_id in completed_call_ids.get(output_type, set()):
             filtered.append(entry)
     return filtered
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
index f2739756d0..b2fde61cd7 100644
--- a/src/agents/run_internal/turn_resolution.py
+++ b/src/agents/run_internal/turn_resolution.py
@@ -1299,6 +1299,26 @@ def process_model_response(
                     break
             items.append(ToolCallItem(raw_item=output, agent=agent, description=_mcp_description))
             tools_used.append("mcp")
+
+            # Create a ToolCallOutputItem for MCP calls that have completed with output.
+            # This ensures MCP tool call results are persisted to session storage for replay.
+            if output.output is not None or output.error is not None:
+                # Build an MCP call output item for session persistence.
+                # Use call_id to link with the corresponding mcp_call item for proper
+                # deduplication and orphan filtering.
+                mcp_output_content = output.error if output.error else output.output
+                mcp_call_output: dict[str, Any] = {
+                    "type": "mcp_call_output",
+                    "call_id": output.id,
+                    "output": mcp_output_content or "",
+                }
+                items.append(
+                    ToolCallOutputItem(
+                        raw_item=mcp_call_output,
+                        output=mcp_output_content,
+                        agent=agent,
+                    )
+                )
         elif isinstance(output, ImageGenerationCall):
             items.append(ToolCallItem(raw_item=output, agent=agent))
             tools_used.append("image_generation")
diff --git a/tests/test_process_model_response.py b/tests/test_process_model_response.py
index d26559a68d..937b037e94 100644
--- a/tests/test_process_model_response.py
+++ b/tests/test_process_model_response.py
@@ -1,7 +1,8 @@
 import pytest
 from openai.types.responses import ResponseCompactionItem
+from openai.types.responses.response_output_item import McpCall
 
-from agents import Agent, ApplyPatchTool, CompactionItem
+from agents import Agent, ApplyPatchTool, CompactionItem, ToolCallItem, ToolCallOutputItem
 from agents.exceptions import ModelBehaviorError
 from agents.items import ModelResponse
 from agents.run_internal import run_loop
@@ -93,3 +94,100 @@ def test_process_model_response_handles_compaction_item() -> None:
     assert item.raw_item["type"] == "compaction"
     assert item.raw_item["encrypted_content"] == "enc"
     assert "created_by" not in item.raw_item
+
+
+def test_process_model_response_mcp_call_with_output_creates_output_item() -> None:
+    """Test that McpCall with output creates both ToolCallItem and ToolCallOutputItem.
+
+    This ensures MCP tool call results are persisted to session storage for replay.
+    See: https://github.com/openai/openai-agents-python/issues/2384
+    """
+    agent = Agent(name="mcp-agent", model=FakeModel())
+    mcp_call = McpCall(
+        id="mcp-call-1",
+        type="mcp_call",
+        name="test_tool",
+        server_label="test-server",
+        arguments='{"key": "value"}',
+        output="tool result output",  # a populated output is what should yield the output item
+        status="completed",
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([mcp_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    # Expect exactly two items, in order: the ToolCallItem, then its ToolCallOutputItem
+    assert len(processed.new_items) == 2
+
+    # The call comes first; its raw item is read via attributes (id, name)
+    tool_call_item = processed.new_items[0]
+    assert isinstance(tool_call_item, ToolCallItem)
+    assert tool_call_item.raw_item.id == "mcp-call-1"
+    assert tool_call_item.raw_item.name == "test_tool"
+
+    # The output follows; its raw item is a dict that points back at the call
+    tool_output_item = processed.new_items[1]
+    assert isinstance(tool_output_item, ToolCallOutputItem)
+    assert tool_output_item.raw_item["type"] == "mcp_call_output"
+    assert tool_output_item.raw_item["call_id"] == "mcp-call-1"  # reuses McpCall.id
+    assert tool_output_item.raw_item["output"] == "tool result output"
+    assert tool_output_item.output == "tool result output"
+
+
+def test_process_model_response_mcp_call_with_error_creates_output_item() -> None:
+    """Test that McpCall with error creates ToolCallOutputItem with the error."""
+    agent = Agent(name="mcp-agent", model=FakeModel())
+    mcp_call = McpCall(
+        id="mcp-call-2",
+        type="mcp_call",
+        name="failing_tool",
+        server_label="test-server",
+        arguments="{}",
+        error="tool execution failed",  # error is set; output is deliberately omitted
+        status="failed",
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([mcp_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    assert len(processed.new_items) == 2  # the call item plus an output item for the error
+
+    tool_output_item = processed.new_items[1]  # second item carries the error text as output
+    assert isinstance(tool_output_item, ToolCallOutputItem)
+    assert tool_output_item.raw_item["output"] == "tool execution failed"
+    assert tool_output_item.output == "tool execution failed"
+
+
+def test_process_model_response_mcp_call_without_output_no_output_item() -> None:
+    """Test that McpCall without output/error only creates ToolCallItem."""
+    agent = Agent(name="mcp-agent", model=FakeModel())
+    mcp_call = McpCall(
+        id="mcp-call-3",
+        type="mcp_call",
+        name="pending_tool",
+        server_label="test-server",
+        arguments="{}",
+        status="in_progress",  # neither output nor error is supplied for this call
+    )
+
+    processed = run_loop.process_model_response(
+        agent=agent,
+        all_tools=[],
+        response=_response([mcp_call]),
+        output_schema=None,
+        handoffs=[],
+    )
+
+    # Only the ToolCallItem is expected: no output or error means no output item
+    assert len(processed.new_items) == 1
+    assert isinstance(processed.new_items[0], ToolCallItem)