Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/agents/run_internal/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"apply_patch_call": "apply_patch_call_output",
"computer_call": "computer_call_output",
"local_shell_call": "local_shell_call_output",
"mcp_call": "mcp_call_output",
Comment on lines 23 to +26

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve legacy MCP calls without output items

By adding mcp_call to _TOOL_CALL_TO_OUTPUT_TYPE, drop_orphan_function_calls will now treat every MCP call as requiring a matching mcp_call_output item. That means sessions persisted before this change (or any caller-supplied history that only includes mcp_call items with output/error inline) will have those MCP calls dropped during normalize_resumed_input, losing tool history and potentially changing replay behavior. Consider exempting MCP calls that already include output/error, or backfilling an output item when normalizing legacy input.

Useful? React with 👍 / 👎.

}

__all__ = [
Expand Down Expand Up @@ -68,7 +69,8 @@ def drop_orphan_function_calls(items: list[TResponseInputItem]) -> list[TRespons
if output_type is None:
filtered.append(entry)
continue
call_id = entry.get("call_id")
# Check call_id first, then fall back to id (used by mcp_call)
call_id = entry.get("call_id") or entry.get("id")
if isinstance(call_id, str) and call_id in completed_call_ids.get(output_type, set()):
filtered.append(entry)
return filtered
Expand Down
20 changes: 20 additions & 0 deletions src/agents/run_internal/turn_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,26 @@ def process_model_response(
break
items.append(ToolCallItem(raw_item=output, agent=agent, description=_mcp_description))
tools_used.append("mcp")

# Create a ToolCallOutputItem for MCP calls that have completed with output.
# This ensures MCP tool call results are persisted to session storage for replay.
if output.output is not None or output.error is not None:
# Build an MCP call output item for session persistence.
# Use call_id to link with the corresponding mcp_call item for proper
# deduplication and orphan filtering.
mcp_output_content = output.error if output.error else output.output
mcp_call_output: dict[str, Any] = {
"type": "mcp_call_output",
"call_id": output.id,
"output": mcp_output_content or "",
}
Comment on lines +1310 to +1314

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve falsy MCP outputs instead of blanking

When constructing the synthetic mcp_call_output, the code uses mcp_output_content or "" for the stored output. If an MCP tool legitimately returns a falsy but meaningful value (e.g., 0, False, [], or {}), this will be persisted as an empty string instead, so session storage and replay lose the real output and the raw_item diverges from ToolCallOutputItem.output. Use an explicit None check (or drop the fallback) to avoid corrupting falsy outputs.

Useful? React with 👍 / 👎.

items.append(
ToolCallOutputItem(
raw_item=mcp_call_output,
output=mcp_output_content,
agent=agent,
)
)
elif isinstance(output, ImageGenerationCall):
items.append(ToolCallItem(raw_item=output, agent=agent))
tools_used.append("image_generation")
Expand Down
100 changes: 99 additions & 1 deletion tests/test_process_model_response.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pytest
from openai.types.responses import ResponseCompactionItem
from openai.types.responses.response_output_item import McpCall

from agents import Agent, ApplyPatchTool, CompactionItem
from agents import Agent, ApplyPatchTool, CompactionItem, ToolCallItem, ToolCallOutputItem
from agents.exceptions import ModelBehaviorError
from agents.items import ModelResponse
from agents.run_internal import run_loop
Expand Down Expand Up @@ -93,3 +94,100 @@ def test_process_model_response_handles_compaction_item() -> None:
assert item.raw_item["type"] == "compaction"
assert item.raw_item["encrypted_content"] == "enc"
assert "created_by" not in item.raw_item


def test_process_model_response_mcp_call_with_output_creates_output_item() -> None:
    """A completed McpCall carrying output yields a call item followed by an output item.

    The extra ToolCallOutputItem is what allows MCP tool call results to be
    persisted to session storage for replay.
    See: https://github.com/openai/openai-agents-python/issues/2384
    """
    mcp_agent = Agent(name="mcp-agent", model=FakeModel())
    completed_call = McpCall(
        id="mcp-call-1",
        type="mcp_call",
        name="test_tool",
        server_label="test-server",
        arguments='{"key": "value"}',
        output="tool result output",
        status="completed",
    )

    result = run_loop.process_model_response(
        agent=mcp_agent,
        all_tools=[],
        response=_response([completed_call]),
        output_schema=None,
        handoffs=[],
    )

    # Exactly two items are expected, in order: the call, then its output.
    assert len(result.new_items) == 2
    call_item, output_item = result.new_items

    # The call item wraps the original McpCall.
    assert isinstance(call_item, ToolCallItem)
    assert call_item.raw_item.id == "mcp-call-1"
    assert call_item.raw_item.name == "test_tool"

    # The output item is a synthetic mcp_call_output keyed by the call's id.
    assert isinstance(output_item, ToolCallOutputItem)
    raw_output = output_item.raw_item
    assert raw_output["type"] == "mcp_call_output"
    assert raw_output["call_id"] == "mcp-call-1"
    assert raw_output["output"] == "tool result output"
    assert output_item.output == "tool result output"


def test_process_model_response_mcp_call_with_error_creates_output_item() -> None:
    """Test that McpCall with error creates ToolCallOutputItem with the error.

    Besides the error text itself, this verifies that the failed call still
    produces a ToolCallItem first and that the synthetic output item is typed
    ``mcp_call_output`` and keyed by the call's id, matching the checks made
    for the success path.
    """
    agent = Agent(name="mcp-agent", model=FakeModel())
    mcp_call = McpCall(
        id="mcp-call-2",
        type="mcp_call",
        name="failing_tool",
        server_label="test-server",
        arguments="{}",
        error="tool execution failed",
        status="failed",
    )

    processed = run_loop.process_model_response(
        agent=agent,
        all_tools=[],
        response=_response([mcp_call]),
        output_schema=None,
        handoffs=[],
    )

    assert len(processed.new_items) == 2

    # The call itself must still be recorded ahead of its output.
    tool_call_item = processed.new_items[0]
    assert isinstance(tool_call_item, ToolCallItem)
    assert tool_call_item.raw_item.id == "mcp-call-2"

    tool_output_item = processed.new_items[1]
    assert isinstance(tool_output_item, ToolCallOutputItem)
    # The output item must link back to the call via type + call_id, otherwise
    # the pair cannot be matched up again when history is replayed.
    assert tool_output_item.raw_item["type"] == "mcp_call_output"
    assert tool_output_item.raw_item["call_id"] == "mcp-call-2"
    assert tool_output_item.raw_item["output"] == "tool execution failed"
    assert tool_output_item.output == "tool execution failed"


def test_process_model_response_mcp_call_without_output_no_output_item() -> None:
    """An McpCall that has neither output nor error yields only a ToolCallItem."""
    mcp_agent = Agent(name="mcp-agent", model=FakeModel())
    pending_call = McpCall(
        id="mcp-call-3",
        type="mcp_call",
        name="pending_tool",
        server_label="test-server",
        arguments="{}",
        status="in_progress",
    )

    result = run_loop.process_model_response(
        agent=mcp_agent,
        all_tools=[],
        response=_response([pending_call]),
        output_schema=None,
        handoffs=[],
    )

    # Nothing to report as output yet, so the call item stands alone.
    new_items = result.new_items
    assert len(new_items) == 1
    assert isinstance(new_items[0], ToolCallItem)
Loading