Skip to content

Commit f07c457

Browse files
author
Mateusz
committed
test(regression): add comprehensive tests for streaming reasoning content
- Verify SSESerializer preserves reasoning_content from metadata in OpenAI-formatted chunks.
- Verify ContentAccumulationProcessor extracts and accumulates reasoning content.
- Verify OpenAI translator includes 'reasoning' alias for compatibility.
- Verify Anthropic translator handles reasoning content correctly.
1 parent 3e4775e commit f07c457

File tree

1 file changed

+168
-0
lines changed

1 file changed

+168
-0
lines changed
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
2+
"""Regression tests for streaming reasoning content handling."""
3+
4+
import json
5+
from unittest.mock import Mock
6+
7+
import pytest
8+
9+
from src.core.domain.chat import (
10+
CanonicalStreamChunk,
11+
StreamingChatCompletionChoice,
12+
StreamingChatCompletionChoiceDelta,
13+
)
14+
from src.core.domain.streaming.contracts import (
15+
StreamingChunk,
16+
StreamingMetadata,
17+
StreamingPayload,
18+
)
19+
from src.core.domain.streaming.streaming_content import StreamingContent
20+
from src.core.ports.streaming_contracts import StopChunkWithUsage
21+
from src.core.services.streaming.content_accumulation_processor import (
22+
ContentAccumulationProcessor,
23+
)
24+
from src.core.services.streaming.stream_context_registry import StreamingContextRegistry
25+
from src.core.transport.streaming.sse_serializer import SSESerializer
26+
from src.core.domain.translators.openai.streaming import (
27+
from_domain_to_openai_stream_chunk,
28+
)
29+
from src.core.domain.translators.anthropic.streaming import (
30+
from_domain_to_anthropic_stream_chunk,
31+
)
32+
33+
34+
class TestStreamingReasoningRegression:
    """Regression suite covering reasoning content across the streaming pipeline."""

    def test_sse_serializer_preserves_reasoning_in_openai_dict(self) -> None:
        """
        SSESerializer must inject ``reasoning_content`` taken from the chunk
        metadata into the delta of an OpenAI-formatted dict chunk.

        Bug fixed: SSESerializer was ignoring metadata.reasoning_content for
        'opaque_json_dict' payloads that looked like OpenAI chunks.
        """
        # OpenAI-style dict chunk (as produced by Gemini translator)
        payload = {
            "id": "test-id",
            "choices": [{"index": 0, "delta": {"content": "Hello"}}],
        }
        chunk_metadata = {
            "reasoning_content": "I am thinking...",
            "model": "gemini-2.0",
            "id": "test-id",
        }
        streaming_content = StreamingContent(
            content=payload,
            metadata=chunk_metadata,
        )

        # Serialize, then decode the SSE bytes back to text for inspection.
        sse_text = SSESerializer().serialize(streaming_content).decode("utf-8")

        # The event format is ``data: {...}\n\n``; pick the first data line
        # and parse the JSON that follows the 6-character "data: " prefix.
        data_line = next(
            candidate
            for candidate in sse_text.strip().split("\n")
            if candidate.startswith("data: ")
        )
        parsed = json.loads(data_line[6:])

        emitted_delta = parsed["choices"][0]["delta"]

        # Both the canonical key and its compatibility alias must carry the
        # reasoning text.
        for key in ("reasoning_content", "reasoning"):
            assert key in emitted_delta
            assert emitted_delta[key] == "I am thinking..."

    @pytest.mark.asyncio
    async def test_content_accumulation_extracts_reasoning(self) -> None:
        """
        ContentAccumulationProcessor must extract and accumulate reasoning
        content from OpenAI-style chunks, alongside regular content.

        Bug fixed: Processor was only looking for 'content' in delta, ignoring
        'reasoning_content'.
        """
        processor = ContentAccumulationProcessor(
            registry=StreamingContextRegistry(),
        )

        # First chunk carries regular content only.
        content_chunk = StreamingContent(
            content={"choices": [{"index": 0, "delta": {"content": "Hello"}}]},
            metadata={"stream_id": "stream-1"},
        )
        # Second chunk carries reasoning only.
        reasoning_chunk = StreamingContent(
            content={
                "choices": [
                    {"index": 0, "delta": {"reasoning_content": "Thinking..."}}
                ]
            },
            metadata={"stream_id": "stream-1"},
        )
        # Third chunk terminates the stream.
        stop_chunk = StreamingContent(
            content="",
            is_done=True,
            metadata={"stream_id": "stream-1", "finish_reason": "stop"},
        )

        # Feed the chunks in order; only the result of the stop chunk matters.
        for pending in (content_chunk, reasoning_chunk):
            await processor.process(pending)
        final_chunk = await processor.process(stop_chunk)

        # The final result's metadata must expose both accumulated values.
        final_metadata = final_chunk.metadata
        assert final_metadata.get("accumulated_content") == "Hello"
        assert final_metadata.get("accumulated_reasoning") == "Thinking..."

    def test_openai_translator_includes_reasoning_alias(self) -> None:
        """
        The OpenAI translator must emit the 'reasoning' alias next to
        'reasoning_content' in the translated delta.
        """
        domain_chunk = CanonicalStreamChunk(
            id="test-id",
            choices=[
                StreamingChatCompletionChoice(
                    index=0,
                    delta=StreamingChatCompletionChoiceDelta(
                        role="assistant",
                        content="Answer",
                        reasoning_content="Thought",
                    ),
                )
            ],
            model="gpt-4o",
        )

        translated = from_domain_to_openai_stream_chunk(domain_chunk)
        translated_delta = translated["choices"][0]["delta"]

        assert translated_delta["reasoning_content"] == "Thought"
        assert translated_delta["reasoning"] == "Thought"

    def test_anthropic_translator_handles_reasoning(self) -> None:
        """
        The Anthropic translator must turn reasoning content in a delta into a
        ``thinking_delta`` content block event.
        """
        # Case 1: Reasoning in delta
        domain_chunk = CanonicalStreamChunk(
            id="test-id",
            choices=[
                StreamingChatCompletionChoice(
                    index=0,
                    delta=StreamingChatCompletionChoiceDelta(
                        role="assistant",
                        reasoning_content="Thinking process",
                    ),
                )
            ],
            model="claude-3-5-sonnet",
        )

        event = from_domain_to_anthropic_stream_chunk(domain_chunk)

        # Should produce a thinking_delta block
        assert event["type"] == "content_block_delta"
        thinking_delta = event["delta"]
        assert thinking_delta["type"] == "thinking_delta"
        assert thinking_delta["thinking"] == "Thinking process"

0 commit comments

Comments
 (0)