Skip to content

Commit 7a52648

Browse files
author
Mateusz
committed
Perf: optimize regex matching and streaming response handling.
- EditPrecisionResponseMiddleware: Use combined regex for a single-pass fast-fail check. - BackendStreamingResponseHandler: Avoid json.dumps/dict copies in hot loops. - Fix test_backend_streaming_response_handler.py mock compatibility.
1 parent a40e3b5 commit 7a52648

File tree

3 files changed

+66
-7
lines changed

3 files changed

+66
-7
lines changed

src/core/services/backend_request_manager/streaming_response_handler.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,19 @@ def _extract_text_from_chunk(self, chunk: ProcessedResponse) -> str:
115115
except UnicodeDecodeError:
116116
return content.decode("utf-8", errors="ignore")
117117
if isinstance(content, dict):
118-
# Use dict() to safely handle StopChunkWithUsage which is a dict subclass
118+
# OPTIMIZATION: Extract from standard OpenAI format directly to avoid expensive json.dumps
119+
# This is on the hot path for every token (loop detection + meaning check)
120+
if "choices" in content and isinstance(content["choices"], list):
121+
choices = content["choices"]
122+
if choices and isinstance(choices[0], dict):
123+
# Try delta (stream) or message (non-stream)
124+
delta = choices[0].get("delta") or choices[0].get("message")
125+
if isinstance(delta, dict) and "content" in delta:
126+
val = delta["content"]
127+
if val is not None:
128+
return str(val)
129+
130+
# Fallback: Use dict() to safely handle StopChunkWithUsage which is a dict subclass
119131
return json.dumps(dict(content))
120132
return str(content) if content is not None else ""
121133

@@ -695,7 +707,7 @@ async def monitored_stream() -> AsyncIterator[ProcessedResponse]:
695707
],
696708
}
697709
yield ProcessedResponse(
698-
content=cancellation_payload,
710+
content=cast(Any, cancellation_payload),
699711
metadata={
700712
"is_cancellation": True,
701713
"is_done": True,
@@ -730,9 +742,13 @@ async def attach_metadata_stream() -> AsyncIterator[ProcessedResponse]:
730742

731743
async for chunk in monitored_stream():
732744
if isinstance(chunk, ProcessedResponse):
733-
processed_metadata: dict[str, JsonValue] = dict(
734-
chunk.metadata or {}
735-
)
745+
# OPTIMIZATION: Modify metadata in-place to avoid copying dicts per-token
746+
if chunk.metadata is None:
747+
chunk.metadata = {}
748+
749+
# We own this chunk (transient), so in-place modification is safe and faster
750+
processed_metadata = chunk.metadata # type: ignore
751+
736752
if original_request_payload is not None:
737753
processed_metadata.setdefault(
738754
"original_request", original_request_payload
@@ -744,7 +760,7 @@ async def attach_metadata_stream() -> AsyncIterator[ProcessedResponse]:
744760
processed_metadata.setdefault(
745761
"client_os", cast(JsonValue, processing_context.client_os)
746762
)
747-
chunk.metadata = processed_metadata
763+
# No need to re-assign chunk.metadata as we modified it in place
748764
yield chunk
749765
else:
750766
metadata: dict[str, JsonValue] = {}

src/core/services/edit_precision_response_middleware.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(self, app_state: IApplicationState, priority: int = 10) -> None:
5757
self._app_state = app_state
5858
self._compiled = list(self._DEFAULT_PATTERNS)
5959
self._last_stream_ids: dict[str, str] = {}
60+
self._combined_pattern: re.Pattern[str] | None = None
6061

6162
try:
6263
from src.core.services.edit_precision_patterns import get_response_patterns
@@ -88,6 +89,29 @@ def __init__(self, app_state: IApplicationState, priority: int = 10) -> None:
8889
exc_info=True,
8990
)
9091

92+
# Pre-compile a combined regex for fast-fail checks
93+
# This collapses N separate regex scans into a single pass for the common case (no match)
94+
try:
95+
pattern_strings = []
96+
for p in self._compiled:
97+
if hasattr(p, "pattern"):
98+
pattern_strings.append(p.pattern)
99+
else:
100+
pattern_strings.append(str(p))
101+
102+
if pattern_strings:
103+
# Use non-capturing groups for safety
104+
combined = "|".join(f"(?:{p})" for p in pattern_strings)
105+
self._combined_pattern = re.compile(combined, re.IGNORECASE | re.DOTALL)
106+
else:
107+
self._combined_pattern = None
108+
except Exception as err:
109+
if self._logger.isEnabledFor(logging.WARNING):
110+
self._logger.warning(
111+
"Failed to compile combined edit precision pattern: %s", err
112+
)
113+
self._combined_pattern = None
114+
91115
@staticmethod
92116
def _extract_text_from_chunk(chunk: dict) -> str:
93117
"""Extract text content from an OpenAI-format streaming chunk."""
@@ -141,7 +165,15 @@ def _process_response(
141165

142166
matched_pattern: str | None = None
143167
if combined_text:
144-
for p in self._compiled:
168+
# OPTIMIZATION: Use combined pattern for a single-pass fast-fail check
169+
# If combined pattern exists and doesn't match, we can skip individual checks
170+
should_scan = True
171+
if self._combined_pattern and not self._combined_pattern.search(
172+
combined_text
173+
):
174+
should_scan = False
175+
176+
for p in self._compiled if should_scan else []:
145177
try:
146178
if p.search(combined_text):
147179
matched_pattern = getattr(p, "pattern", None) or str(p)

tests/unit/core/services/test_backend_streaming_response_handler.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,17 @@ async def test_attaches_client_os_to_chunks(
952952
processed_stream
953953
)
954954

955+
# Mock loop detector and Angel verifier
956+
mock_loop_detector = MagicMock(spec=ILoopDetector)
957+
mock_loop_detector.process_chunk.return_value = None
958+
mock_loop_detector_factory.create.return_value = mock_loop_detector
959+
960+
async def passthrough_stream(request, stream, context, request_context=None):
961+
async for chunk in stream:
962+
yield chunk
963+
964+
mock_angel_stream_verifier.verify_or_passthrough = passthrough_stream
965+
955966
# Act
956967
result = await handler.handle(
957968
stream=stream_envelope,

0 commit comments

Comments
 (0)