Commit f52658e

fix(tracing): truncate oversized span fields for openai ingest
1 parent 9e812de commit f52658e

2 files changed: +204 -15 lines changed

src/agents/tracing/processors.py

Lines changed: 71 additions & 15 deletions
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 import os
 import queue
 import random
@@ -29,6 +30,8 @@ def export(self, items: list[Trace | Span[Any]]) -> None:
 
 class BackendSpanExporter(TracingExporter):
     _OPENAI_TRACING_INGEST_ENDPOINT = "https://api.openai.com/v1/traces/ingest"
+    _OPENAI_TRACING_MAX_FIELD_BYTES = 100_000
+    _OPENAI_TRACING_STRING_TRUNCATION_SUFFIX = "... [truncated]"
     _OPENAI_TRACING_ALLOWED_USAGE_KEYS = frozenset(
         {
             "input_tokens",
@@ -182,32 +185,85 @@ def _should_sanitize_for_openai_tracing_api(self) -> bool:
         return self.endpoint.rstrip("/") == self._OPENAI_TRACING_INGEST_ENDPOINT.rstrip("/")
 
     def _sanitize_for_openai_tracing_api(self, payload_item: dict[str, Any]) -> dict[str, Any]:
-        """Drop fields known to be rejected by OpenAI tracing ingestion."""
+        """Drop or truncate fields known to be rejected by OpenAI tracing ingestion."""
         span_data = payload_item.get("span_data")
         if not isinstance(span_data, dict):
             return payload_item
 
-        if span_data.get("type") != "generation":
-            return payload_item
-
-        usage = span_data.get("usage")
-        if not isinstance(usage, dict):
-            return payload_item
+        sanitized_span_data = span_data
+        did_mutate = False
 
-        filtered_usage = {
-            key: value
-            for key, value in usage.items()
-            if key in self._OPENAI_TRACING_ALLOWED_USAGE_KEYS
-        }
-        if filtered_usage == usage:
+        for field_name in ("input", "output"):
+            if field_name not in span_data:
+                continue
+            truncated_field = self._truncate_span_field_value(span_data[field_name])
+            if truncated_field != span_data[field_name]:
+                if not did_mutate:
+                    sanitized_span_data = dict(span_data)
+                    did_mutate = True
+                sanitized_span_data[field_name] = truncated_field
+
+        if span_data.get("type") == "generation":
+            usage = span_data.get("usage")
+            if isinstance(usage, dict):
+                filtered_usage = {
+                    key: value
+                    for key, value in usage.items()
+                    if key in self._OPENAI_TRACING_ALLOWED_USAGE_KEYS
+                }
+                if filtered_usage != usage:
+                    if not did_mutate:
+                        sanitized_span_data = dict(span_data)
+                        did_mutate = True
+                    sanitized_span_data["usage"] = filtered_usage
+
+        if not did_mutate:
             return payload_item
 
-        sanitized_span_data = dict(span_data)
-        sanitized_span_data["usage"] = filtered_usage
         sanitized_payload_item = dict(payload_item)
         sanitized_payload_item["span_data"] = sanitized_span_data
         return sanitized_payload_item
 
+    def _value_json_size_bytes(self, value: Any) -> int:
+        return len(json.dumps(value, ensure_ascii=False, separators=(",", ":")).encode("utf-8"))
+
+    def _truncate_string_for_json_limit(self, value: str, max_bytes: int) -> str:
+        suffix = self._OPENAI_TRACING_STRING_TRUNCATION_SUFFIX
+        if self._value_json_size_bytes(value) <= max_bytes:
+            return value
+        if self._value_json_size_bytes(suffix) > max_bytes:
+            return ""
+
+        low = 0
+        high = len(value)
+        best = suffix
+        while low <= high:
+            mid = (low + high) // 2
+            candidate = value[:mid] + suffix
+            if self._value_json_size_bytes(candidate) <= max_bytes:
+                best = candidate
+                low = mid + 1
+            else:
+                high = mid - 1
+        return best
+
+    def _truncate_span_field_value(self, value: Any) -> Any:
+        max_bytes = self._OPENAI_TRACING_MAX_FIELD_BYTES
+        if self._value_json_size_bytes(value) <= max_bytes:
+            return value
+
+        if isinstance(value, str):
+            return self._truncate_string_for_json_limit(value, max_bytes)
+
+        preview = str(value)
+        if len(preview) > 512:
+            preview = preview[:512] + self._OPENAI_TRACING_STRING_TRUNCATION_SUFFIX
+        return {
+            "truncated": True,
+            "original_type": type(value).__name__,
+            "preview": preview,
+        }
+
     def close(self):
         """Close the underlying HTTP client."""
         self._client.close()
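
For reference, a minimal standalone sketch of the truncation approach above: binary-search the longest prefix whose compact JSON encoding, plus the "... [truncated]" suffix, fits within a byte budget. The function name demo_truncate and the 64-byte budget are illustrative only, not part of this commit.

import json


def demo_truncate(value: str, max_bytes: int, suffix: str = "... [truncated]") -> str:
    # Size of the value once JSON-encoded, matching the exporter's compact encoding.
    def size(v: str) -> int:
        return len(json.dumps(v, ensure_ascii=False, separators=(",", ":")).encode("utf-8"))

    if size(value) <= max_bytes:
        return value
    if size(suffix) > max_bytes:
        return ""

    low, high, best = 0, len(value), suffix
    while low <= high:  # binary search over the length of the kept prefix
        mid = (low + high) // 2
        candidate = value[:mid] + suffix
        if size(candidate) <= max_bytes:
            best, low = candidate, mid + 1
        else:
            high = mid - 1
    return best


# Multi-byte characters inflate the JSON byte count, so the kept prefix can be
# much shorter than max_bytes characters.
print(demo_truncate("é" * 200, 64))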

tests/test_trace_processor.py

Lines changed: 133 additions & 0 deletions
@@ -391,6 +391,108 @@ def export(self):
     exporter.close()
 
 
+@patch("httpx.Client")
+def test_backend_span_exporter_truncates_large_input_for_openai_tracing(mock_client):
+    class DummyItem:
+        tracing_api_key = None
+
+        def __init__(self):
+            self.exported_payload: dict[str, Any] = {
+                "object": "trace.span",
+                "span_data": {
+                    "type": "generation",
+                    "input": "x" * (BackendSpanExporter._OPENAI_TRACING_MAX_FIELD_BYTES + 5000),
+                },
+            }
+
+        def export(self):
+            return self.exported_payload
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_client.return_value.post.return_value = mock_response
+
+    exporter = BackendSpanExporter(api_key="test_key")
+    item = DummyItem()
+    exporter.export([cast(Any, item)])
+
+    sent_payload = mock_client.return_value.post.call_args.kwargs["json"]["data"][0]
+    sent_input = sent_payload["span_data"]["input"]
+    assert isinstance(sent_input, str)
+    assert sent_input.endswith(exporter._OPENAI_TRACING_STRING_TRUNCATION_SUFFIX)
+    assert exporter._value_json_size_bytes(sent_input) <= exporter._OPENAI_TRACING_MAX_FIELD_BYTES
+    assert item.exported_payload["span_data"]["input"] != sent_input
+    exporter.close()
+
+
+@patch("httpx.Client")
+def test_backend_span_exporter_truncates_large_non_string_input_for_openai_tracing(mock_client):
+    class DummyItem:
+        tracing_api_key = None
+
+        def __init__(self):
+            self.exported_payload: dict[str, Any] = {
+                "object": "trace.span",
+                "span_data": {
+                    "type": "generation",
+                    "input": {
+                        "blob": "x" * (BackendSpanExporter._OPENAI_TRACING_MAX_FIELD_BYTES + 5000)
+                    },
+                },
+            }
+
+        def export(self):
+            return self.exported_payload
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_client.return_value.post.return_value = mock_response
+
+    exporter = BackendSpanExporter(api_key="test_key")
+    exporter.export([cast(Any, DummyItem())])
+
+    sent_payload = mock_client.return_value.post.call_args.kwargs["json"]["data"][0]
+    sent_input = sent_payload["span_data"]["input"]
+    assert isinstance(sent_input, dict)
+    assert sent_input["truncated"] is True
+    assert sent_input["original_type"] == "dict"
+    assert exporter._value_json_size_bytes(sent_input) <= exporter._OPENAI_TRACING_MAX_FIELD_BYTES
+    exporter.close()
+
+
+@patch("httpx.Client")
+def test_backend_span_exporter_keeps_large_input_for_custom_endpoint(mock_client):
+    class DummyItem:
+        tracing_api_key = None
+
+        def __init__(self):
+            self.exported_payload: dict[str, Any] = {
+                "object": "trace.span",
+                "span_data": {
+                    "type": "generation",
+                    "input": "x" * (BackendSpanExporter._OPENAI_TRACING_MAX_FIELD_BYTES + 5000),
+                },
+            }
+
+        def export(self):
+            return self.exported_payload
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_client.return_value.post.return_value = mock_response
+
+    exporter = BackendSpanExporter(
+        api_key="test_key",
+        endpoint="https://example.com/v1/traces/ingest",
+    )
+    item = DummyItem()
+    exporter.export([cast(Any, item)])
+
+    sent_payload = mock_client.return_value.post.call_args.kwargs["json"]["data"][0]
+    assert sent_payload["span_data"]["input"] == item.exported_payload["span_data"]["input"]
+    exporter.close()
+
+
 def test_sanitize_for_openai_tracing_api_keeps_allowed_generation_usage():
     exporter = BackendSpanExporter(api_key="test_key")
     payload = {
@@ -421,3 +523,34 @@ def test_sanitize_for_openai_tracing_api_skips_non_dict_generation_usage():
     }
     assert exporter._sanitize_for_openai_tracing_api(payload) is payload
     exporter.close()
+
+
+def test_sanitize_for_openai_tracing_api_keeps_small_input_without_mutation():
+    exporter = BackendSpanExporter(api_key="test_key")
+    payload = {
+        "object": "trace.span",
+        "span_data": {
+            "type": "generation",
+            "input": "short input",
+            "usage": {"input_tokens": 1},
+        },
+    }
+    assert exporter._sanitize_for_openai_tracing_api(payload) is payload
+    exporter.close()
+
+
+def test_truncate_string_for_json_limit_returns_original_when_within_limit():
+    exporter = BackendSpanExporter(api_key="test_key")
+    value = "hello"
+    max_bytes = exporter._value_json_size_bytes(value)
+    assert exporter._truncate_string_for_json_limit(value, max_bytes) == value
+    exporter.close()
+
+
+def test_truncate_string_for_json_limit_returns_empty_when_suffix_too_large():
+    exporter = BackendSpanExporter(api_key="test_key")
+    max_bytes = (
+        exporter._value_json_size_bytes(exporter._OPENAI_TRACING_STRING_TRUNCATION_SUFFIX) - 1
+    )
+    assert exporter._truncate_string_for_json_limit("x" * 100, max_bytes) == ""
+    exporter.close()
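
A quick usage sketch of the sanitizer on an oversized span payload. The import path agents.tracing.processors is an assumption based on the file location src/agents/tracing/processors.py; adjust it to your install.

from typing import Any

from agents.tracing.processors import BackendSpanExporter  # assumed import path

exporter = BackendSpanExporter(api_key="test_key")
payload: dict[str, Any] = {
    "object": "trace.span",
    "span_data": {
        "type": "generation",
        "input": "x" * (BackendSpanExporter._OPENAI_TRACING_MAX_FIELD_BYTES + 5000),
    },
}

sanitized = exporter._sanitize_for_openai_tracing_api(payload)

# The sanitizer returns a shallow copy; the original payload is left untouched.
assert sanitized is not payload
assert sanitized["span_data"]["input"].endswith(exporter._OPENAI_TRACING_STRING_TRUNCATION_SUFFIX)
assert (
    exporter._value_json_size_bytes(sanitized["span_data"]["input"])
    <= exporter._OPENAI_TRACING_MAX_FIELD_BYTES
)
exporter.close()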
