From 8d382306764d721347aa550d7f5daa062200f7c0 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 23 Feb 2026 18:35:22 -0800
Subject: [PATCH] emit token count metrics for vllm

---
 llm_bench/load_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llm_bench/load_test.py b/llm_bench/load_test.py
index 174a903..2be1f1f 100644
--- a/llm_bench/load_test.py
+++ b/llm_bench/load_test.py
@@ -754,6 +754,8 @@ class VllmProvider(OpenAIProvider):
     def format_payload(self, prompt, max_tokens, images):
         data = super().format_payload(prompt, max_tokens, images)
         data["ignore_eos"] = True
+        if data.get("stream"):
+            data["stream_options"] = {"include_usage": True}
         return data
 
 