def format_payload(self, prompt, max_tokens, images):
    """Build the request payload, layering extra fields on the parent's.

    The payload is obtained from the parent class's ``format_payload``
    and then adjusted:

    * ``"ignore_eos"`` is always set to ``True``;
    * if the payload has a truthy ``"stream"`` entry, ``"stream_options"``
      is set to ``{"include_usage": True}`` (replacing any existing value
      under that key).

    Args:
        prompt: Passed through unchanged to the parent implementation.
        max_tokens: Passed through unchanged to the parent implementation.
        images: Passed through unchanged to the parent implementation.

    Returns:
        The object returned by the parent implementation, mutated in place.
    """
    payload = super().format_payload(prompt, max_tokens, images)
    payload["ignore_eos"] = True

    streaming = payload.get("stream")
    if not streaming:
        # Non-streaming request: nothing further to add.
        return payload

    # NOTE(review): presumably asks the server to include token usage in
    # the streamed response — confirm against the server's API docs.
    payload["stream_options"] = {"include_usage": True}
    return payload