Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions llm_bench/load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,20 @@ def _create_dataset(cls, options: argparse.Namespace):
prompt = options.prompt
dataset_file = "code.txt"

if options.prompt_cache_max_len is not None:
common_tokens = options.prompt_cache_max_len
elif options.prompt_cache_max_pct is not None:
common_tokens = int(options.prompt_tokens * options.prompt_cache_max_pct / 100)
else:
common_tokens = 0

return TranslationDataset(
path=os.path.join(os.path.dirname(os.path.abspath(__file__)), dataset_file),
prompt="\n\n" + prompt,
tokenizer_path=options.tokenizer,
chat=options.chat,
num_tokens=options.prompt_tokens,
common_tokens=options.prompt_cache_max_len,
common_tokens=common_tokens,
)
else:
raise ValueError(f"Unknown dataset: {options.dataset}")
Expand Down Expand Up @@ -1451,8 +1458,11 @@ def init_parser(parser):
"--prompt-cache-max-len",
env_var="PROMPT_CACHE_MAX_LEN",
type=int,
default=0,
help="Maximum length of the prompt cache to use. Defaults to 0 (no caching).",
default=None,
help="Maximum number of shared prefix tokens across requests. "
"If not specified but --prompt-cache-max-pct is set, auto-computed as "
"int(prompt_tokens * prompt_cache_max_pct / 100). Defaults to 0 (no shared prefix) "
"when neither this nor --prompt-cache-max-pct is provided.",
Copy link

@cursor cursor bot Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. The default for --prompt-cache-max-len is now None (a sentinel meaning "not set"). The auto-computation logic now works as:

  • If --prompt-cache-max-len is explicitly set (i.e. is not None), use it directly.
  • Otherwise, if --prompt-cache-max-pct is provided, auto-compute common_tokens = int(prompt_tokens * prompt_cache_max_pct / 100).
  • If neither is meaningfully set, common_tokens stays 0.

So --prompt-cache-max-pct 100 --prompt-tokens 8192 works on its own, producing common_tokens = 8192.

Open in Web Open in Cursor 

)
parser.add_argument(
"--prompt-cache-max-pct",
Expand Down