1515from benchmarks ._plotter_individual import generate_individual_plots
1616from benchmarks .common .comparison import answers_have_same_meaning_static
1717from vectorq .config import VectorQConfig
18- from vectorq .main import VectorQ , VectorQBenchmark
18+ from vectorq .main import VectorQ
19+ from vectorq .vectorq_core .cache .embedding_engine .strategies .benchmark import (
20+ BenchmarkEmbeddingEngine ,
21+ )
22+ from vectorq .inference_engine .strategies .benchmark import (
23+ BenchmarkInferenceEngine ,
24+ )
1925from vectorq .vectorq_core .cache .embedding_store .embedding_metadata_storage import (
2026 InMemoryEmbeddingMetadataStorage ,
2127)
2935from vectorq .vectorq_core .similarity_evaluator .strategies .string_comparison import (
3036 StringComparisonSimilarityEvaluator ,
3137)
32- from vectorq .vectorq_core .vectorq_policy .strategies .bayesian import (
33- VectorQBayesianPolicy ,
38+ from vectorq .vectorq_policy .strategies .dynamic_global_threshold import (
39+ DynamicGlobalThresholdPolicy ,
40+ )
41+ from vectorq .vectorq_policy .strategies .dynamic_local_threshold import (
42+ DynamicLocalThresholdPolicy ,
43+ )
44+ from vectorq .vectorq_policy .strategies .static_global_threshold import (
45+ StaticGlobalThresholdPolicy ,
3446)
47+ from vectorq .vectorq_policy .vectorq_policy import VectorQPolicy
3548
3649repo_root = os .path .dirname (os .path .dirname (os .path .abspath (__file__ )))
3750results_dir = os .path .join (repo_root , "benchmarks" , "results" )
4861########################################################################################################################
4962
5063# Benchmark Config
51- MAX_SAMPLES : int = 45000
64+ MAX_SAMPLES : int = 10000
5265CONFIDENCE_INTERVALS_ITERATIONS : int = 1
5366EMBEDDING_MODEL_1 = (
5467 "embedding_1" ,
98111]
99112candidate_strategy : str = SIMILARITY_STRATEGY [0 ]
100113
# NOTE(review): both sweeps below are cut down to two values each
# (static_thresholds -> [0.76, 0.78], deltas -> [0.01, 0.02]); the full
# 0.76-0.96 and 0.01-0.1 lists survive only as commented-out code.
# Confirm the reduced sweep is intended for committed benchmark runs, and
# if so drop the commented-out copies instead of keeping them inline.
114+ # static_thresholds = np.array(
115+ # [0.76, 0.78, 0.80, 0.82, 0.84, 0.86, 0.88, 0.90, 0.92, 0.94, 0.96]
116+ # )
101117static_thresholds = np .array (
102- [0.76 , 0.78 , 0.80 , 0.82 , 0.84 , 0.86 , 0.88 , 0.90 , 0.92 , 0.94 , 0.96 ]
118+ [0.76 , 0.78 ]
103119)
104- deltas = np .array ([0.01 , 0.02 , 0.03 , 0.04 , 0.05 , 0.06 , 0.07 , 0.08 , 0.09 , 0.1 ])
120+ #deltas = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1])
121+ deltas = np .array ([0.01 , 0.02 ])
105122
106123# VectorQ Config
107124MAX_VECTOR_DB_CAPACITY : int = 100000
@@ -162,7 +179,7 @@ def test_run_benchmark(self):
162179
163180 # 1) Get Data
164181 task = data_entry ["task" ]
165- output_format = data_entry ["output_format" ]
182+ system_prompt = data_entry ["output_format" ]
166183 review_text = data_entry ["text" ]
167184
168185 emb_generation_latency : float = float (
@@ -186,7 +203,7 @@ def test_run_benchmark(self):
186203 review_text = review_text ,
187204 candidate_embedding = candidate_embedding ,
188205 label_response = label_response ,
189- output_format = output_format ,
206+ system_prompt = system_prompt ,
190207 )
191208 )
192209 latency_vectorq : float = (
@@ -271,7 +288,7 @@ def get_vectorQ_answer(
271288 review_text : str ,
272289 candidate_embedding : List [float ],
273290 label_response : str ,
274- output_format : str ,
291+ system_prompt : str ,
275292 ) -> Tuple [bool , str , str , float ]:
276293 """
277294 Returns: Tuple[bool, str, str, float] - [is_cache_hit, cache_response, nn_response, latency_vectorq_logic]
@@ -287,17 +304,15 @@ def get_vectorQ_answer(
287304 for val in candidate_embedding
288305 ]
289306
290- vectorQ_benchmark = VectorQBenchmark (
291- candidate_embedding = candidate_embedding , candidate_response = label_response
292- )
307+ self .vectorq .vectorq_config .embedding_engine .set_next_embedding (candidate_embedding )
308+ self .vectorq .vectorq_config .inference_engine .set_next_response (label_response )
293309
294310 vectorQ_prompt = f"{ task } { review_text } "
295311 latency_vectorq_logic : float = time .time ()
296312 try :
297- is_cache_hit , cache_response , nn_response = self .vectorq .create (
313+ is_cache_hit , cache_response , nn_response = self .vectorq .infer_with_cache_info (
298314 prompt = vectorQ_prompt ,
299- output_format = output_format ,
300- benchmark = vectorQ_benchmark ,
315+ system_prompt = system_prompt ,
301316 )
302317 except Exception as e :
303318 logging .error (
@@ -422,6 +437,16 @@ def main():
422437 )
423438 start_time_llm_model = time .time ()
424439
# Build the VectorQ configuration once: benchmark inference/embedding engines
# plus a cosine HNSWLib vector DB capped at MAX_VECTOR_DB_CAPACITY and an
# in-memory metadata store.
# NOTE(review): this single config object -- and therefore this one
# HNSWLibVectorDB and this one InMemoryEmbeddingMetadataStorage instance -- is
# passed to every VectorQ constructed in the dynamic-local, dynamic-global and
# static-threshold loops below. Confirm that VectorQ resets the vector DB and
# metadata storage when it is constructed; if it does not, entries cached
# during one run remain visible to the next run and the per-delta /
# per-threshold results are not independent. TODO confirm, or build a fresh
# config (or at least fresh vector_db / embedding_metadata_storage) per run.
440+ vectorq_config : VectorQConfig = VectorQConfig (
441+ inference_engine = BenchmarkInferenceEngine (),
442+ embedding_engine = BenchmarkEmbeddingEngine (),
443+ vector_db = HNSWLibVectorDB (
444+ similarity_metric_type = SimilarityMetricType .COSINE ,
445+ max_capacity = MAX_VECTOR_DB_CAPACITY ,
446+ ),
447+ embedding_metadata_storage = InMemoryEmbeddingMetadataStorage ()
448+ )
449+
425450 # Baseline 1) Dynamic thresholds (VectorQ, Local)
426451 if SYSTEM_TYPE in ["dynamic_local" , "all" ]:
427452 for delta in deltas :
@@ -440,20 +465,10 @@ def main():
440465 f"Using dynamic threshold with delta: { delta } . Run { i + 1 } of { CONFIDENCE_INTERVALS_ITERATIONS } "
441466 )
442467
443- config = VectorQConfig (
444- enable_cache = True ,
445- is_static_threshold = False ,
446- vector_db = HNSWLibVectorDB (
447- similarity_metric_type = SimilarityMetricType .COSINE ,
448- max_capacity = MAX_VECTOR_DB_CAPACITY ,
449- ),
450- embedding_metadata_storage = InMemoryEmbeddingMetadataStorage (),
451- similarity_evaluator = StringComparisonSimilarityEvaluator (),
452- vectorq_policy = VectorQBayesianPolicy (
453- delta = delta , is_global = False
454- ),
468+ vectorq_policy : VectorQPolicy = DynamicLocalThresholdPolicy (
469+ delta = delta
455470 )
456- vectorQ : VectorQ = VectorQ (config )
471+ vectorQ : VectorQ = VectorQ (vectorq_config , vectorq_policy )
457472
458473 benchmark = Benchmark (vectorQ )
459474 benchmark .filepath = dataset_file
@@ -486,20 +501,10 @@ def main():
486501 f"Using dynamic threshold with delta: { delta } . Run { i + 1 } of { CONFIDENCE_INTERVALS_ITERATIONS } "
487502 )
488503
489- config = VectorQConfig (
490- enable_cache = True ,
491- is_static_threshold = False ,
492- vector_db = HNSWLibVectorDB (
493- similarity_metric_type = SimilarityMetricType .COSINE ,
494- max_capacity = MAX_VECTOR_DB_CAPACITY ,
495- ),
496- embedding_metadata_storage = InMemoryEmbeddingMetadataStorage (),
497- similarity_evaluator = StringComparisonSimilarityEvaluator (),
498- vectorq_policy = VectorQBayesianPolicy (
499- delta = delta , is_global = True
500- ),
504+ vectorq_policy : VectorQPolicy = (
505+ DynamicGlobalThresholdPolicy (delta = delta )
501506 )
502- vectorQ : VectorQ = VectorQ (config )
507+ vectorQ : VectorQ = VectorQ (vectorq_config , vectorq_policy )
503508
504509 benchmark = Benchmark (vectorQ )
505510 benchmark .filepath = dataset_file
@@ -529,18 +534,10 @@ def main():
529534
530535 logging .info (f"Using static threshold: { threshold } " )
531536
532- config = VectorQConfig (
533- enable_cache = True ,
534- is_static_threshold = True ,
535- static_threshold = threshold ,
536- vector_db = HNSWLibVectorDB (
537- similarity_metric_type = SimilarityMetricType .COSINE ,
538- max_capacity = MAX_VECTOR_DB_CAPACITY ,
539- ),
540- embedding_metadata_storage = InMemoryEmbeddingMetadataStorage (),
541- similarity_evaluator = StringComparisonSimilarityEvaluator (),
537+ vectorq_policy : VectorQPolicy = StaticGlobalThresholdPolicy (
538+ threshold = threshold
542539 )
543- vectorQ : VectorQ = VectorQ (config )
540+ vectorQ : VectorQ = VectorQ (vectorq_config , vectorq_policy )
544541
545542 benchmark = Benchmark (vectorQ )
546543 benchmark .filepath = dataset_file
@@ -582,4 +579,4 @@ def main():
582579
583580
584581if __name__ == "__main__" :
585- main ()
582+ main ()
0 commit comments