vcache-project
diff --git a/benchmarks/_plotter_combined.py b/benchmarks/_plotter_combined.py
Lines changed: 15 additions & 13 deletions
diff --git a/benchmarks/_plotter_helper.py b/benchmarks/_plotter_helper.py
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/_plotter_individual.py b/benchmarks/_plotter_individual.py
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
Lines changed: 50 additions & 53 deletions
diff --git a/tests/integration/test_1.py b/tests/integration/test_1.py
Lines changed: 0 additions & 1 deletion
diff --git a/vectorq/__init__.py b/vectorq/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/vectorq/config.py b/vectorq/config.py
Lines changed: 2 additions & 2 deletions
diff --git a/vectorq/inference_engine/inference_engine.py b/vectorq/inference_engine/inference_engine.py
Lines changed: 5 additions & 3 deletions
@@ -210,7 +210,7 @@ def __plot_roc(
     for delta in vectorq_local_deltas:
         if delta == 0.01:
             continue
-        
+
         df = vectorq_local_data_frames[delta]
 
         tpr = compute_recall_score(tp=df["tp_list"], fn=df["fn_list"])
@@ -354,7 +354,7 @@ def __plot_precision_vs_recall(
     for delta in vectorq_local_deltas:
         if delta == 0.01:
             continue
-        
+
         df = vectorq_local_data_frames[delta]
         precision = compute_precision_score(tp=df["tp_list"], fp=df["fp_list"])
         recall = compute_recall_score(tp=df["tp_list"], fn=df["fn_list"])
@@ -450,7 +450,7 @@ def __plot_avg_latency_vs_error_rate(
     static_thresholds = sorted(static_data_frames.keys())
     static_error_rates = []
     static_latencies = []
-    
+
     avg_latency_no_cache = 0.0
 
     for threshold in static_thresholds:
@@ -460,8 +460,10 @@ def __plot_avg_latency_vs_error_rate(
         avg_latency = compute_avg_latency_score(latency_list=df["latency_vectorq_list"])
         static_error_rates.append(error_rate)
         static_latencies.append(avg_latency)
-        
-        avg_latency_no_cache = compute_avg_latency_score(latency_list=df["latency_direct_list"])
+
+        avg_latency_no_cache = compute_avg_latency_score(
+            latency_list=df["latency_direct_list"]
+        )
 
     if static_thresholds:
         plt.plot(
@@ -488,10 +490,10 @@ def __plot_avg_latency_vs_error_rate(
 
         plt.axvline(
             x=avg_latency_no_cache,
-            color='grey',
-            linestyle='--',
+            color="grey",
+            linestyle="--",
             linewidth=2,
-            label='No Cache'
+            label="No Cache",
         )
 
     vectorq_local_deltas = sorted(vectorq_local_data_frames.keys())
@@ -501,7 +503,7 @@ def __plot_avg_latency_vs_error_rate(
     for delta in vectorq_local_deltas:
         if delta == 0.01:
             continue
-        
+
         df = vectorq_local_data_frames[delta]
 
         error_rate = compute_error_rate_score(fp=df["fp_list"])
@@ -523,7 +525,7 @@ def __plot_avg_latency_vs_error_rate(
         for i, _ in enumerate(vectorq_local_latencies):
             if i == 0:
                 continue
-            
+
             if i == 0 or i == len(vectorq_local_deltas) - 1:
                 label = f"{vectorq_local_deltas[i]:.2f}"
                 plt.annotate(
@@ -642,7 +644,7 @@ def __plot_cache_hit_vs_error_rate(
     for delta in vectorq_local_deltas:
         if delta == 0.01:
             continue
-        
+
         df = vectorq_local_data_frames[delta]
 
         cache_hit_rate = compute_cache_hit_rate_score(
@@ -668,7 +670,7 @@ def __plot_cache_hit_vs_error_rate(
         for i, _ in enumerate(vectorq_local_error_rates):
             if i == 0:
                 continue
-            
+
             if i == 0 or i == len(vectorq_local_deltas) - 1:
                 label = f"{vectorq_local_deltas[i]:.2f}"
             else:
@@ -963,4 +965,4 @@ def __plot_delta_accuracy(
 
     filename = results_dir + f"/delta_accuracy_{timestamp}.pdf"
     plt.savefig(filename, format="pdf", bbox_inches="tight")
-    plt.close()
+    plt.close()
@@ -330,4 +330,4 @@ def compute_avg_latency_score(latency_list: pd.DataFrame) -> float:
     Returns:
         avg_latency: float - Average Latency 0.xx
     """
-    return latency_list.mean()
+    return latency_list.mean()
@@ -341,4 +341,4 @@ def __plot_avg_latency_cache_hit_rate_cache_miss_rate(
     filename = benchmark.output_folder_path + f"/statistics_{benchmark.timestamp}.json"
 
     with open(filename, "w") as f:
-        json.dump(statistics, f, indent=4)
+        json.dump(statistics, f, indent=4)
@@ -15,7 +15,13 @@
 from benchmarks._plotter_individual import generate_individual_plots
 from benchmarks.common.comparison import answers_have_same_meaning_static
 from vectorq.config import VectorQConfig
-from vectorq.main import VectorQ, VectorQBenchmark
+from vectorq.main import VectorQ
+from vectorq.vectorq_core.cache.embedding_engine.strategies.benchmark import (
+    BenchmarkEmbeddingEngine,
+)
+from vectorq.inference_engine.strategies.benchmark import (
+    BenchmarkInferenceEngine,
+)
 from vectorq.vectorq_core.cache.embedding_store.embedding_metadata_storage import (
     InMemoryEmbeddingMetadataStorage,
 )
@@ -29,9 +35,16 @@
 from vectorq.vectorq_core.similarity_evaluator.strategies.string_comparison import (
     StringComparisonSimilarityEvaluator,
 )
-from vectorq.vectorq_core.vectorq_policy.strategies.bayesian import (
-    VectorQBayesianPolicy,
+from vectorq.vectorq_policy.strategies.dynamic_global_threshold import (
+    DynamicGlobalThresholdPolicy,
+)
+from vectorq.vectorq_policy.strategies.dynamic_local_threshold import (
+    DynamicLocalThresholdPolicy,
+)
+from vectorq.vectorq_policy.strategies.static_global_threshold import (
+    StaticGlobalThresholdPolicy,
 )
+from vectorq.vectorq_policy.vectorq_policy import VectorQPolicy
 
 repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 results_dir = os.path.join(repo_root, "benchmarks", "results")
@@ -48,7 +61,7 @@
 ########################################################################################################################
 
 # Benchmark Config
-MAX_SAMPLES: int = 45000
+MAX_SAMPLES: int = 10000
 CONFIDENCE_INTERVALS_ITERATIONS: int = 1
 EMBEDDING_MODEL_1 = (
     "embedding_1",
@@ -98,10 +111,14 @@
 ]
 candidate_strategy: str = SIMILARITY_STRATEGY[0]
 
+# static_thresholds = np.array(
+#     [0.76, 0.78, 0.80, 0.82, 0.84, 0.86, 0.88, 0.90, 0.92, 0.94, 0.96]
+# )
 static_thresholds = np.array(
-    [0.76, 0.78, 0.80, 0.82, 0.84, 0.86, 0.88, 0.90, 0.92, 0.94, 0.96]
+    [0.76, 0.78]
 )
-deltas = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1])
+#deltas = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1])
+deltas = np.array([0.01, 0.02])
 
 # VectorQ Config
 MAX_VECTOR_DB_CAPACITY: int = 100000
@@ -162,7 +179,7 @@ def test_run_benchmark(self):
 
                     # 1) Get Data
                     task = data_entry["task"]
-                    output_format = data_entry["output_format"]
+                    system_prompt = data_entry["output_format"]
                     review_text = data_entry["text"]
 
                     emb_generation_latency: float = float(
@@ -186,7 +203,7 @@ def test_run_benchmark(self):
                             review_text=review_text,
                             candidate_embedding=candidate_embedding,
                             label_response=label_response,
-                            output_format=output_format,
+                            system_prompt=system_prompt,
                         )
                     )
                     latency_vectorq: float = (
@@ -271,7 +288,7 @@ def get_vectorQ_answer(
         review_text: str,
         candidate_embedding: List[float],
         label_response: str,
-        output_format: str,
+        system_prompt: str,
     ) -> Tuple[bool, str, str, float]:
         """
         Returns: Tuple[bool, str, str, float] - [is_cache_hit, cache_response, nn_response, latency_vectorq_logic]
@@ -287,17 +304,15 @@ def get_vectorQ_answer(
                 for val in candidate_embedding
             ]
 
-        vectorQ_benchmark = VectorQBenchmark(
-            candidate_embedding=candidate_embedding, candidate_response=label_response
-        )
+        self.vectorq.vectorq_config.embedding_engine.set_next_embedding(candidate_embedding)
+        self.vectorq.vectorq_config.inference_engine.set_next_response(label_response)
 
         vectorQ_prompt = f"{task} {review_text}"
         latency_vectorq_logic: float = time.time()
         try:
-            is_cache_hit, cache_response, nn_response = self.vectorq.create(
+            is_cache_hit, cache_response, nn_response = self.vectorq.infer_with_cache_info(
                 prompt=vectorQ_prompt,
-                output_format=output_format,
-                benchmark=vectorQ_benchmark,
+                system_prompt=system_prompt,
             )
         except Exception as e:
             logging.error(
@@ -422,6 +437,16 @@ def main():
                 )
                 start_time_llm_model = time.time()
 
+                vectorq_config: VectorQConfig = VectorQConfig(
+                    inference_engine=BenchmarkInferenceEngine(),
+                    embedding_engine=BenchmarkEmbeddingEngine(),
+                    vector_db=HNSWLibVectorDB(
+                        similarity_metric_type=SimilarityMetricType.COSINE,
+                        max_capacity=MAX_VECTOR_DB_CAPACITY,
+                    ),
+                    embedding_metadata_storage=InMemoryEmbeddingMetadataStorage()
+                )
+
                 # Baseline 1) Dynamic thresholds (VectorQ, Local)
                 if SYSTEM_TYPE in ["dynamic_local", "all"]:
                     for delta in deltas:
@@ -440,20 +465,10 @@ def main():
                                 f"Using dynamic threshold with delta: {delta}. Run {i + 1} of {CONFIDENCE_INTERVALS_ITERATIONS}"
                             )
 
-                            config = VectorQConfig(
-                                enable_cache=True,
-                                is_static_threshold=False,
-                                vector_db=HNSWLibVectorDB(
-                                    similarity_metric_type=SimilarityMetricType.COSINE,
-                                    max_capacity=MAX_VECTOR_DB_CAPACITY,
-                                ),
-                                embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
-                                similarity_evaluator=StringComparisonSimilarityEvaluator(),
-                                vectorq_policy=VectorQBayesianPolicy(
-                                    delta=delta, is_global=False
-                                ),
+                            vectorq_policy: VectorQPolicy = DynamicLocalThresholdPolicy(
+                                delta=delta
                             )
-                            vectorQ: VectorQ = VectorQ(config)
+                            vectorQ: VectorQ = VectorQ(vectorq_config, vectorq_policy)
 
                             benchmark = Benchmark(vectorQ)
                             benchmark.filepath = dataset_file
@@ -486,20 +501,10 @@ def main():
                                 f"Using dynamic threshold with delta: {delta}. Run {i + 1} of {CONFIDENCE_INTERVALS_ITERATIONS}"
                             )
 
-                            config = VectorQConfig(
-                                enable_cache=True,
-                                is_static_threshold=False,
-                                vector_db=HNSWLibVectorDB(
-                                    similarity_metric_type=SimilarityMetricType.COSINE,
-                                    max_capacity=MAX_VECTOR_DB_CAPACITY,
-                                ),
-                                embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
-                                similarity_evaluator=StringComparisonSimilarityEvaluator(),
-                                vectorq_policy=VectorQBayesianPolicy(
-                                    delta=delta, is_global=True
-                                ),
+                            vectorq_policy: VectorQPolicy = (
+                                DynamicGlobalThresholdPolicy(delta=delta)
                             )
-                            vectorQ: VectorQ = VectorQ(config)
+                            vectorQ: VectorQ = VectorQ(vectorq_config, vectorq_policy)
 
                             benchmark = Benchmark(vectorQ)
                             benchmark.filepath = dataset_file
@@ -529,18 +534,10 @@ def main():
 
                         logging.info(f"Using static threshold: {threshold}")
 
-                        config = VectorQConfig(
-                            enable_cache=True,
-                            is_static_threshold=True,
-                            static_threshold=threshold,
-                            vector_db=HNSWLibVectorDB(
-                                similarity_metric_type=SimilarityMetricType.COSINE,
-                                max_capacity=MAX_VECTOR_DB_CAPACITY,
-                            ),
-                            embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
-                            similarity_evaluator=StringComparisonSimilarityEvaluator(),
+                        vectorq_policy: VectorQPolicy = StaticGlobalThresholdPolicy(
+                            threshold=threshold
                         )
-                        vectorQ: VectorQ = VectorQ(config)
+                        vectorQ: VectorQ = VectorQ(vectorq_config, vectorq_policy)
 
                         benchmark = Benchmark(vectorQ)
                         benchmark.filepath = dataset_file
@@ -582,4 +579,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
@@ -58,4 +58,3 @@ class TestVectorQIntegration(unittest.TestCase):
 if __name__ == "__main__":
     # unittest.main()
     pass
-
@@ -49,6 +49,7 @@
 
 # VectorQ Policies
 from vectorq.vectorq_policy import (
+    DynamicGlobalThresholdPolicy,
     DynamicLocalThresholdPolicy,
     NoCachePolicy,
     StaticGlobalThresholdPolicy,
@@ -85,6 +86,7 @@
     # VectorQ Policies
     "VectorQPolicy",
     "DynamicLocalThresholdPolicy",
+    "DynamicGlobalThresholdPolicy",
     "StaticGlobalThresholdPolicy",
     "NoCachePolicy",
 ]
@@ -15,7 +15,7 @@
     HNSWLibVectorDB,
 )
 from vectorq.vectorq_core.cache.eviction_policy.eviction_policy import EvictionPolicy
-from vectorq.vectorq_core.cache.eviction_policy.strategies.lru import LRUEvictionPolicy
+from vectorq.vectorq_core.cache.eviction_policy.strategies.no_eviction import NoEvictionPolicy
 
 
 class VectorQConfig:
@@ -30,7 +30,7 @@ def __init__(
         embedding_engine: EmbeddingEngine = OpenAIEmbeddingEngine(),
         vector_db: VectorDB = HNSWLibVectorDB(),
         embedding_metadata_storage: EmbeddingMetadataStorage = InMemoryEmbeddingMetadataStorage(),
-        eviction_policy: EvictionPolicy = LRUEvictionPolicy(),
+        eviction_policy: EvictionPolicy = NoEvictionPolicy(),
         system_prompt: Optional[str] = None,
     ):
         self.inference_engine = inference_engine
 
@@ -9,8 +9,10 @@ class InferenceEngine(ABC):
     @abstractmethod
     def create(self, prompt: str, system_prompt: str = None) -> str:
         """
-        prompt: str - The prompt to create an answer for
-        output_format: str - The optional output format to use for the response
-        returns: str - The answer to the prompt
+        Args:
+            prompt: str - The prompt to create an answer for
+            system_prompt: str - The optional system prompt (e.g. output-format instructions) to use for the response
+        Returns:
+            str - The answer to the prompt
         """
         pass