Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ benchmarks/results/*
temp/*
*.log
bin/*
.venv/
.venv/
.env
4 changes: 2 additions & 2 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,9 @@ def get_vectorQ_answer(
vectorQ_prompt = f"{task} {review_text}"
latency_vectorq_logic: float = time.time()
try:
is_cache_hit, cache_response, nn_response = self.vectorq.create(
is_cache_hit, cache_response, nn_response = self.vectorq.infer(
prompt=vectorQ_prompt,
output_format=output_format,
system_prompt=output_format,
benchmark=vectorQ_benchmark,
)
except Exception as e:
Expand Down
80 changes: 75 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"accelerate (>=1.6.0,<2.0.0)",
"typing-extensions (>=4.13.2,<5.0.0)",
"torchvision (>=0.22.0,<0.23.0)",
"statsmodels (>=0.14.4,<0.15.0)",
]


Expand All @@ -44,6 +45,7 @@ ruff = "^0.11.6"
mypy = "^1.15.0"
pre-commit = "^4.2.0"
pytest = "^8.0.0"
python-dotenv = "^1.1.0"


[tool.ruff]
Expand Down
26 changes: 4 additions & 22 deletions tests/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,9 @@ The unit tests are supposed to solely test the logic of an individual module strateg
## Integration Tests
The integration tests are supposed to test the combination and interaction of all module strategies.

## Run All Tests
### Run Integration Tests
Set `OPENAI_API_KEY` in `.env`, and run:

```bash
pip install -e .
```

```bash
export OPENAI_API_KEY="your_api_key_here"
```

```bash
python3 runner.py
```

## Run Individual Tests

```bash
pytest unit/VectorDBStrategy/test.py
```

With print terminal output enabled
```bash
pytest -vs unit/VectorDBStrategy/test.py
```bash
poetry run pytest tests/integration
```
60 changes: 0 additions & 60 deletions tests/integration/test_1.py

This file was deleted.

94 changes: 94 additions & 0 deletions tests/integration/test_dynamic_threshold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import unittest

from dotenv import load_dotenv

from vectorq import (
    DynamicThresholdPolicy,
    HNSWLibVectorDB,
    InMemoryEmbeddingMetadataStorage,
    LangChainEmbeddingEngine,
    OpenAIInferenceEngine,
    StringComparisonSimilarityEvaluator,
    VectorQ,
    VectorQConfig,
)

# Load environment variables from a local .env file at import time so the
# OpenAI-backed inference engine can pick up its credentials
# (presumably OPENAI_API_KEY — confirm against the project ReadMe).
load_dotenv()


def create_default_config_and_policy():
    """Build the VectorQ configuration and threshold policy shared by these tests.

    Returns:
        tuple: ``(config, policy)`` where ``config`` is a ``VectorQConfig``
        wired to an OpenAI inference engine, a sentence-transformers
        embedding engine, an HNSWLib vector DB, and in-memory metadata
        storage; and ``policy`` is a per-entry ``DynamicThresholdPolicy``.
    """
    # Deterministic model output (temperature=0.0) keeps the cache-hit
    # behavior reproducible across runs.
    inference_engine = OpenAIInferenceEngine(
        model_name="gpt-4.1-nano-2025-04-14",
        temperature=0.0,
    )
    embedding_engine = LangChainEmbeddingEngine(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    config = VectorQConfig(
        inference_engine=inference_engine,
        embedding_engine=embedding_engine,
        vector_db=HNSWLibVectorDB(),
        embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
        system_prompt="Please answer in a single word with the first letter capitalized. Example: London",
    )
    # is_global=False: each cached entry adapts its own similarity threshold.
    policy = DynamicThresholdPolicy(
        delta=0.05,
        is_global=False,
        similarity_evaluator=StringComparisonSimilarityEvaluator(),
    )
    return config, policy


class TestVectorQDynamicThreshold(unittest.TestCase):
    """Integration tests for VectorQ's dynamic (per-entry) threshold policy."""

    def test_basic_functionality(self):
        """Test that the cache correctly identifies hits and misses.

        The dynamic policy needs several observations per cache entry before
        its threshold converges, so the first few semantically-similar
        prompts are expected misses and later ones expected hits.
        """
        config, policy = create_default_config_and_policy()
        vectorq = VectorQ(config, policy)

        # First request should be a miss (cache is empty).
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="What is the capital of France?"
        )
        self.assertFalse(cache_hit, "First request should be a cache miss")
        self.assertTrue(len(response) > 0, "Response should not be empty")

        # The 2nd to 5th requests should be misses because the policy is
        # still adjusting the threshold for this entry.
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="What's France's capital city?"
        )
        self.assertFalse(cache_hit, "Second request should be a cache miss")
        self.assertTrue(len(response) > 0, "Response should not be empty")
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="France's capital city is called what?"
        )
        # Fixed: the original failure message claimed this should be a hit,
        # contradicting the assertFalse below.
        self.assertFalse(cache_hit, "Third request should still be a cache miss")
        self.assertTrue(len(response) > 0, "Response should not be empty")
        # Two more warm-up requests; no assertions here because the exact
        # request at which the threshold converges is not pinned down.
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="Tell me the capital city of France"
        )
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="Which city is the capital of France?"
        )

        # After several tries with the Bayesian policy, we should now get a hit.
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="The capital of France is?"
        )
        self.assertTrue(cache_hit, "Similar request should now be a cache hit")
        self.assertTrue(len(response) > 0, "Response should not be empty")

        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="Can you tell me what the capital of France is?"
        )
        self.assertTrue(cache_hit, "Similar request should now be a cache hit")
        self.assertTrue(len(response) > 0, "Response should not be empty")

    def test_high_delta(self):
        """Placeholder: behavior with a high delta (aggressive threshold)."""
        # TODO: Implement this
        self.assertTrue(True)

    def test_low_delta(self):
        """Placeholder: behavior with a low delta (conservative threshold)."""
        # TODO: Implement this
        self.assertTrue(True)


if __name__ == "__main__":
    # Allow running this integration test file directly (outside pytest).
    unittest.main()
Loading