Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ benchmarks/results/*
temp/*
*.log
bin/*
.venv/
.venv/
.env
4 changes: 2 additions & 2 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,9 @@ def get_vectorQ_answer(
vectorQ_prompt = f"{task} {review_text}"
latency_vectorq_logic: float = time.time()
try:
is_cache_hit, cache_response, nn_response = self.vectorq.create(
is_cache_hit, cache_response, nn_response = self.vectorq.infer(
prompt=vectorQ_prompt,
output_format=output_format,
system_prompt=output_format,
benchmark=vectorQ_benchmark,
)
except Exception as e:
Expand Down
80 changes: 75 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"accelerate (>=1.6.0,<2.0.0)",
"typing-extensions (>=4.13.2,<5.0.0)",
"torchvision (>=0.22.0,<0.23.0)",
"statsmodels (>=0.14.4,<0.15.0)",
]


Expand All @@ -44,6 +45,7 @@ ruff = "^0.11.6"
mypy = "^1.15.0"
pre-commit = "^4.2.0"
pytest = "^8.0.0"
python-dotenv = "^1.1.0"


[tool.ruff]
Expand Down
26 changes: 4 additions & 22 deletions tests/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,9 @@ The unit tests are supposed to solely test the logic of an individual module strateg
## Integration Tests
The integration tests are supposed to test the combination and interaction of all module strategies.

## Run All Tests
### Run Integration Tests
Set `OPENAI_API_KEY` in `.env`, and run:

```bash
pip install -e .
```

```bash
export OPENAI_API_KEY="your_api_key_here"
```

```bash
python3 runner.py
```

## Run Individual Tests

```bash
pytest unit/VectorDBStrategy/test.py
```

With print terminal output enabled
```bash
pytest -vs unit/VectorDBStrategy/test.py
```bash
poetry run pytest tests/integration
```
60 changes: 0 additions & 60 deletions tests/integration/test_1.py

This file was deleted.

94 changes: 94 additions & 0 deletions tests/integration/test_dynamic_threshold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import unittest

from dotenv import load_dotenv

from vectorq import (
    DynamicThresholdPolicy,
    HNSWLibVectorDB,
    InMemoryEmbeddingMetadataStorage,
    LangChainEmbeddingEngine,
    OpenAIInferenceEngine,
    StringComparisonSimilarityEvaluator,
    VectorQ,
    VectorQConfig,
)

# Load environment variables from a local .env file at import time so the
# OpenAI-backed inference engine can pick up its credentials
# (presumably OPENAI_API_KEY — confirm against the project ReadMe).
load_dotenv()


def create_default_config_and_policy():
    """Build the VectorQ configuration and threshold policy shared by these tests.

    Returns:
        tuple: ``(config, policy)`` where ``config`` is a ``VectorQConfig``
        wired to an OpenAI inference engine, a sentence-transformers
        embedding engine, an HNSWLib vector DB, and in-memory metadata
        storage; and ``policy`` is a per-entry ``DynamicThresholdPolicy``.
    """
    # Deterministic model output (temperature=0.0) keeps the cache-hit
    # behavior reproducible across runs.
    inference_engine = OpenAIInferenceEngine(
        model_name="gpt-4.1-nano-2025-04-14",
        temperature=0.0,
    )
    embedding_engine = LangChainEmbeddingEngine(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    config = VectorQConfig(
        inference_engine=inference_engine,
        embedding_engine=embedding_engine,
        vector_db=HNSWLibVectorDB(),
        embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
        system_prompt="Please answer in a single word with the first letter capitalized. Example: London",
    )
    # is_global=False: each cached entry adapts its own similarity threshold.
    policy = DynamicThresholdPolicy(
        delta=0.05,
        is_global=False,
        similarity_evaluator=StringComparisonSimilarityEvaluator(),
    )
    return config, policy


class TestVectorQDynamicThreshold(unittest.TestCase):
    """Integration tests for VectorQ's dynamic (per-entry) threshold policy."""

    def test_basic_functionality(self):
        """Test that the cache correctly identifies hits and misses.

        The dynamic policy needs several observations per cache entry before
        its threshold converges, so the first few semantically-similar
        prompts are expected misses and later ones expected hits.
        """
        config, policy = create_default_config_and_policy()
        vectorq = VectorQ(config, policy)

        # First request should be a miss (cache is empty).
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="What is the capital of France?"
        )
        self.assertFalse(cache_hit, "First request should be a cache miss")
        self.assertTrue(len(response) > 0, "Response should not be empty")

        # The 2nd to 5th requests should be misses because the policy is
        # still adjusting the threshold for this entry.
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="What's France's capital city?"
        )
        self.assertFalse(cache_hit, "Second request should be a cache miss")
        self.assertTrue(len(response) > 0, "Response should not be empty")
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="France's capital city is called what?"
        )
        # Fixed: the original failure message claimed this should be a hit,
        # contradicting the assertFalse below.
        self.assertFalse(cache_hit, "Third request should still be a cache miss")
        self.assertTrue(len(response) > 0, "Response should not be empty")
        # Two more warm-up requests; no assertions here because the exact
        # request at which the threshold converges is not pinned down.
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="Tell me the capital city of France"
        )
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="Which city is the capital of France?"
        )

        # After several tries with the Bayesian policy, we should now get a hit.
        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="The capital of France is?"
        )
        self.assertTrue(cache_hit, "Similar request should now be a cache hit")
        self.assertTrue(len(response) > 0, "Response should not be empty")

        cache_hit, response, _ = vectorq.infer_with_cache_info(
            prompt="Can you tell me what the capital of France is?"
        )
        self.assertTrue(cache_hit, "Similar request should now be a cache hit")
        self.assertTrue(len(response) > 0, "Response should not be empty")

    def test_high_delta(self):
        """Placeholder: behavior with a high delta (aggressive threshold)."""
        # TODO: Implement this
        self.assertTrue(True)

    def test_low_delta(self):
        """Placeholder: behavior with a low delta (conservative threshold)."""
        # TODO: Implement this
        self.assertTrue(True)


if __name__ == "__main__":
    # Allow running this integration test file directly (outside pytest).
    unittest.main()
Loading