Skip to content

Commit 6f6c9ca

Browse files
Implemented benchmark logic
1 parent dbf346d commit 6f6c9ca

File tree

15 files changed

+119
-97
lines changed

15 files changed

+119
-97
lines changed

benchmarks/_plotter_combined.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def __plot_roc(
210210
for delta in vectorq_local_deltas:
211211
if delta == 0.01:
212212
continue
213-
213+
214214
df = vectorq_local_data_frames[delta]
215215

216216
tpr = compute_recall_score(tp=df["tp_list"], fn=df["fn_list"])
@@ -354,7 +354,7 @@ def __plot_precision_vs_recall(
354354
for delta in vectorq_local_deltas:
355355
if delta == 0.01:
356356
continue
357-
357+
358358
df = vectorq_local_data_frames[delta]
359359
precision = compute_precision_score(tp=df["tp_list"], fp=df["fp_list"])
360360
recall = compute_recall_score(tp=df["tp_list"], fn=df["fn_list"])
@@ -450,7 +450,7 @@ def __plot_avg_latency_vs_error_rate(
450450
static_thresholds = sorted(static_data_frames.keys())
451451
static_error_rates = []
452452
static_latencies = []
453-
453+
454454
avg_latency_no_cache = 0.0
455455

456456
for threshold in static_thresholds:
@@ -460,8 +460,10 @@ def __plot_avg_latency_vs_error_rate(
460460
avg_latency = compute_avg_latency_score(latency_list=df["latency_vectorq_list"])
461461
static_error_rates.append(error_rate)
462462
static_latencies.append(avg_latency)
463-
464-
avg_latency_no_cache = compute_avg_latency_score(latency_list=df["latency_direct_list"])
463+
464+
avg_latency_no_cache = compute_avg_latency_score(
465+
latency_list=df["latency_direct_list"]
466+
)
465467

466468
if static_thresholds:
467469
plt.plot(
@@ -488,10 +490,10 @@ def __plot_avg_latency_vs_error_rate(
488490

489491
plt.axvline(
490492
x=avg_latency_no_cache,
491-
color='grey',
492-
linestyle='--',
493+
color="grey",
494+
linestyle="--",
493495
linewidth=2,
494-
label='No Cache'
496+
label="No Cache",
495497
)
496498

497499
vectorq_local_deltas = sorted(vectorq_local_data_frames.keys())
@@ -501,7 +503,7 @@ def __plot_avg_latency_vs_error_rate(
501503
for delta in vectorq_local_deltas:
502504
if delta == 0.01:
503505
continue
504-
506+
505507
df = vectorq_local_data_frames[delta]
506508

507509
error_rate = compute_error_rate_score(fp=df["fp_list"])
@@ -523,7 +525,7 @@ def __plot_avg_latency_vs_error_rate(
523525
for i, _ in enumerate(vectorq_local_latencies):
524526
if i == 0:
525527
continue
526-
528+
527529
if i == 0 or i == len(vectorq_local_deltas) - 1:
528530
label = f"{vectorq_local_deltas[i]:.2f}"
529531
plt.annotate(
@@ -642,7 +644,7 @@ def __plot_cache_hit_vs_error_rate(
642644
for delta in vectorq_local_deltas:
643645
if delta == 0.01:
644646
continue
645-
647+
646648
df = vectorq_local_data_frames[delta]
647649

648650
cache_hit_rate = compute_cache_hit_rate_score(
@@ -668,7 +670,7 @@ def __plot_cache_hit_vs_error_rate(
668670
for i, _ in enumerate(vectorq_local_error_rates):
669671
if i == 0:
670672
continue
671-
673+
672674
if i == 0 or i == len(vectorq_local_deltas) - 1:
673675
label = f"{vectorq_local_deltas[i]:.2f}"
674676
else:
@@ -963,4 +965,4 @@ def __plot_delta_accuracy(
963965

964966
filename = results_dir + f"/delta_accuracy_{timestamp}.pdf"
965967
plt.savefig(filename, format="pdf", bbox_inches="tight")
966-
plt.close()
968+
plt.close()

benchmarks/_plotter_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,4 +330,4 @@ def compute_avg_latency_score(latency_list: pd.DataFrame) -> float:
330330
Returns:
331331
avg_latency: float - Average Latency 0.xx
332332
"""
333-
return latency_list.mean()
333+
return latency_list.mean()

benchmarks/_plotter_individual.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,4 +341,4 @@ def __plot_avg_latency_cache_hit_rate_cache_miss_rate(
341341
filename = benchmark.output_folder_path + f"/statistics_{benchmark.timestamp}.json"
342342

343343
with open(filename, "w") as f:
344-
json.dump(statistics, f, indent=4)
344+
json.dump(statistics, f, indent=4)

benchmarks/benchmark.py

Lines changed: 50 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,13 @@
1515
from benchmarks._plotter_individual import generate_individual_plots
1616
from benchmarks.common.comparison import answers_have_same_meaning_static
1717
from vectorq.config import VectorQConfig
18-
from vectorq.main import VectorQ, VectorQBenchmark
18+
from vectorq.main import VectorQ
19+
from vectorq.vectorq_core.cache.embedding_engine.strategies.benchmark import (
20+
BenchmarkEmbeddingEngine,
21+
)
22+
from vectorq.inference_engine.strategies.benchmark import (
23+
BenchmarkInferenceEngine,
24+
)
1925
from vectorq.vectorq_core.cache.embedding_store.embedding_metadata_storage import (
2026
InMemoryEmbeddingMetadataStorage,
2127
)
@@ -29,9 +35,16 @@
2935
from vectorq.vectorq_core.similarity_evaluator.strategies.string_comparison import (
3036
StringComparisonSimilarityEvaluator,
3137
)
32-
from vectorq.vectorq_core.vectorq_policy.strategies.bayesian import (
33-
VectorQBayesianPolicy,
38+
from vectorq.vectorq_policy.strategies.dynamic_global_threshold import (
39+
DynamicGlobalThresholdPolicy,
40+
)
41+
from vectorq.vectorq_policy.strategies.dynamic_local_threshold import (
42+
DynamicLocalThresholdPolicy,
43+
)
44+
from vectorq.vectorq_policy.strategies.static_global_threshold import (
45+
StaticGlobalThresholdPolicy,
3446
)
47+
from vectorq.vectorq_policy.vectorq_policy import VectorQPolicy
3548

3649
repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
3750
results_dir = os.path.join(repo_root, "benchmarks", "results")
@@ -48,7 +61,7 @@
4861
########################################################################################################################
4962

5063
# Benchmark Config
51-
MAX_SAMPLES: int = 45000
64+
MAX_SAMPLES: int = 10000
5265
CONFIDENCE_INTERVALS_ITERATIONS: int = 1
5366
EMBEDDING_MODEL_1 = (
5467
"embedding_1",
@@ -98,10 +111,14 @@
98111
]
99112
candidate_strategy: str = SIMILARITY_STRATEGY[0]
100113

114+
# static_thresholds = np.array(
115+
# [0.76, 0.78, 0.80, 0.82, 0.84, 0.86, 0.88, 0.90, 0.92, 0.94, 0.96]
116+
# )
101117
static_thresholds = np.array(
102-
[0.76, 0.78, 0.80, 0.82, 0.84, 0.86, 0.88, 0.90, 0.92, 0.94, 0.96]
118+
[0.76, 0.78]
103119
)
104-
deltas = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1])
120+
#deltas = np.array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1])
121+
deltas = np.array([0.01, 0.02])
105122

106123
# VectorQ Config
107124
MAX_VECTOR_DB_CAPACITY: int = 100000
@@ -162,7 +179,7 @@ def test_run_benchmark(self):
162179

163180
# 1) Get Data
164181
task = data_entry["task"]
165-
output_format = data_entry["output_format"]
182+
system_prompt = data_entry["output_format"]
166183
review_text = data_entry["text"]
167184

168185
emb_generation_latency: float = float(
@@ -186,7 +203,7 @@ def test_run_benchmark(self):
186203
review_text=review_text,
187204
candidate_embedding=candidate_embedding,
188205
label_response=label_response,
189-
output_format=output_format,
206+
system_prompt=system_prompt,
190207
)
191208
)
192209
latency_vectorq: float = (
@@ -271,7 +288,7 @@ def get_vectorQ_answer(
271288
review_text: str,
272289
candidate_embedding: List[float],
273290
label_response: str,
274-
output_format: str,
291+
system_prompt: str,
275292
) -> Tuple[bool, str, str, float]:
276293
"""
277294
Returns: Tuple[bool, str, str, float] - [is_cache_hit, cache_response, nn_response, latency_vectorq_logic]
@@ -287,17 +304,15 @@ def get_vectorQ_answer(
287304
for val in candidate_embedding
288305
]
289306

290-
vectorQ_benchmark = VectorQBenchmark(
291-
candidate_embedding=candidate_embedding, candidate_response=label_response
292-
)
307+
self.vectorq.vectorq_config.embedding_engine.set_next_embedding(candidate_embedding)
308+
self.vectorq.vectorq_config.inference_engine.set_next_response(label_response)
293309

294310
vectorQ_prompt = f"{task} {review_text}"
295311
latency_vectorq_logic: float = time.time()
296312
try:
297-
is_cache_hit, cache_response, nn_response = self.vectorq.create(
313+
is_cache_hit, cache_response, nn_response = self.vectorq.infer_with_cache_info(
298314
prompt=vectorQ_prompt,
299-
output_format=output_format,
300-
benchmark=vectorQ_benchmark,
315+
system_prompt=system_prompt,
301316
)
302317
except Exception as e:
303318
logging.error(
@@ -422,6 +437,16 @@ def main():
422437
)
423438
start_time_llm_model = time.time()
424439

440+
vectorq_config: VectorQConfig = VectorQConfig(
441+
inference_engine=BenchmarkInferenceEngine(),
442+
embedding_engine=BenchmarkEmbeddingEngine(),
443+
vector_db=HNSWLibVectorDB(
444+
similarity_metric_type=SimilarityMetricType.COSINE,
445+
max_capacity=MAX_VECTOR_DB_CAPACITY,
446+
),
447+
embedding_metadata_storage=InMemoryEmbeddingMetadataStorage()
448+
)
449+
425450
# Baseline 1) Dynamic thresholds (VectorQ, Local)
426451
if SYSTEM_TYPE in ["dynamic_local", "all"]:
427452
for delta in deltas:
@@ -440,20 +465,10 @@ def main():
440465
f"Using dynamic threshold with delta: {delta}. Run {i + 1} of {CONFIDENCE_INTERVALS_ITERATIONS}"
441466
)
442467

443-
config = VectorQConfig(
444-
enable_cache=True,
445-
is_static_threshold=False,
446-
vector_db=HNSWLibVectorDB(
447-
similarity_metric_type=SimilarityMetricType.COSINE,
448-
max_capacity=MAX_VECTOR_DB_CAPACITY,
449-
),
450-
embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
451-
similarity_evaluator=StringComparisonSimilarityEvaluator(),
452-
vectorq_policy=VectorQBayesianPolicy(
453-
delta=delta, is_global=False
454-
),
468+
vectorq_policy: VectorQPolicy = DynamicLocalThresholdPolicy(
469+
delta=delta
455470
)
456-
vectorQ: VectorQ = VectorQ(config)
471+
vectorQ: VectorQ = VectorQ(vectorq_config, vectorq_policy)
457472

458473
benchmark = Benchmark(vectorQ)
459474
benchmark.filepath = dataset_file
@@ -486,20 +501,10 @@ def main():
486501
f"Using dynamic threshold with delta: {delta}. Run {i + 1} of {CONFIDENCE_INTERVALS_ITERATIONS}"
487502
)
488503

489-
config = VectorQConfig(
490-
enable_cache=True,
491-
is_static_threshold=False,
492-
vector_db=HNSWLibVectorDB(
493-
similarity_metric_type=SimilarityMetricType.COSINE,
494-
max_capacity=MAX_VECTOR_DB_CAPACITY,
495-
),
496-
embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
497-
similarity_evaluator=StringComparisonSimilarityEvaluator(),
498-
vectorq_policy=VectorQBayesianPolicy(
499-
delta=delta, is_global=True
500-
),
504+
vectorq_policy: VectorQPolicy = (
505+
DynamicGlobalThresholdPolicy(delta=delta)
501506
)
502-
vectorQ: VectorQ = VectorQ(config)
507+
vectorQ: VectorQ = VectorQ(vectorq_config, vectorq_policy)
503508

504509
benchmark = Benchmark(vectorQ)
505510
benchmark.filepath = dataset_file
@@ -529,18 +534,10 @@ def main():
529534

530535
logging.info(f"Using static threshold: {threshold}")
531536

532-
config = VectorQConfig(
533-
enable_cache=True,
534-
is_static_threshold=True,
535-
static_threshold=threshold,
536-
vector_db=HNSWLibVectorDB(
537-
similarity_metric_type=SimilarityMetricType.COSINE,
538-
max_capacity=MAX_VECTOR_DB_CAPACITY,
539-
),
540-
embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
541-
similarity_evaluator=StringComparisonSimilarityEvaluator(),
537+
vectorq_policy: VectorQPolicy = StaticGlobalThresholdPolicy(
538+
threshold=threshold
542539
)
543-
vectorQ: VectorQ = VectorQ(config)
540+
vectorQ: VectorQ = VectorQ(vectorq_config, vectorq_policy)
544541

545542
benchmark = Benchmark(vectorQ)
546543
benchmark.filepath = dataset_file
@@ -582,4 +579,4 @@ def main():
582579

583580

584581
if __name__ == "__main__":
585-
main()
582+
main()

tests/integration/test_1.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,3 @@ class TestVectorQIntegration(unittest.TestCase):
5858
if __name__ == "__main__":
5959
# unittest.main()
6060
pass
61-

vectorq/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949

5050
# VectorQ Policies
5151
from vectorq.vectorq_policy import (
52+
DynamicGlobalThresholdPolicy,
5253
DynamicLocalThresholdPolicy,
5354
NoCachePolicy,
5455
StaticGlobalThresholdPolicy,
@@ -85,6 +86,7 @@
8586
# VectorQ Policies
8687
"VectorQPolicy",
8788
"DynamicLocalThresholdPolicy",
89+
"DynamicGlobalThresholdPolicy",
8890
"StaticGlobalThresholdPolicy",
8991
"NoCachePolicy",
9092
]

vectorq/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
HNSWLibVectorDB,
1616
)
1717
from vectorq.vectorq_core.cache.eviction_policy.eviction_policy import EvictionPolicy
18-
from vectorq.vectorq_core.cache.eviction_policy.strategies.lru import LRUEvictionPolicy
18+
from vectorq.vectorq_core.cache.eviction_policy.strategies.no_eviction import NoEvictionPolicy
1919

2020

2121
class VectorQConfig:
@@ -30,7 +30,7 @@ def __init__(
3030
embedding_engine: EmbeddingEngine = OpenAIEmbeddingEngine(),
3131
vector_db: VectorDB = HNSWLibVectorDB(),
3232
embedding_metadata_storage: EmbeddingMetadataStorage = InMemoryEmbeddingMetadataStorage(),
33-
eviction_policy: EvictionPolicy = LRUEvictionPolicy(),
33+
eviction_policy: EvictionPolicy = NoEvictionPolicy(),
3434
system_prompt: Optional[str] = None,
3535
):
3636
self.inference_engine = inference_engine

vectorq/inference_engine/inference_engine.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ class InferenceEngine(ABC):
99
@abstractmethod
1010
def create(self, prompt: str, system_prompt: str = None) -> str:
1111
"""
12-
prompt: str - The prompt to create an answer for
13-
output_format: str - The optional output format to use for the response
14-
returns: str - The answer to the prompt
12+
Args
13+
prompt: str - The prompt to create an answer for
14+
system_prompt: str - The optional system prompt (e.g. output-format instructions) to use for the response
15+
Returns
16+
str - The answer to the prompt
1517
"""
1618
pass

0 commit comments

Comments
 (0)