Skip to content

Commit 6064c72

Browse files
committed
perf: optimize compaction by using lazy deserialization to avoid make_static overhead
1 parent e2c2b5a commit 6064c72

File tree

2 files changed

+129
-49
lines changed

2 files changed

+129
-49
lines changed

src/jstable.rs

Lines changed: 123 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,17 @@ use std::fs::File;
77
use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
88
use xorf::BinaryFuse8;
99

10+
/// A document value held in a `JSTable`'s `documents` map.
///
/// The two variants differ in how the table's write path turns them into
/// record bytes: `Static` values are serialized, `Lazy` values reuse bytes
/// that are already serialized.
#[derive(Clone, Debug)]
11+
pub enum StoredValue {
12+
/// A materialized `Value`; on write it is serialized together with its id
/// through `SerdeWrapper` / `jsonb_schema::to_owned_jsonb`.
Static(Value),
13+
/// A `LazyDocument` as produced by `read_jstable`; on write its `raw`
/// bytes (the already-serialized `[id, doc]` tuple) are copied as-is.
Lazy(LazyDocument),
14+
}
15+
1016
/// A table of documents for one collection, together with its schema and
/// a timestamp.
pub struct JSTable {
1117
/// Timestamp associated with this table.
// NOTE(review): the storage.rs hunk suggests this is milliseconds since the epoch — confirm.
pub timestamp: u64,
1218
/// Name of the collection the documents belong to.
pub collection: String,
1319
/// Schema stored alongside the documents.
pub schema: Schema,
14-
pub documents: BTreeMap<String, Value>,
20+
/// Documents keyed by id; the `BTreeMap` keeps them sorted by id.
pub documents: BTreeMap<String, StoredValue>,
1521
}
1622

1723
#[derive(Serialize, Deserialize)]
@@ -26,7 +32,7 @@ impl JSTable {
2632
timestamp: u64,
2733
collection: String,
2834
schema: Schema,
29-
documents: BTreeMap<String, Value>,
35+
documents: BTreeMap<String, StoredValue>,
3036
) -> Self {
3137
JSTable {
3238
timestamp,
@@ -84,19 +90,28 @@ impl JSTable {
8490
let mut bytes_since_last_index: u64 = 0;
8591
let mut first = true;
8692

87-
for (id, doc) in &self.documents {
93+
for (id, val) in &self.documents {
8894
// Add index entry if needed
8995
if first || bytes_since_last_index >= index_threshold {
9096
index.push((id.clone(), current_offset));
9197
bytes_since_last_index = 0;
9298
first = false;
9399
}
94100

95-
// Use SerdeWrapper to serialize jsonb Value via serde infrastructure
96-
let record = (id.clone(), SerdeWrapper(doc));
97-
let record_blob = jsonb_schema::to_owned_jsonb(&record)
98-
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
99-
let record_bytes = record_blob.to_vec();
101+
let record_bytes = match val {
102+
StoredValue::Static(doc) => {
103+
// Use SerdeWrapper to serialize jsonb Value via serde infrastructure
104+
let record = (id.clone(), SerdeWrapper(doc));
105+
let record_blob = jsonb_schema::to_owned_jsonb(&record)
106+
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
107+
record_blob.to_vec()
108+
}
109+
StoredValue::Lazy(doc) => {
110+
// LazyDocument.raw is already the serialized [id, doc] tuple
111+
doc.raw.clone()
112+
}
113+
};
114+
100115
let record_len = record_bytes.len() as u32;
101116

102117
data_file.write_all(&record_len.to_le_bytes())?;
@@ -281,15 +296,15 @@ impl Iterator for JSTableIterator {
281296
}
282297

283298
pub fn read_jstable(path: &str) -> io::Result<JSTable> {
284-
let iterator = JSTableIterator::new(path)?;
285-
let timestamp = iterator.timestamp();
286-
let collection = iterator.collection().to_string();
287-
let schema = iterator.schema().clone();
299+
let iterator = JSTableLazyIterator::new(path)?;
300+
let timestamp = iterator.timestamp;
301+
let collection = iterator.collection.clone();
302+
let schema = iterator.schema.clone();
288303

289304
let mut documents = BTreeMap::new();
290305
for result in iterator {
291-
let (id, doc) = result?;
292-
documents.insert(id, doc);
306+
let lazy_doc = result?;
307+
documents.insert(lazy_doc.id.clone(), StoredValue::Lazy(lazy_doc));
293308
}
294309

295310
Ok(JSTable {
@@ -398,7 +413,10 @@ pub fn merge_jstables(mut tables: Vec<JSTable>) -> JSTable {
398413

399414
// Filter nulls (tombstones) - Value::Null matches jsonb Null
400415
use jsonb_schema::Value as JsonbValue;
401-
merged_documents.retain(|_, v| !matches!(v, JsonbValue::Null));
416+
merged_documents.retain(|_, v| match v {
417+
StoredValue::Static(s) => !matches!(s, JsonbValue::Null),
418+
StoredValue::Lazy(l) => !l.is_tombstone(),
419+
});
402420

403421
JSTable::new(max_timestamp, collection, merged_schema, merged_documents)
404422
}
@@ -428,8 +446,14 @@ mod tests {
428446
Schema::new(InstanceType::Integer),
429447
)]));
430448
let mut documents = BTreeMap::new();
431-
documents.insert("id1".to_string(), serde_to_jsonb(json!({"a": 1})));
432-
documents.insert("id2".to_string(), serde_to_jsonb(json!({"a": 2})));
449+
documents.insert(
450+
"id1".to_string(),
451+
StoredValue::Static(serde_to_jsonb(json!({"a": 1}))),
452+
);
453+
documents.insert(
454+
"id2".to_string(),
455+
StoredValue::Static(serde_to_jsonb(json!({"a": 2}))),
456+
);
433457
let jstable = JSTable::new(
434458
12345,
435459
"test_col".to_string(),
@@ -447,13 +471,15 @@ mod tests {
447471
assert_eq!(read_table.collection, "test_col");
448472
assert_eq!(get_types(&read_table.schema), vec![InstanceType::Object]);
449473
assert_eq!(read_table.documents.len(), 2);
450-
// Compare values
451-
let v1 = read_table.documents.get("id1").unwrap();
452-
// convert to serde for easy comparison
453-
assert_eq!(jsonb_to_serde(v1), json!({"a": 1}));
454474

455-
let v2 = read_table.documents.get("id2").unwrap();
456-
assert_eq!(jsonb_to_serde(v2), json!({"a": 2}));
475+
// Check contents
476+
match read_table.documents.get("id1").unwrap() {
477+
StoredValue::Lazy(lazy) => {
478+
assert_eq!(lazy.id, "id1");
479+
}
480+
_ => panic!("Expected Lazy document"),
481+
}
482+
457483
Ok(())
458484
}
459485

@@ -465,8 +491,14 @@ mod tests {
465491
Schema::new(InstanceType::Integer),
466492
)]));
467493
let mut documents = BTreeMap::new();
468-
documents.insert("id1".to_string(), serde_to_jsonb(json!({"a": 1})));
469-
documents.insert("id2".to_string(), serde_to_jsonb(json!({"a": 2})));
494+
documents.insert(
495+
"id1".to_string(),
496+
StoredValue::Static(serde_to_jsonb(json!({"a": 1}))),
497+
);
498+
documents.insert(
499+
"id2".to_string(),
500+
StoredValue::Static(serde_to_jsonb(json!({"a": 2}))),
501+
);
470502
let jstable = JSTable::new(
471503
12345,
472504
"test_col".to_string(),
@@ -506,8 +538,14 @@ mod tests {
506538
Schema::new(InstanceType::Integer),
507539
)]));
508540
let mut documents = BTreeMap::new();
509-
documents.insert("id1".to_string(), serde_to_jsonb(json!({"a": 1})));
510-
documents.insert("id2".to_string(), serde_to_jsonb(json!({"a": 2})));
541+
documents.insert(
542+
"id1".to_string(),
543+
StoredValue::Static(serde_to_jsonb(json!({"a": 1}))),
544+
);
545+
documents.insert(
546+
"id2".to_string(),
547+
StoredValue::Static(serde_to_jsonb(json!({"a": 2}))),
548+
);
511549
let jstable = JSTable::new(
512550
12345,
513551
"test_col".to_string(),
@@ -541,36 +579,50 @@ mod tests {
541579
let schema = Schema::new(InstanceType::Object);
542580

543581
let mut docs1 = BTreeMap::new();
544-
docs1.insert("id1".to_string(), serde_to_jsonb(json!({"v": 1})));
582+
docs1.insert(
583+
"id1".to_string(),
584+
StoredValue::Static(serde_to_jsonb(json!({"v": 1}))),
585+
);
545586
let t1 = JSTable::new(100, "test_col".to_string(), schema.clone(), docs1);
546587

547588
let mut docs2 = BTreeMap::new();
548-
docs2.insert("id1".to_string(), serde_to_jsonb(json!({"v": 2})));
589+
docs2.insert(
590+
"id1".to_string(),
591+
StoredValue::Static(serde_to_jsonb(json!({"v": 2}))),
592+
);
549593
let t2 = JSTable::new(200, "test_col".to_string(), schema.clone(), docs2);
550594

551595
// Case 1: t1 (older) then t2 (newer) in the slice
552596
let merged = merge_jstables(vec![t1, t2]);
553-
assert_eq!(
554-
jsonb_to_serde(merged.documents.get("id1").unwrap()),
555-
json!({"v": 2})
556-
);
597+
let val = merged.documents.get("id1").unwrap();
598+
match val {
599+
StoredValue::Static(v) => assert_eq!(jsonb_to_serde(v), json!({"v": 2})),
600+
_ => panic!("Expected static value"),
601+
}
557602
assert_eq!(merged.timestamp, 200);
558603
assert_eq!(merged.collection, "test_col");
559604

560605
// Case 2: Reverse order
561606
let mut docs1 = BTreeMap::new();
562-
docs1.insert("id1".to_string(), serde_to_jsonb(json!({"v": 1})));
607+
docs1.insert(
608+
"id1".to_string(),
609+
StoredValue::Static(serde_to_jsonb(json!({"v": 1}))),
610+
);
563611
let t1b = JSTable::new(100, "test_col".to_string(), schema.clone(), docs1);
564612

565613
let mut docs2 = BTreeMap::new();
566-
docs2.insert("id1".to_string(), serde_to_jsonb(json!({"v": 2})));
614+
docs2.insert(
615+
"id1".to_string(),
616+
StoredValue::Static(serde_to_jsonb(json!({"v": 2}))),
617+
);
567618
let t2b = JSTable::new(200, "test_col".to_string(), schema.clone(), docs2);
568619

569620
let merged_reverse = merge_jstables(vec![t2b, t1b]);
570-
assert_eq!(
571-
jsonb_to_serde(merged_reverse.documents.get("id1").unwrap()),
572-
json!({"v": 2})
573-
);
621+
let val = merged_reverse.documents.get("id1").unwrap();
622+
match val {
623+
StoredValue::Static(v) => assert_eq!(jsonb_to_serde(v), json!({"v": 2})),
624+
_ => panic!("Expected static value"),
625+
}
574626
assert_eq!(merged_reverse.timestamp, 200);
575627
}
576628

@@ -579,9 +631,18 @@ mod tests {
579631
let schema = Schema::new(InstanceType::Object);
580632
let mut documents = BTreeMap::new();
581633
// Insert keys in non-sorted order (BTreeMap will sort them)
582-
documents.insert("c".to_string(), serde_to_jsonb(json!(3)));
583-
documents.insert("a".to_string(), serde_to_jsonb(json!(1)));
584-
documents.insert("b".to_string(), serde_to_jsonb(json!(2)));
634+
documents.insert(
635+
"c".to_string(),
636+
StoredValue::Static(serde_to_jsonb(json!(3))),
637+
);
638+
documents.insert(
639+
"a".to_string(),
640+
StoredValue::Static(serde_to_jsonb(json!(1))),
641+
);
642+
documents.insert(
643+
"b".to_string(),
644+
StoredValue::Static(serde_to_jsonb(json!(2))),
645+
);
585646

586647
let jstable = JSTable::new(123, "sorted_test".to_string(), schema, documents);
587648

@@ -602,13 +663,28 @@ mod tests {
602663
let mut documents = BTreeMap::new();
603664

604665
let large_val = "x".repeat(500); // ~500 bytes
605-
documents.insert("a".to_string(), serde_to_jsonb(json!(large_val)));
606-
documents.insert("b".to_string(), serde_to_jsonb(json!(large_val)));
607-
documents.insert("c".to_string(), serde_to_jsonb(json!(large_val)));
666+
documents.insert(
667+
"a".to_string(),
668+
StoredValue::Static(serde_to_jsonb(json!(large_val))),
669+
);
670+
documents.insert(
671+
"b".to_string(),
672+
StoredValue::Static(serde_to_jsonb(json!(large_val))),
673+
);
674+
documents.insert(
675+
"c".to_string(),
676+
StoredValue::Static(serde_to_jsonb(json!(large_val))),
677+
);
608678

609679
let larger_val = "x".repeat(1100);
610-
documents.insert("d".to_string(), serde_to_jsonb(json!(larger_val)));
611-
documents.insert("e".to_string(), serde_to_jsonb(json!(1)));
680+
documents.insert(
681+
"d".to_string(),
682+
StoredValue::Static(serde_to_jsonb(json!(larger_val))),
683+
);
684+
documents.insert(
685+
"e".to_string(),
686+
StoredValue::Static(serde_to_jsonb(json!(1))),
687+
);
612688

613689
let jstable = JSTable::new(123, "idx_test".to_string(), schema, documents);
614690
let dir = tempdir()?;

src/storage.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::Value;
2-
use crate::jstable::JSTable;
2+
use crate::jstable::{JSTable, StoredValue};
33
use crate::schema::{Schema, SchemaExt, infer_schema};
44
use std::collections::{BTreeMap, HashMap};
55

@@ -42,7 +42,11 @@ impl MemTable {
4242
.as_millis() as u64;
4343

4444
// Sort documents by ID for JSTable
45-
let sorted_docs: BTreeMap<String, Value> = self.documents.into_iter().collect();
45+
let sorted_docs: BTreeMap<String, StoredValue> = self
46+
.documents
47+
.into_iter()
48+
.map(|(k, v)| (k, StoredValue::Static(v)))
49+
.collect();
4650

4751
let jstable = JSTable::new(timestamp, collection, self.schema, sorted_docs);
4852
jstable.write(path, index_threshold)

0 commit comments

Comments
 (0)