@@ -7,11 +7,17 @@ use std::fs::File;
77use std:: io:: { self , BufReader , Read , Seek , SeekFrom , Write } ;
88use xorf:: BinaryFuse8 ;
99
10+ #[ derive( Clone , Debug ) ]
11+ pub enum StoredValue {
12+ Static ( Value ) ,
13+ Lazy ( LazyDocument ) ,
14+ }
15+
1016pub struct JSTable {
1117 pub timestamp : u64 ,
1218 pub collection : String ,
1319 pub schema : Schema ,
14- pub documents : BTreeMap < String , Value > ,
20+ pub documents : BTreeMap < String , StoredValue > ,
1521}
1622
1723#[ derive( Serialize , Deserialize ) ]
@@ -26,7 +32,7 @@ impl JSTable {
2632 timestamp : u64 ,
2733 collection : String ,
2834 schema : Schema ,
29- documents : BTreeMap < String , Value > ,
35+ documents : BTreeMap < String , StoredValue > ,
3036 ) -> Self {
3137 JSTable {
3238 timestamp,
@@ -84,19 +90,28 @@ impl JSTable {
8490 let mut bytes_since_last_index: u64 = 0 ;
8591 let mut first = true ;
8692
87- for ( id, doc ) in & self . documents {
93+ for ( id, val ) in & self . documents {
8894 // Add index entry if needed
8995 if first || bytes_since_last_index >= index_threshold {
9096 index. push ( ( id. clone ( ) , current_offset) ) ;
9197 bytes_since_last_index = 0 ;
9298 first = false ;
9399 }
94100
95- // Use SerdeWrapper to serialize jsonb Value via serde infrastructure
96- let record = ( id. clone ( ) , SerdeWrapper ( doc) ) ;
97- let record_blob = jsonb_schema:: to_owned_jsonb ( & record)
98- . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
99- let record_bytes = record_blob. to_vec ( ) ;
101+ let record_bytes = match val {
102+ StoredValue :: Static ( doc) => {
103+ // Use SerdeWrapper to serialize jsonb Value via serde infrastructure
104+ let record = ( id. clone ( ) , SerdeWrapper ( doc) ) ;
105+ let record_blob = jsonb_schema:: to_owned_jsonb ( & record)
106+ . map_err ( |e| io:: Error :: new ( io:: ErrorKind :: InvalidData , e) ) ?;
107+ record_blob. to_vec ( )
108+ }
109+ StoredValue :: Lazy ( doc) => {
110+ // LazyDocument.raw is already the serialized [id, doc] tuple
111+ doc. raw . clone ( )
112+ }
113+ } ;
114+
100115 let record_len = record_bytes. len ( ) as u32 ;
101116
102117 data_file. write_all ( & record_len. to_le_bytes ( ) ) ?;
@@ -281,15 +296,15 @@ impl Iterator for JSTableIterator {
281296}
282297
283298pub fn read_jstable ( path : & str ) -> io:: Result < JSTable > {
284- let iterator = JSTableIterator :: new ( path) ?;
285- let timestamp = iterator. timestamp ( ) ;
286- let collection = iterator. collection ( ) . to_string ( ) ;
287- let schema = iterator. schema ( ) . clone ( ) ;
299+ let iterator = JSTableLazyIterator :: new ( path) ?;
300+ let timestamp = iterator. timestamp ;
301+ let collection = iterator. collection . clone ( ) ;
302+ let schema = iterator. schema . clone ( ) ;
288303
289304 let mut documents = BTreeMap :: new ( ) ;
290305 for result in iterator {
291- let ( id , doc ) = result?;
292- documents. insert ( id , doc ) ;
306+ let lazy_doc = result?;
307+ documents. insert ( lazy_doc . id . clone ( ) , StoredValue :: Lazy ( lazy_doc ) ) ;
293308 }
294309
295310 Ok ( JSTable {
@@ -398,7 +413,10 @@ pub fn merge_jstables(mut tables: Vec<JSTable>) -> JSTable {
398413
399414 // Filter nulls (tombstones) - Value::Null matches jsonb Null
400415 use jsonb_schema:: Value as JsonbValue ;
401- merged_documents. retain ( |_, v| !matches ! ( v, JsonbValue :: Null ) ) ;
416+ merged_documents. retain ( |_, v| match v {
417+ StoredValue :: Static ( s) => !matches ! ( s, JsonbValue :: Null ) ,
418+ StoredValue :: Lazy ( l) => !l. is_tombstone ( ) ,
419+ } ) ;
402420
403421 JSTable :: new ( max_timestamp, collection, merged_schema, merged_documents)
404422}
@@ -428,8 +446,14 @@ mod tests {
428446 Schema :: new ( InstanceType :: Integer ) ,
429447 ) ] ) ) ;
430448 let mut documents = BTreeMap :: new ( ) ;
431- documents. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "a" : 1 } ) ) ) ;
432- documents. insert ( "id2" . to_string ( ) , serde_to_jsonb ( json ! ( { "a" : 2 } ) ) ) ;
449+ documents. insert (
450+ "id1" . to_string ( ) ,
451+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "a" : 1 } ) ) ) ,
452+ ) ;
453+ documents. insert (
454+ "id2" . to_string ( ) ,
455+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "a" : 2 } ) ) ) ,
456+ ) ;
433457 let jstable = JSTable :: new (
434458 12345 ,
435459 "test_col" . to_string ( ) ,
@@ -447,13 +471,15 @@ mod tests {
447471 assert_eq ! ( read_table. collection, "test_col" ) ;
448472 assert_eq ! ( get_types( & read_table. schema) , vec![ InstanceType :: Object ] ) ;
449473 assert_eq ! ( read_table. documents. len( ) , 2 ) ;
450- // Compare values
451- let v1 = read_table. documents . get ( "id1" ) . unwrap ( ) ;
452- // convert to serde for easy comparison
453- assert_eq ! ( jsonb_to_serde( v1) , json!( { "a" : 1 } ) ) ;
454474
455- let v2 = read_table. documents . get ( "id2" ) . unwrap ( ) ;
456- assert_eq ! ( jsonb_to_serde( v2) , json!( { "a" : 2 } ) ) ;
475+ // Check contents
476+ match read_table. documents . get ( "id1" ) . unwrap ( ) {
477+ StoredValue :: Lazy ( lazy) => {
478+ assert_eq ! ( lazy. id, "id1" ) ;
479+ }
480+ _ => panic ! ( "Expected Lazy document" ) ,
481+ }
482+
457483 Ok ( ( ) )
458484 }
459485
@@ -465,8 +491,14 @@ mod tests {
465491 Schema :: new ( InstanceType :: Integer ) ,
466492 ) ] ) ) ;
467493 let mut documents = BTreeMap :: new ( ) ;
468- documents. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "a" : 1 } ) ) ) ;
469- documents. insert ( "id2" . to_string ( ) , serde_to_jsonb ( json ! ( { "a" : 2 } ) ) ) ;
494+ documents. insert (
495+ "id1" . to_string ( ) ,
496+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "a" : 1 } ) ) ) ,
497+ ) ;
498+ documents. insert (
499+ "id2" . to_string ( ) ,
500+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "a" : 2 } ) ) ) ,
501+ ) ;
470502 let jstable = JSTable :: new (
471503 12345 ,
472504 "test_col" . to_string ( ) ,
@@ -506,8 +538,14 @@ mod tests {
506538 Schema :: new ( InstanceType :: Integer ) ,
507539 ) ] ) ) ;
508540 let mut documents = BTreeMap :: new ( ) ;
509- documents. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "a" : 1 } ) ) ) ;
510- documents. insert ( "id2" . to_string ( ) , serde_to_jsonb ( json ! ( { "a" : 2 } ) ) ) ;
541+ documents. insert (
542+ "id1" . to_string ( ) ,
543+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "a" : 1 } ) ) ) ,
544+ ) ;
545+ documents. insert (
546+ "id2" . to_string ( ) ,
547+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "a" : 2 } ) ) ) ,
548+ ) ;
511549 let jstable = JSTable :: new (
512550 12345 ,
513551 "test_col" . to_string ( ) ,
@@ -541,36 +579,50 @@ mod tests {
541579 let schema = Schema :: new ( InstanceType :: Object ) ;
542580
543581 let mut docs1 = BTreeMap :: new ( ) ;
544- docs1. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "v" : 1 } ) ) ) ;
582+ docs1. insert (
583+ "id1" . to_string ( ) ,
584+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "v" : 1 } ) ) ) ,
585+ ) ;
545586 let t1 = JSTable :: new ( 100 , "test_col" . to_string ( ) , schema. clone ( ) , docs1) ;
546587
547588 let mut docs2 = BTreeMap :: new ( ) ;
548- docs2. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "v" : 2 } ) ) ) ;
589+ docs2. insert (
590+ "id1" . to_string ( ) ,
591+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "v" : 2 } ) ) ) ,
592+ ) ;
549593 let t2 = JSTable :: new ( 200 , "test_col" . to_string ( ) , schema. clone ( ) , docs2) ;
550594
551595 // Case 1: t1 (older) then t2 (newer) in the slice
552596 let merged = merge_jstables ( vec ! [ t1, t2] ) ;
553- assert_eq ! (
554- jsonb_to_serde( merged. documents. get( "id1" ) . unwrap( ) ) ,
555- json!( { "v" : 2 } )
556- ) ;
597+ let val = merged. documents . get ( "id1" ) . unwrap ( ) ;
598+ match val {
599+ StoredValue :: Static ( v) => assert_eq ! ( jsonb_to_serde( v) , json!( { "v" : 2 } ) ) ,
600+ _ => panic ! ( "Expected static value" ) ,
601+ }
557602 assert_eq ! ( merged. timestamp, 200 ) ;
558603 assert_eq ! ( merged. collection, "test_col" ) ;
559604
560605 // Case 2: Reverse order
561606 let mut docs1 = BTreeMap :: new ( ) ;
562- docs1. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "v" : 1 } ) ) ) ;
607+ docs1. insert (
608+ "id1" . to_string ( ) ,
609+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "v" : 1 } ) ) ) ,
610+ ) ;
563611 let t1b = JSTable :: new ( 100 , "test_col" . to_string ( ) , schema. clone ( ) , docs1) ;
564612
565613 let mut docs2 = BTreeMap :: new ( ) ;
566- docs2. insert ( "id1" . to_string ( ) , serde_to_jsonb ( json ! ( { "v" : 2 } ) ) ) ;
614+ docs2. insert (
615+ "id1" . to_string ( ) ,
616+ StoredValue :: Static ( serde_to_jsonb ( json ! ( { "v" : 2 } ) ) ) ,
617+ ) ;
567618 let t2b = JSTable :: new ( 200 , "test_col" . to_string ( ) , schema. clone ( ) , docs2) ;
568619
569620 let merged_reverse = merge_jstables ( vec ! [ t2b, t1b] ) ;
570- assert_eq ! (
571- jsonb_to_serde( merged_reverse. documents. get( "id1" ) . unwrap( ) ) ,
572- json!( { "v" : 2 } )
573- ) ;
621+ let val = merged_reverse. documents . get ( "id1" ) . unwrap ( ) ;
622+ match val {
623+ StoredValue :: Static ( v) => assert_eq ! ( jsonb_to_serde( v) , json!( { "v" : 2 } ) ) ,
624+ _ => panic ! ( "Expected static value" ) ,
625+ }
574626 assert_eq ! ( merged_reverse. timestamp, 200 ) ;
575627 }
576628
@@ -579,9 +631,18 @@ mod tests {
579631 let schema = Schema :: new ( InstanceType :: Object ) ;
580632 let mut documents = BTreeMap :: new ( ) ;
581633 // Insert keys in non-sorted order (BTreeMap will sort them)
582- documents. insert ( "c" . to_string ( ) , serde_to_jsonb ( json ! ( 3 ) ) ) ;
583- documents. insert ( "a" . to_string ( ) , serde_to_jsonb ( json ! ( 1 ) ) ) ;
584- documents. insert ( "b" . to_string ( ) , serde_to_jsonb ( json ! ( 2 ) ) ) ;
634+ documents. insert (
635+ "c" . to_string ( ) ,
636+ StoredValue :: Static ( serde_to_jsonb ( json ! ( 3 ) ) ) ,
637+ ) ;
638+ documents. insert (
639+ "a" . to_string ( ) ,
640+ StoredValue :: Static ( serde_to_jsonb ( json ! ( 1 ) ) ) ,
641+ ) ;
642+ documents. insert (
643+ "b" . to_string ( ) ,
644+ StoredValue :: Static ( serde_to_jsonb ( json ! ( 2 ) ) ) ,
645+ ) ;
585646
586647 let jstable = JSTable :: new ( 123 , "sorted_test" . to_string ( ) , schema, documents) ;
587648
@@ -602,13 +663,28 @@ mod tests {
602663 let mut documents = BTreeMap :: new ( ) ;
603664
604665 let large_val = "x" . repeat ( 500 ) ; // ~500 bytes
605- documents. insert ( "a" . to_string ( ) , serde_to_jsonb ( json ! ( large_val) ) ) ;
606- documents. insert ( "b" . to_string ( ) , serde_to_jsonb ( json ! ( large_val) ) ) ;
607- documents. insert ( "c" . to_string ( ) , serde_to_jsonb ( json ! ( large_val) ) ) ;
666+ documents. insert (
667+ "a" . to_string ( ) ,
668+ StoredValue :: Static ( serde_to_jsonb ( json ! ( large_val) ) ) ,
669+ ) ;
670+ documents. insert (
671+ "b" . to_string ( ) ,
672+ StoredValue :: Static ( serde_to_jsonb ( json ! ( large_val) ) ) ,
673+ ) ;
674+ documents. insert (
675+ "c" . to_string ( ) ,
676+ StoredValue :: Static ( serde_to_jsonb ( json ! ( large_val) ) ) ,
677+ ) ;
608678
609679 let larger_val = "x" . repeat ( 1100 ) ;
610- documents. insert ( "d" . to_string ( ) , serde_to_jsonb ( json ! ( larger_val) ) ) ;
611- documents. insert ( "e" . to_string ( ) , serde_to_jsonb ( json ! ( 1 ) ) ) ;
680+ documents. insert (
681+ "d" . to_string ( ) ,
682+ StoredValue :: Static ( serde_to_jsonb ( json ! ( larger_val) ) ) ,
683+ ) ;
684+ documents. insert (
685+ "e" . to_string ( ) ,
686+ StoredValue :: Static ( serde_to_jsonb ( json ! ( 1 ) ) ) ,
687+ ) ;
612688
613689 let jstable = JSTable :: new ( 123 , "idx_test" . to_string ( ) , schema, documents) ;
614690 let dir = tempdir ( ) ?;
0 commit comments