44//! This module defines the default layout strategy for a Vortex file.
55
66use std:: sync:: Arc ;
7-
7+ use std:: sync:: LazyLock ;
8+
9+ // Compressed encodings from encoding crates
10+ use vortex_alp:: ALPRDVTable ;
11+ use vortex_alp:: ALPVTable ;
12+ // Canonical array encodings from vortex-array
13+ use vortex_array:: arrays:: BoolVTable ;
14+ use vortex_array:: arrays:: ChunkedVTable ;
15+ use vortex_array:: arrays:: ConstantVTable ;
16+ use vortex_array:: arrays:: DecimalVTable ;
17+ use vortex_array:: arrays:: DictVTable ;
18+ use vortex_array:: arrays:: ExtensionVTable ;
19+ use vortex_array:: arrays:: FixedSizeListVTable ;
20+ use vortex_array:: arrays:: ListVTable ;
21+ use vortex_array:: arrays:: ListViewVTable ;
22+ use vortex_array:: arrays:: MaskedVTable ;
23+ use vortex_array:: arrays:: NullVTable ;
24+ use vortex_array:: arrays:: PrimitiveVTable ;
25+ use vortex_array:: arrays:: StructVTable ;
26+ use vortex_array:: arrays:: VarBinVTable ;
27+ use vortex_array:: arrays:: VarBinViewVTable ;
28+ use vortex_array:: session:: ArrayRegistry ;
29+ use vortex_bytebool:: ByteBoolVTable ;
30+ use vortex_datetime_parts:: DateTimePartsVTable ;
31+ use vortex_decimal_byte_parts:: DecimalBytePartsVTable ;
832use vortex_dtype:: FieldPath ;
33+ use vortex_fastlanes:: BitPackedVTable ;
34+ use vortex_fastlanes:: DeltaVTable ;
35+ use vortex_fastlanes:: FoRVTable ;
36+ use vortex_fastlanes:: RLEVTable ;
37+ use vortex_fsst:: FSSTVTable ;
938use vortex_layout:: LayoutStrategy ;
1039use vortex_layout:: layouts:: buffered:: BufferedStrategy ;
1140use vortex_layout:: layouts:: chunked:: writer:: ChunkedLayoutStrategy ;
@@ -19,10 +48,64 @@ use vortex_layout::layouts::repartition::RepartitionWriterOptions;
1948use vortex_layout:: layouts:: table:: TableStrategy ;
2049use vortex_layout:: layouts:: zoned:: writer:: ZonedLayoutOptions ;
2150use vortex_layout:: layouts:: zoned:: writer:: ZonedStrategy ;
51+ use vortex_pco:: PcoVTable ;
52+ use vortex_runend:: RunEndVTable ;
53+ use vortex_sequence:: SequenceVTable ;
54+ use vortex_sparse:: SparseVTable ;
2255use vortex_utils:: aliases:: hash_map:: HashMap ;
56+ use vortex_zigzag:: ZigZagVTable ;
57+ #[ cfg( feature = "zstd" ) ]
58+ use vortex_zstd:: ZstdVTable ;
2359
2460const ONE_MEG : u64 = 1 << 20 ;
2561
62+ /// Static registry of all allowed array encodings for file writing.
63+ ///
64+ /// This includes all canonical encodings from vortex-array plus all compressed
65+ /// encodings from the various encoding crates.
66+ pub static ALLOWED_ENCODINGS : LazyLock < ArrayRegistry > = LazyLock :: new ( || {
67+ let registry = ArrayRegistry :: default ( ) ;
68+
69+ // Canonical encodings from vortex-array
70+ registry. register ( NullVTable :: ID , NullVTable ) ;
71+ registry. register ( BoolVTable :: ID , BoolVTable ) ;
72+ registry. register ( PrimitiveVTable :: ID , PrimitiveVTable ) ;
73+ registry. register ( DecimalVTable :: ID , DecimalVTable ) ;
74+ registry. register ( VarBinVTable :: ID , VarBinVTable ) ;
75+ registry. register ( VarBinViewVTable :: ID , VarBinViewVTable ) ;
76+ registry. register ( ListVTable :: ID , ListVTable ) ;
77+ registry. register ( ListViewVTable :: ID , ListViewVTable ) ;
78+ registry. register ( FixedSizeListVTable :: ID , FixedSizeListVTable ) ;
79+ registry. register ( StructVTable :: ID , StructVTable ) ;
80+ registry. register ( ExtensionVTable :: ID , ExtensionVTable ) ;
81+ registry. register ( ChunkedVTable :: ID , ChunkedVTable ) ;
82+ registry. register ( ConstantVTable :: ID , ConstantVTable ) ;
83+ registry. register ( MaskedVTable :: ID , MaskedVTable ) ;
84+ registry. register ( DictVTable :: ID , DictVTable ) ;
85+
86+ // Compressed encodings from encoding crates
87+ registry. register ( ALPVTable :: ID , ALPVTable ) ;
88+ registry. register ( ALPRDVTable :: ID , ALPRDVTable ) ;
89+ registry. register ( BitPackedVTable :: ID , BitPackedVTable ) ;
90+ registry. register ( ByteBoolVTable :: ID , ByteBoolVTable ) ;
91+ registry. register ( DateTimePartsVTable :: ID , DateTimePartsVTable ) ;
92+ registry. register ( DecimalBytePartsVTable :: ID , DecimalBytePartsVTable ) ;
93+ registry. register ( DeltaVTable :: ID , DeltaVTable ) ;
94+ registry. register ( FoRVTable :: ID , FoRVTable ) ;
95+ registry. register ( FSSTVTable :: ID , FSSTVTable ) ;
96+ registry. register ( PcoVTable :: ID , PcoVTable ) ;
97+ registry. register ( RLEVTable :: ID , RLEVTable ) ;
98+ registry. register ( RunEndVTable :: ID , RunEndVTable ) ;
99+ registry. register ( SequenceVTable :: ID , SequenceVTable ) ;
100+ registry. register ( SparseVTable :: ID , SparseVTable ) ;
101+ registry. register ( ZigZagVTable :: ID , ZigZagVTable ) ;
102+
103+ #[ cfg( feature = "zstd" ) ]
104+ registry. register ( ZstdVTable :: ID , ZstdVTable ) ;
105+
106+ registry
107+ } ) ;
108+
26109/// Build a new [writer strategy][LayoutStrategy] to compress and reorganize chunks of a Vortex file.
27110///
28111/// Vortex provides an out-of-the-box file writer that optimizes the layout of chunks on-disk,
@@ -32,25 +115,23 @@ pub struct WriteStrategyBuilder {
32115 compressor : Option < Arc < dyn CompressorPlugin > > ,
33116 row_block_size : usize ,
34117 field_writers : HashMap < FieldPath , Arc < dyn LayoutStrategy > > ,
118+ allow_encodings : Option < ArrayRegistry > ,
35119}
36120
37121impl Default for WriteStrategyBuilder {
122+ /// Create a new empty builder. It can be further configured,
123+ /// and then finally built yielding the [`LayoutStrategy`].
38124 fn default ( ) -> Self {
39- Self :: new ( )
40- }
41- }
42-
43- impl WriteStrategyBuilder {
44- /// Create a new empty builder. It can be further configured, and then finally built
45- /// yielding the [`LayoutStrategy`].
46- pub fn new ( ) -> Self {
47125 Self {
48126 compressor : None ,
49127 row_block_size : 8192 ,
50128 field_writers : HashMap :: new ( ) ,
129+ allow_encodings : None ,
51130 }
52131 }
132+ }
53133
134+ impl WriteStrategyBuilder {
54135 /// Override the [compressor][CompressorPlugin] used for compressing chunks in the file.
55136 ///
56137 /// If not provided, this will use a BtrBlocks-style cascading compressor that tries to balance
@@ -77,11 +158,23 @@ impl WriteStrategyBuilder {
77158 self
78159 }
79160
161+ /// Override the allowed array encodings for normalization.
162+ pub fn with_allow_encodings ( mut self , allow_encodings : ArrayRegistry ) -> Self {
163+ self . allow_encodings = Some ( allow_encodings) ;
164+ self
165+ }
166+
80167 /// Builds the canonical [`LayoutStrategy`] implementation, with the configured overrides
81168 /// applied.
82169 pub fn build ( self ) -> Arc < dyn LayoutStrategy > {
170+ let flat = if let Some ( allow_encodings) = self . allow_encodings {
171+ FlatLayoutStrategy :: default ( ) . with_allow_encodings ( allow_encodings)
172+ } else {
173+ FlatLayoutStrategy :: default ( )
174+ } ;
175+
83176 // 7. for each chunk create a flat layout
84- let chunked = ChunkedLayoutStrategy :: new ( FlatLayoutStrategy :: default ( ) ) ;
177+ let chunked = ChunkedLayoutStrategy :: new ( flat . clone ( ) ) ;
85178 // 6. buffer chunks so they end up with closer segment ids physically
86179 let buffered = BufferedStrategy :: new ( chunked, 2 * ONE_MEG ) ; // 2MB
87180 // 5. compress each chunk
@@ -110,9 +203,9 @@ impl WriteStrategyBuilder {
110203
111204 // 2.1. | 3.1. compress stats tables and dict values.
112205 let compress_then_flat = if let Some ( ref compressor) = self . compressor {
113- CompressingStrategy :: new_opaque ( FlatLayoutStrategy :: default ( ) , compressor. clone ( ) )
206+ CompressingStrategy :: new_opaque ( flat , compressor. clone ( ) )
114207 } else {
115- CompressingStrategy :: new_btrblocks ( FlatLayoutStrategy :: default ( ) , false )
208+ CompressingStrategy :: new_btrblocks ( flat , false )
116209 } ;
117210
118211 // 3. apply dict encoding or fallback
0 commit comments