Skip to content

Commit 014d10f

Browse files
committed
Address list-of-struct projection review
1 parent c1dc71f commit 014d10f

File tree

6 files changed

+55
-22
lines changed

6 files changed

+55
-22
lines changed

vortex-array/src/expr/analysis/immediate_access.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::expr::analysis::AnnotationFn;
1111
use crate::expr::analysis::Annotations;
1212
use crate::expr::descendent_annotations;
1313
use crate::expr::exprs::get_item::GetItem;
14+
use crate::expr::exprs::get_item_list::GetItemList;
1415
use crate::expr::exprs::root::Root;
1516
use crate::expr::exprs::select::Select;
1617

@@ -28,7 +29,17 @@ pub fn annotate_scope_access(scope: &StructFields) -> impl AnnotationFn<Annotati
2829
if expr.child(0).is::<Root>() {
2930
return vec![field_name.clone()];
3031
}
31-
} else if expr.is::<Root>() {
32+
}
33+
34+
if expr.is::<GetItemList>() {
35+
if let Some(field_name) = expr.child(0).as_opt::<GetItem>() {
36+
if expr.child(0).child(0).is::<Root>() {
37+
return vec![field_name.clone()];
38+
}
39+
}
40+
}
41+
42+
if expr.is::<Root>() {
3243
return scope.names().iter().cloned().collect();
3344
}
3445

vortex-array/src/expr/exprs/get_item.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,7 @@ impl VTable for GetItem {
9292
vortex_err!("Couldn't find the {} field in the input scope", field_name)
9393
})?;
9494

95-
// Match here to avoid cloning the dtype if nullability doesn't need to change
96-
if matches!(
97-
(struct_dtype.nullability(), field_dtype.nullability()),
98-
(Nullability::Nullable, Nullability::NonNullable)
99-
) {
100-
return Ok(field_dtype.with_nullability(Nullability::Nullable));
101-
}
102-
103-
Ok(field_dtype)
95+
Ok(field_dtype.union_nullability(struct_dtype.nullability()))
10496
}
10597

10698
fn execute(

vortex-array/src/expr/vtable.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,4 +815,16 @@ mod tests {
815815

816816
Ok(())
817817
}
818+
819+
#[test]
820+
fn get_item_list_is_not_serializable() {
821+
let expr = get_item_list("field", root());
822+
let err = expr
823+
.serialize_proto()
824+
.expect_err("get_item_list must not be serializable");
825+
assert!(
826+
err.to_string().contains("must not be serialized"),
827+
"unexpected error: {err}"
828+
);
829+
}
818830
}

vortex-datafusion/src/persistent/opener.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,8 @@ impl FileOpener for VortexOpener {
204204

205205
// The schema of the stream returned from the vortex scan.
206206
// We use the physical_file_schema as reference for types that don't roundtrip.
207-
let scan_dtype = scan_projection.return_dtype(vxf.dtype()).map_err(|_e| {
208-
exec_datafusion_err!("Couldn't get the dtype for the underlying Vortex scan")
207+
let scan_dtype = scan_projection.return_dtype(vxf.dtype()).map_err(|e| {
208+
exec_datafusion_err!("Couldn't get the dtype for the underlying Vortex scan: {e}")
209209
})?;
210210
let stream_schema = calculate_physical_schema(&scan_dtype, &projected_physical_schema)?;
211211

vortex-file/tests/test_write_table.rs

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use vortex_array::ToCanonical;
1313
use vortex_array::arrays::ListArray;
1414
use vortex_array::arrays::PrimitiveArray;
1515
use vortex_array::arrays::StructArray;
16+
use vortex_array::assert_arrays_eq;
1617
use vortex_array::expr::get_item;
1718
use vortex_array::expr::root;
1819
use vortex_array::expr::session::ExprSession;
@@ -167,7 +168,7 @@ async fn test_list_of_struct_nested_projection() {
167168
],
168169
)]),
169170
],
170-
element_dtype,
171+
element_dtype.clone(),
171172
)
172173
.unwrap();
173174

@@ -178,7 +179,7 @@ async fn test_list_of_struct_nested_projection() {
178179

179180
let data = StructArray::new(
180181
FieldNames::from(["id", "items"]),
181-
vec![ids, items],
182+
vec![ids, items.clone()],
182183
row_count,
183184
Validity::NonNullable,
184185
)
@@ -204,19 +205,33 @@ async fn test_list_of_struct_nested_projection() {
204205
let mut stream = vxf
205206
.scan()
206207
.expect("scan")
207-
.with_projection(projection)
208+
.with_projection(projection.clone())
208209
.into_stream()
209210
.expect("into_stream");
210211

211212
let out = stream.next().await.expect("first batch").expect("batch");
212213

213-
// Smoke-check the projected shape; detailed value semantics are covered by unit tests in
214-
// `vortex-array`.
215-
assert_eq!(out.len(), row_count);
216-
assert!(matches!(
217-
out.dtype(),
218-
vortex_dtype::DType::List(_, Nullability::Nullable)
219-
));
214+
let expected = ListArray::from_iter_opt_slow::<u32, _, _>(
215+
[
216+
Some(vec![
217+
Scalar::primitive(1i32, Nullability::NonNullable),
218+
Scalar::primitive(2i32, Nullability::NonNullable),
219+
]),
220+
Some(Vec::new()),
221+
None,
222+
Some(vec![Scalar::primitive(3i32, Nullability::NonNullable)]),
223+
],
224+
Arc::new(vortex_dtype::DType::Primitive(
225+
PType::I32,
226+
Nullability::NonNullable,
227+
)),
228+
)
229+
.unwrap()
230+
.into_array();
231+
let simplified = projection.optimize_recursive(data.dtype()).unwrap();
232+
let expected_dtype = simplified.return_dtype(data.dtype()).unwrap();
233+
assert_eq!(out.dtype(), &expected_dtype);
234+
assert_arrays_eq!(expected, out);
220235

221236
// Verify the list column is not stored as a single flat blob layout.
222237
// This is the root cause of poor nested support described in #4889.

vortex-layout/src/layouts/list/reader.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ impl ListReader {
121121
self.lazy_children.get(idx)
122122
}
123123

124+
/// Creates a future that will produce a slice of this list array.
125+
///
126+
/// The produced slice may have a projection applied to its elements.
124127
fn list_slice_future(
125128
&self,
126129
row_range: Range<u64>,

0 commit comments

Comments
 (0)