NOTE(review): this patch was restored from a copy that had been flattened onto a few
lines and had lost all text between angle brackets. Line breaks, indentation and blank
lines were rebuilt so that every hunk matches its `@@` line counts. The generic
parameters of `ListArray::from_iter_slow` were re-derived from how the values are used.
The `u32` offset type passed to `ListBuilder` and the `ArrayData` return payload are
assumptions -- confirm both against the original change before applying.

diff --git a/Cargo.lock b/Cargo.lock
index ecd564e865..cb9acead0c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4937,6 +4937,7 @@ dependencies = [
  "rstest",
  "serde",
  "static_assertions",
+ "vortex-array",
  "vortex-buffer",
  "vortex-datetime-dtype",
  "vortex-dtype",
diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml
index d2d8c9f744..065d18e34e 100644
--- a/vortex-array/Cargo.toml
+++ b/vortex-array/Cargo.toml
@@ -56,6 +56,7 @@ vortex-scalar = { workspace = true, features = ["flatbuffers", "serde", "arbitra
 
 [features]
 arbitrary = ["dep:arbitrary", "vortex-dtype/arbitrary"]
+test_util = []
 canonical_counter = []
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
@@ -65,6 +66,8 @@ getrandom = { workspace = true, features = ["js"] }
 [dev-dependencies]
 criterion = { workspace = true }
 rstest = { workspace = true }
+vortex-array = { path = ".", features = ["test_util"] }
+
 
 [[bench]]
 name = "search_sorted"
diff --git a/vortex-array/src/array/list/mod.rs b/vortex-array/src/array/list/mod.rs
index 174215a4d8..04df958da5 100644
--- a/vortex-array/src/array/list/mod.rs
+++ b/vortex-array/src/array/list/mod.rs
@@ -3,12 +3,20 @@ mod compute;
 use std::fmt::Display;
 use std::sync::Arc;
 
+#[cfg(feature = "test_util")]
+use itertools::Itertools;
 use num_traits::AsPrimitive;
 use serde::{Deserialize, Serialize};
+#[cfg(feature = "test_util")]
+use vortex_dtype::Nullability;
 use vortex_dtype::{match_each_native_ptype, DType, PType};
 use vortex_error::{vortex_bail, vortex_panic, VortexExpect, VortexResult};
+#[cfg(feature = "test_util")]
+use vortex_scalar::Scalar;
 
 use crate::array::PrimitiveArray;
+#[cfg(feature = "test_util")]
+use crate::builders::{ArrayBuilder, ListBuilder};
 use crate::compute::{scalar_at, slice};
 use crate::encoding::ids;
 use crate::stats::{Stat, StatisticsVTable, StatsSet};
@@ -184,12 +192,42 @@ impl ValidityVTable for ListEncoding {
     }
 }
 
+#[cfg(feature = "test_util")]
+impl ListArray {
+    /// This is a convenience method to create a list array from an iterator of iterators.
+    /// This method is slow however since each element is first converted to a scalar and then
+    /// appended to the array.
+    pub fn from_iter_slow<I: IntoIterator>(iter: I, dtype: Arc<DType>) -> VortexResult<ArrayData>
+    where
+        I::Item: IntoIterator,
+        <I::Item as IntoIterator>::Item: Into<Scalar>,
+    {
+        let iter = iter.into_iter();
+        let mut builder = ListBuilder::<u32>::with_capacity(
+            dtype.clone(),
+            Nullability::NonNullable,
+            iter.size_hint().0,
+        );
+
+        for v in iter {
+            let elem = Scalar::list(
+                dtype.clone(),
+                v.into_iter().map(|x| x.into()).collect_vec(),
+                Nullability::Nullable,
+            );
+            builder.append_value(elem.as_list())?
+        }
+        builder.finish()
+    }
+}
+
 #[cfg(test)]
 mod test {
     use std::sync::Arc;
 
+    use vortex_dtype::Nullability;
     use vortex_dtype::Nullability::NonNullable;
-    use vortex_dtype::{Nullability, PType};
+    use vortex_dtype::PType::I32;
     use vortex_scalar::Scalar;
 
     use crate::array::list::ListArray;
@@ -221,7 +259,7 @@ mod test {
 
         assert_eq!(
             Scalar::list(
-                Arc::new(PType::I32.into()),
+                Arc::new(I32.into()),
                 vec![1.into(), 2.into()],
                 Nullability::Nullable
             ),
@@ -229,19 +267,38 @@ mod test {
         );
         assert_eq!(
             Scalar::list(
-                Arc::new(PType::I32.into()),
+                Arc::new(I32.into()),
                 vec![3.into(), 4.into()],
                 Nullability::Nullable
             ),
             scalar_at(&list, 1).unwrap()
         );
         assert_eq!(
-            Scalar::list(
-                Arc::new(PType::I32.into()),
-                vec![5.into()],
-                Nullability::Nullable
-            ),
+            Scalar::list(Arc::new(I32.into()), vec![5.into()], Nullability::Nullable),
             scalar_at(&list, 2).unwrap()
         );
     }
+
+    #[test]
+    fn test_simple_list_array_from_iter() {
+        let elements = PrimitiveArray::from_iter([1i32, 2, 3]);
+        let offsets = PrimitiveArray::from_iter([0, 2, 3]);
+        let validity = Validity::NonNullable;
+
+        let list =
+            ListArray::try_new(elements.into_array(), offsets.into_array(), validity).unwrap();
+
+        let list_from_iter =
+            ListArray::from_iter_slow(vec![vec![1i32, 2], vec![3]], Arc::new(I32.into())).unwrap();
+
+        assert_eq!(list.len(), list_from_iter.len());
+        assert_eq!(
+            scalar_at(&list, 0).unwrap(),
+            scalar_at(&list_from_iter, 0).unwrap()
+        );
+        assert_eq!(
+            scalar_at(&list, 1).unwrap(),
+            scalar_at(&list_from_iter, 1).unwrap()
+        );
+    }
 }
diff --git a/vortex-file/Cargo.toml b/vortex-file/Cargo.toml
index 48f07f46e4..ae1c026360 100644
--- a/vortex-file/Cargo.toml
+++ b/vortex-file/Cargo.toml
@@ -41,6 +41,7 @@ arrow-schema = { workspace = true }
 rstest = { workspace = true }
 tokio = { workspace = true, features = ["full"] }
 vortex-io = { path = "../vortex-io", features = ["tokio"] }
+vortex-array = { workspace = true, features = ["test_util"] }
 
 [lints]
 workspace = true
diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs
index 44fe8d95b4..59e608d40f 100644
--- a/vortex-file/src/tests.rs
+++ b/vortex-file/src/tests.rs
@@ -8,13 +8,14 @@ use futures::StreamExt;
 use futures_util::TryStreamExt;
 use itertools::Itertools;
 use vortex_array::accessor::ArrayAccessor;
-use vortex_array::array::{ChunkedArray, PrimitiveArray, StructArray, VarBinArray};
+use vortex_array::array::{ChunkedArray, ListArray, PrimitiveArray, StructArray, VarBinArray};
 use vortex_array::compute::scalar_at;
 use vortex_array::validity::Validity;
 use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait};
 use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData};
 use vortex_buffer::{buffer, Buffer};
 use vortex_dtype::field::Field;
+use vortex_dtype::PType::I32;
 use vortex_dtype::{DType, Nullability, PType, StructDType};
 use vortex_error::{vortex_panic, VortexResult};
 use vortex_expr::{BinaryExpr, Column, Literal, Operator};
@@ -89,7 +90,23 @@ async fn test_read_simple_with_spawn() {
     ])
     .into_array();
 
-    let st = StructArray::from_fields(&[("strings", strings), ("numbers", numbers)]).unwrap();
+    let lists = ChunkedArray::from_iter([
+        ListArray::from_iter_slow(
+            vec![vec![11, 12], vec![21, 22], vec![31, 32], vec![41, 42]],
+            Arc::new(I32.into()),
+        )
+        .unwrap(),
+        ListArray::from_iter_slow(
+            vec![vec![51, 52], vec![61, 62], vec![71, 72], vec![81, 82]],
+            Arc::new(I32.into()),
+        )
+        .unwrap(),
+    ])
+    .into_array();
+
+    let st =
+        StructArray::from_fields(&[("strings", strings), ("numbers", numbers), ("lists", lists)])
+            .unwrap();
 
     let buf = Vec::new();
     let written = tokio::spawn(async move {