Skip to content

Commit

Permalink
Add a from_iter_slow method to ListArray to allow convenient creati…
Browse files Browse the repository at this point in the history
…on of lists (#1778)

Once we have a better list scalar this can be renamed to from_iter (like
other ctors).

This allows tests to hide implementation details, but it shouldn't be used
for performance-sensitive paths
  • Loading branch information
joseph-isaacs authored Jan 2, 2025
1 parent caf6631 commit 1f8f98f
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 10 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ vortex-scalar = { workspace = true, features = ["flatbuffers", "serde", "arbitra

[features]
arbitrary = ["dep:arbitrary", "vortex-dtype/arbitrary"]
test_util = []
canonical_counter = []

[target.'cfg(target_arch = "wasm32")'.dependencies]
Expand All @@ -65,6 +66,8 @@ getrandom = { workspace = true, features = ["js"] }
[dev-dependencies]
criterion = { workspace = true }
rstest = { workspace = true }
vortex-array = { path = ".", features = ["test_util"] }


[[bench]]
name = "search_sorted"
Expand Down
73 changes: 65 additions & 8 deletions vortex-array/src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@ mod compute;
use std::fmt::Display;
use std::sync::Arc;

#[cfg(feature = "test_util")]
use itertools::Itertools;
use num_traits::AsPrimitive;
use serde::{Deserialize, Serialize};
#[cfg(feature = "test_util")]
use vortex_dtype::Nullability;
use vortex_dtype::{match_each_native_ptype, DType, PType};
use vortex_error::{vortex_bail, vortex_panic, VortexExpect, VortexResult};
#[cfg(feature = "test_util")]
use vortex_scalar::Scalar;

use crate::array::PrimitiveArray;
#[cfg(feature = "test_util")]
use crate::builders::{ArrayBuilder, ListBuilder};
use crate::compute::{scalar_at, slice};
use crate::encoding::ids;
use crate::stats::{Stat, StatisticsVTable, StatsSet};
Expand Down Expand Up @@ -184,12 +192,42 @@ impl ValidityVTable<ListArray> for ListEncoding {
}
}

#[cfg(feature = "test_util")]
impl ListArray {
    /// Builds a list array from an iterator of iterators; a convenience constructor intended
    /// for tests.
    ///
    /// Every inner item is converted into a [`Scalar`], each inner iterator is collected into
    /// a list scalar with element type `dtype`, and that scalar is appended to a
    /// [`ListBuilder`]. Because of this per-element scalar round trip the method is slow and
    /// should not be used on performance sensitive paths.
    ///
    /// The builder is instantiated as `ListBuilder::<u32>` (presumably the offset type —
    /// confirm against `ListBuilder`) and is created as non-nullable.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while appending a list to the builder or while
    /// finishing it.
    pub fn from_iter_slow<I: IntoIterator>(iter: I, dtype: Arc<DType>) -> VortexResult<ArrayData>
    where
        I::Item: IntoIterator,
        <I::Item as IntoIterator>::Item: Into<Scalar>,
    {
        let rows = iter.into_iter();
        // Only the lower bound of the size hint is used to pre-size the builder.
        let (min_rows, _) = rows.size_hint();
        let mut lists =
            ListBuilder::<u32>::with_capacity(dtype.clone(), Nullability::NonNullable, min_rows);

        for row in rows {
            let values = row.into_iter().map(Into::into).collect_vec();
            let list_scalar = Scalar::list(dtype.clone(), values, Nullability::Nullable);
            lists.append_value(list_scalar.as_list())?;
        }

        lists.finish()
    }
}

#[cfg(test)]
mod test {
use std::sync::Arc;

use vortex_dtype::Nullability;
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::{Nullability, PType};
use vortex_dtype::PType::I32;
use vortex_scalar::Scalar;

use crate::array::list::ListArray;
Expand Down Expand Up @@ -221,27 +259,46 @@ mod test {

assert_eq!(
Scalar::list(
Arc::new(PType::I32.into()),
Arc::new(I32.into()),
vec![1.into(), 2.into()],
Nullability::Nullable
),
scalar_at(&list, 0).unwrap()
);
assert_eq!(
Scalar::list(
Arc::new(PType::I32.into()),
Arc::new(I32.into()),
vec![3.into(), 4.into()],
Nullability::Nullable
),
scalar_at(&list, 1).unwrap()
);
assert_eq!(
Scalar::list(
Arc::new(PType::I32.into()),
vec![5.into()],
Nullability::Nullable
),
Scalar::list(Arc::new(I32.into()), vec![5.into()], Nullability::Nullable),
scalar_at(&list, 2).unwrap()
);
}

#[test]
fn test_simple_list_array_from_iter() {
    // Reference array assembled by hand from elements [1, 2, 3] and offsets [0, 2, 3].
    let expected = ListArray::try_new(
        PrimitiveArray::from_iter([1i32, 2, 3]).into_array(),
        PrimitiveArray::from_iter([0, 2, 3]).into_array(),
        Validity::NonNullable,
    )
    .unwrap();

    // The same lists, [1, 2] and [3], produced through the scalar-based convenience constructor.
    let actual =
        ListArray::from_iter_slow(vec![vec![1i32, 2], vec![3]], Arc::new(I32.into())).unwrap();

    assert_eq!(expected.len(), actual.len());
    // Compare both list entries scalar-by-scalar.
    for idx in 0..2 {
        assert_eq!(
            scalar_at(&expected, idx).unwrap(),
            scalar_at(&actual, idx).unwrap()
        );
    }
}
}
1 change: 1 addition & 0 deletions vortex-file/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ arrow-schema = { workspace = true }
rstest = { workspace = true }
tokio = { workspace = true, features = ["full"] }
vortex-io = { path = "../vortex-io", features = ["tokio"] }
vortex-array = { workspace = true, features = ["test_util"] }

[lints]
workspace = true
Expand Down
21 changes: 19 additions & 2 deletions vortex-file/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ use futures::StreamExt;
use futures_util::TryStreamExt;
use itertools::Itertools;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::array::{ChunkedArray, PrimitiveArray, StructArray, VarBinArray};
use vortex_array::array::{ChunkedArray, ListArray, PrimitiveArray, StructArray, VarBinArray};
use vortex_array::compute::scalar_at;
use vortex_array::validity::Validity;
use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait};
use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData};
use vortex_buffer::{buffer, Buffer};
use vortex_dtype::field::Field;
use vortex_dtype::PType::I32;
use vortex_dtype::{DType, Nullability, PType, StructDType};
use vortex_error::{vortex_panic, VortexResult};
use vortex_expr::{BinaryExpr, Column, Literal, Operator};
Expand Down Expand Up @@ -89,7 +90,23 @@ async fn test_read_simple_with_spawn() {
])
.into_array();

let st = StructArray::from_fields(&[("strings", strings), ("numbers", numbers)]).unwrap();
let lists = ChunkedArray::from_iter([
ListArray::from_iter_slow(
vec![vec![11, 12], vec![21, 22], vec![31, 32], vec![41, 42]],
Arc::new(I32.into()),
)
.unwrap(),
ListArray::from_iter_slow(
vec![vec![51, 52], vec![61, 62], vec![71, 72], vec![81, 82]],
Arc::new(I32.into()),
)
.unwrap(),
])
.into_array();

let st =
StructArray::from_fields(&[("strings", strings), ("numbers", numbers), ("lists", lists)])
.unwrap();
let buf = Vec::new();

let written = tokio::spawn(async move {
Expand Down

0 comments on commit 1f8f98f

Please sign in to comment.