Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(functions): add new function: map_pick #15573

Merged
merged 10 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions src/query/functions/src/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@

use std::collections::HashSet;
use std::hash::Hash;
use std::sync::Arc;

use databend_common_expression::types::array::ArrayColumnBuilder;
use databend_common_expression::types::map::KvPair;
use databend_common_expression::types::nullable::NullableDomain;
use databend_common_expression::types::AnyType;
use databend_common_expression::types::ArgType;
use databend_common_expression::types::ArrayType;
use databend_common_expression::types::BooleanType;
use databend_common_expression::types::DataType;
use databend_common_expression::types::EmptyArrayType;
use databend_common_expression::types::EmptyMapType;
use databend_common_expression::types::GenericType;
Expand All @@ -27,11 +32,20 @@ use databend_common_expression::types::NullType;
use databend_common_expression::types::NullableType;
use databend_common_expression::types::NumberType;
use databend_common_expression::types::SimpleDomain;
use databend_common_expression::types::ValueType;
use databend_common_expression::vectorize_1_arg;
use databend_common_expression::vectorize_with_builder_2_arg;
use databend_common_expression::Column;
use databend_common_expression::EvalContext;
use databend_common_expression::Function;
use databend_common_expression::FunctionDomain;
use databend_common_expression::FunctionEval;
use databend_common_expression::FunctionRegistry;
use databend_common_expression::FunctionSignature;
use databend_common_expression::Scalar;
use databend_common_expression::ScalarRef;
use databend_common_expression::Value;
use databend_common_expression::ValueRef;
use databend_common_hashtable::StackHashSet;
use siphasher::sip128::Hasher128;
use siphasher::sip128::SipHasher24;
Expand Down Expand Up @@ -244,4 +258,134 @@ pub fn register(registry: &mut FunctionRegistry) {
.any(|(k, _)| k == key)
},
);

// Variadic `map_pick`: `map_pick(map, k1, k2, ...)` or `map_pick(map, [k1, k2, ...])`.
// The factory returns `None` for every argument list it does not accept.
registry.register_function_factory("map_pick", |_, args_type: &[DataType]| {
    // A source map followed by at least one key argument is required.
    let (map_type, key_types) = match args_type {
        [map_type, key_types @ ..] if !key_types.is_empty() => (map_type, key_types),
        _ => return None,
    };

    // Key type declared by the map; `None` when the map type exposes no
    // key/value tuple (for instance the empty map).
    let map_key_type = match map_type {
        DataType::Map(kv) => kv.as_tuple().map(|fields| &fields[0]),
        DataType::EmptyMap => None,
        _ => return None,
    };

    let keys_accepted = if let [only] = key_types {
        // Single key argument: it has to be an array whose element type is
        // exactly the key type of the map.
        match (only, map_key_type) {
            (DataType::Array(element), Some(expected)) => **element == *expected,
            _ => false,
        }
    } else {
        // Several key arguments: each one has to equal the map key type, or
        // be one of the plain scalar types when no key type is known.
        key_types.iter().all(|key_type| match map_key_type {
            Some(expected) => key_type == expected,
            None => matches!(
                key_type,
                DataType::String
                    | DataType::Number(_)
                    | DataType::Decimal(_)
                    | DataType::Date
                    | DataType::Timestamp
            ),
        })
    };

    // The result has the same type as the source map.
    keys_accepted.then(|| {
        Arc::new(Function {
            signature: FunctionSignature {
                name: "map_pick".to_string(),
                args_type: args_type.to_vec(),
                return_type: args_type[0].clone(),
            },
            eval: FunctionEval::Scalar {
                calc_domain: Box::new(move |_, _| FunctionDomain::Full),
                eval: Box::new(map_pick_fn_vec),
            },
        })
    })
});

fn map_pick_fn_vec(args: &[ValueRef<AnyType>], _: &mut EvalContext) -> Value<AnyType> {
let len = args.iter().find_map(|arg| match arg {
ValueRef::Column(col) => Some(col.len()),
_ => None,
});

let source_data_type = match args.first().unwrap() {
ValueRef::Scalar(s) => s.infer_data_type(),
ValueRef::Column(c) => c.data_type(),
};

let source_map = match &args[0] {
ValueRef::Scalar(s) => match s {
ScalarRef::Map(cols) => {
KvPair::<GenericType<0>, GenericType<1>>::try_downcast_column(cols).unwrap()
}
ScalarRef::EmptyMap => {
KvPair::<GenericType<0>, GenericType<1>>::try_downcast_column(
&Column::EmptyMap { len: 0 },
)
.unwrap()
}
_ => unreachable!(),
},
ValueRef::Column(Column::Map(c)) => {
hanxuanliang marked this conversation as resolved.
Show resolved Hide resolved
KvPair::<GenericType<0>, GenericType<1>>::try_downcast_column(&c.values).unwrap()
b41sh marked this conversation as resolved.
Show resolved Hide resolved
}
_ => unreachable!(),
};

let mut builder: ArrayColumnBuilder<KvPair<GenericType<0>, GenericType<1>>> =
ArrayType::create_builder(
args.len() - 1,
source_data_type.as_map().unwrap().as_tuple().unwrap(),
);
let select_keys = match &args[1] {
ValueRef::Scalar(ScalarRef::Array(arr)) if args.len() == 2 => {
arr.iter().collect::<Vec<_>>()
}
_ => args[1..]
.iter()
.map(|arg| arg.as_scalar().unwrap().clone())
.collect::<Vec<_>>(),
};
for key_arg in select_keys {
if let Some((k, v)) = source_map.iter().find(|(k, _)| k == &key_arg) {
builder.put_item((k.clone(), v.clone()));
}
}
builder.commit_row();
b41sh marked this conversation as resolved.
Show resolved Hide resolved

match len {
Some(_) => Value::Column(Column::Map(Box::new(builder.build().upcast()))),
_ => {
let scalar_builder = builder.build_scalar();
Value::Scalar(Scalar::Map(Column::Tuple(vec![
scalar_builder.keys,
scalar_builder.values,
])))
}
}
}

// `map_pick` overloads whose source is the empty map: both return the empty
// map (`Value::Scalar(())`) without looking at the keys.
// Overload for an empty key array.
registry.register_2_arg_core::<EmptyMapType, EmptyArrayType, EmptyMapType, _, _>(
"map_pick",
|_, _, _| FunctionDomain::Full,
|_, _, _| Value::Scalar(()),
);

// Overload for a key array with any element type.
registry.register_2_arg_core::<EmptyMapType, ArrayType<GenericType<0>>, EmptyMapType, _, _>(
"map_pick",
|_, _, _| FunctionDomain::Full,
|_, _, _| Value::Scalar(()),
);
}
29 changes: 29 additions & 0 deletions src/query/functions/tests/it/scalars/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ fn test_map() {
test_map_size(file);
test_map_cat(file);
test_map_contains_key(file);
test_map_pick(file);
}

fn test_map_cat(file: &mut impl Write) {
Expand Down Expand Up @@ -278,3 +279,31 @@ fn test_map_size(file: &mut impl Write) {
&columns,
);
}

/// Runs the `map_pick` cases: constant maps with keys passed variadically and
/// as an array, then a map assembled from string columns.
fn test_map_pick(file: &mut impl Write) {
    // Constant-only expressions need no input columns.
    for expr in [
        "map_pick({'a':1,'b':2,'c':3}, 'a', 'b')",
        "map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])",
    ] {
        run_ast(file, expr, &[]);
    }

    // Key columns (a..c) are plain strings.
    let a_col = StringType::from_data(vec!["a", "b", "c"]);
    let b_col = StringType::from_data(vec!["d", "e", "f"]);
    let c_col = StringType::from_data(vec!["x", "y", "z"]);
    // Value columns (d..f) are nullable strings.
    let d_col =
        StringType::from_data_with_validity(vec!["v1", "v2", "v3"], vec![true, true, true]);
    let e_col =
        StringType::from_data_with_validity(vec!["v4", "v5", ""], vec![true, true, false]);
    let f_col =
        StringType::from_data_with_validity(vec!["v6", "", "v7"], vec![true, false, true]);

    let columns = [
        ("a_col", a_col),
        ("b_col", b_col),
        ("c_col", c_col),
        ("d_col", d_col),
        ("e_col", e_col),
        ("f_col", f_col),
    ];
    run_ast(
        file,
        "map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')",
        &columns,
    );
}
Original file line number Diff line number Diff line change
Expand Up @@ -2443,6 +2443,9 @@ Functions overloads:
0 map_contains_key(Map(Nothing), T0) :: Boolean
1 map_contains_key(Map(T0, T1), T0) :: Boolean
2 map_contains_key(Map(T0, T1) NULL, T0 NULL) :: Boolean NULL
0 map_pick(Map(Nothing), Array(Nothing)) :: Map(Nothing)
1 map_pick(Map(T0, T1), Array(T0)) :: Map(T0, T1)
2 map_pick(Map(T0, T1) NULL, Array(T0) NULL) :: Map(T0, T1) NULL
0 map_keys(Map(Nothing)) :: Array(Nothing)
1 map_keys(Map(T0, T1)) :: Array(T0)
2 map_keys(Map(T0, T1) NULL) :: Array(T0) NULL
Expand Down
43 changes: 43 additions & 0 deletions src/query/functions/tests/it/scalars/testdata/map.txt
Original file line number Diff line number Diff line change
Expand Up @@ -617,3 +617,46 @@ evaluation (internal):
+--------+-----------------------------------------------------------------------------------------------------------------+


ast : map_pick({'a':1,'b':2,'c':3}, 'a', 'b')
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), 'a', 'b')
checked expr : map_pick<Map(String, UInt8), String, String>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), "a", "b")
optimized expr : {"a":1_u8, "b":2_u8}
output type : Map(String, UInt8)
output domain : {[{"a"..="b"}], [{1..=2}]}
output : {'a':1, 'b':2}


ast : map_pick({'a':1,'b':2,'c':3}, ['a', 'b'])
raw expr : map_pick(map(array('a', 'b', 'c'), array(1, 2, 3)), array('a', 'b'))
checked expr : map_pick<Map(String, UInt8), Array(String)>(map<T0=String, T1=UInt8><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>("a", "b", "c"), array<T0=UInt8><T0, T0, T0>(1_u8, 2_u8, 3_u8)), array<T0=String><T0, T0>("a", "b"))
optimized expr : {"a":1_u8, "b":2_u8}
output type : Map(String, UInt8)
output domain : {[{"a"..="b"}], [{1..=2}]}
output : {'a':1, 'b':2}


ast : map_pick(map([a_col, b_col, c_col], [d_col, e_col, f_col]), 'a', 'b')
raw expr : map_pick(map(array(a_col::String, b_col::String, c_col::String), array(d_col::String NULL, e_col::String NULL, f_col::String NULL)), 'a', 'b')
checked expr : map_pick<Map(String, String NULL), String, String>(map<T0=String, T1=String NULL><Array(T0), Array(T1)>(array<T0=String><T0, T0, T0>(a_col, b_col, c_col), array<T0=String NULL><T0, T0, T0>(d_col, e_col, f_col)), "a", "b")
evaluation:
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
| | a_col | b_col | c_col | d_col | e_col | f_col | Output |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
| Type | String | String | String | String NULL | String NULL | String NULL | Map(String, String NULL) |
| Domain | {"a"..="c"} | {"d"..="f"} | {"x"..="z"} | {"v1"..="v3"} | {""..="v5"} ∪ {NULL} | {""..="v7"} ∪ {NULL} | Unknown |
| Row 0 | 'a' | 'd' | 'x' | 'v1' | 'v4' | 'v6' | {'a':'v1', 'b':'v2'} |
+--------+-------------+-------------+-------------+---------------+----------------------+----------------------+--------------------------+
evaluation (internal):
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Column | Data |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| a_col | StringColumn { data: 0x616263, offsets: [0, 1, 2, 3] } |
| b_col | StringColumn { data: 0x646566, offsets: [0, 1, 2, 3] } |
| c_col | StringColumn { data: 0x78797a, offsets: [0, 1, 2, 3] } |
| d_col | NullableColumn { column: StringColumn { data: 0x763176327633, offsets: [0, 2, 4, 6] }, validity: [0b_____111] } |
| e_col | NullableColumn { column: StringColumn { data: 0x76347635, offsets: [0, 2, 4, 4] }, validity: [0b_____011] } |
| f_col | NullableColumn { column: StringColumn { data: 0x76367637, offsets: [0, 2, 2, 4] }, validity: [0b_____101] } |
| Output | ArrayColumn { values: Tuple([StringColumn { data: 0x6162, offsets: [0, 1, 2] }, NullableColumn { column: StringColumn { data: 0x76317632, offsets: [0, 2, 4] }, validity: [0b______11] }]), offsets: [0, 2] } |
+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+


Original file line number Diff line number Diff line change
Expand Up @@ -167,5 +167,30 @@ SELECT map_contains_key({'k1': 'v1', 'k2': NULL}, 'k2')
----
1

# Test map_pick function
query
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, 'k1', 'k3')
----
{'k1':'v1','k3':NULL}

query
SELECT map_pick({'k1': 'v1', 'k2': 'v2', 'k3': null}, '')
----
{}

statement ok
CREATE TABLE map_pick_test(col_str Map(String, String Null) Not Null, col_int Map(String, Int Null) Null)

statement ok
INSERT INTO map_pick_test VALUES ({'k1':'v1','k2':'v2','k3':null},{'a':10,'b':20}), ({'k5':'v5','k6':'v6'}, {'d':40,'e':null,'f':50})

query
SELECT map_pick(col_str, 'k1', 'k3')
FROM map_pick_test
----
{'k1':'v1','k3':NULL} {} {'a':10,'b':20}
{} {} {}
{} {} NULL

statement ok
DROP DATABASE map_func_test
Loading