From a68ef017d1e79f389e9160e5e2d8de7da88d90be Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 18 Nov 2024 10:41:36 -0500 Subject: [PATCH 1/4] Add option to pass in field name to create array to support retaining field name during cast --- datafusion/common/src/scalar/mod.rs | 61 +++++++++++-------- datafusion/common/src/utils/mod.rs | 37 ++++++++--- .../src/aggregate/count_distinct/bytes.rs | 4 +- .../src/aggregate/count_distinct/native.rs | 4 +- .../functions-aggregate/src/array_agg.rs | 3 +- .../functions-aggregate/src/nth_value.rs | 1 + datafusion/functions-nested/src/make_array.rs | 2 +- datafusion/functions-nested/src/utils.rs | 11 ++-- 8 files changed, 82 insertions(+), 41 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 5595f4f9fa70..2d9edf65ad33 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -2090,7 +2090,7 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(array_into_list_array(values, nullable)) + Arc::new(array_into_list_array(values, nullable, None)) } /// Same as [`ScalarValue::new_list`] but with nullable set to true. @@ -2146,7 +2146,7 @@ impl ScalarValue { } else { Self::iter_to_array(values).unwrap() }; - Arc::new(array_into_list_array(values, nullable)) + Arc::new(array_into_list_array(values, nullable, None)) } /// Converts `Vec` where each element has type corresponding to @@ -2183,7 +2183,7 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(array_into_large_list_array(values)) + Arc::new(array_into_large_list_array(values, None)) } /// Converts a scalar value into an array of `size` rows. @@ -2663,27 +2663,36 @@ impl ScalarValue { let list_array = array.as_list::(); let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. - let arr = - Arc::new(array_into_list_array(nested_array, field.is_nullable())); + let arr = Arc::new(array_into_list_array( + nested_array, + field.is_nullable(), + Some(field.name()), + )); ScalarValue::List(arr) } - DataType::LargeList(_) => { + DataType::LargeList(field) => { let list_array = as_large_list_array(array); let nested_array = list_array.value(index); // Produces a single element `LargeListArray` with the value at `index`. - let arr = Arc::new(array_into_large_list_array(nested_array)); + let arr = Arc::new(array_into_large_list_array( + nested_array, + Some(field.name()), + )); ScalarValue::LargeList(arr) } // TODO: There is no test for FixedSizeList now, add it later - DataType::FixedSizeList(_, _) => { + DataType::FixedSizeList(field, _) => { let list_array = as_fixed_size_list_array(array)?; let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. let list_size = nested_array.len(); - let arr = - Arc::new(array_into_fixed_size_list_array(nested_array, list_size)); + let arr = Arc::new(array_into_fixed_size_list_array( + nested_array, + list_size, + Some(field.name()), + )); ScalarValue::FixedSizeList(arr) } @@ -4060,11 +4069,10 @@ mod tests { let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8); - let expected = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ - "rust", - "arrow", - "data-fusion", - ]))); + let expected = array_into_list_array_nullable( + Arc::new(StringArray::from(vec!["rust", "arrow", "data-fusion"])), + None, + ); assert_eq!(*result, expected); } @@ -4272,12 +4280,14 @@ mod tests { #[test] fn iter_to_array_string_test() { - let arr1 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ - "foo", "bar", "baz", - ]))); - let arr2 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ - "rust", "world", - ]))); + let arr1 = array_into_list_array_nullable( + Arc::new(StringArray::from(vec!["foo", "bar", "baz"])), + None, + ); + let arr2 = array_into_list_array_nullable( + Arc::new(StringArray::from(vec!["rust", "world"])), + None, + ); let scalars = vec![ ScalarValue::List(Arc::new(arr1)), @@ -5734,13 +5744,16 @@ mod tests { // Define list-of-structs scalars let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap(); - let nl0 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array))); + let nl0 = + ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array, None))); let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap(); - let nl1 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array))); + let nl1 = + ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array, None))); let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap(); - let nl2 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array))); + let nl2 = + ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array, None))); // iter_to_array for list-of-struct let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap(); diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index dacf90af9bbf..2ef754927c4f 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -324,18 +324,29 @@ pub fn longest_consecutive_prefix>( /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` /// The field in the list array is nullable. -pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray { - array_into_list_array(arr, true) +pub fn array_into_list_array_nullable( + arr: ArrayRef, + field_name: Option<&str>, +) -> ListArray { + array_into_list_array(arr, true, field_name) } /// Array Utils /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray { +pub fn array_into_list_array( + arr: ArrayRef, + nullable: bool, + field_name: Option<&str>, +) -> ListArray { let offsets = OffsetBuffer::from_lengths([arr.len()]); ListArray::new( - Arc::new(Field::new_list_field(arr.data_type().to_owned(), nullable)), + Arc::new(Field::new( + field_name.unwrap_or("item"), + arr.data_type().to_owned(), + nullable, + )), offsets, arr, None, @@ -344,10 +355,17 @@ pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray { /// Wrap an array into a single element `LargeListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { +pub fn array_into_large_list_array( + arr: ArrayRef, + field_name: Option<&str>, +) -> LargeListArray { let offsets = OffsetBuffer::from_lengths([arr.len()]); LargeListArray::new( - Arc::new(Field::new_list_field(arr.data_type().to_owned(), true)), + Arc::new(Field::new( + field_name.unwrap_or("item"), + arr.data_type().to_owned(), + true, + )), offsets, arr, None, @@ -357,10 +375,15 @@ pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { pub fn array_into_fixed_size_list_array( arr: ArrayRef, list_size: usize, + field_name: Option<&str>, ) -> FixedSizeListArray { let list_size = list_size as i32; FixedSizeListArray::new( - Arc::new(Field::new_list_field(arr.data_type().to_owned(), true)), + Arc::new(Field::new( + field_name.unwrap_or("item"), + arr.data_type().to_owned(), + true, + )), list_size, arr, None, diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs index 07fa4efc990e..e143e839ef94 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs @@ -49,7 +49,7 @@ impl Accumulator for BytesDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - let list = Arc::new(array_into_list_array_nullable(arr)); + let list = Arc::new(array_into_list_array_nullable(arr, None)); Ok(vec![ScalarValue::List(list)]) } @@ -109,7 +109,7 @@ impl Accumulator for BytesViewDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - let list = Arc::new(array_into_list_array_nullable(arr)); + let list = Arc::new(array_into_list_array_nullable(arr, None)); Ok(vec![ScalarValue::List(list)]) } diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs index 405b2c2db7bd..a00735904473 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs @@ -73,7 +73,7 @@ where PrimitiveArray::::from_iter_values(self.values.iter().cloned()) .with_data_type(self.data_type.clone()), ); - let list = Arc::new(array_into_list_array_nullable(arr)); + let list = Arc::new(array_into_list_array_nullable(arr, None)); Ok(vec![ScalarValue::List(list)]) } @@ -160,7 +160,7 @@ where let arr = Arc::new(PrimitiveArray::::from_iter_values( self.values.iter().map(|v| v.0), )) as ArrayRef; - let list = Arc::new(array_into_list_array_nullable(arr)); + let list = Arc::new(array_into_list_array_nullable(arr, None)); Ok(vec![ScalarValue::List(list)]) } diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 252a07cb11d8..e9a54caef5ca 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -240,7 +240,7 @@ impl Accumulator for ArrayAggAccumulator { } let concated_array = arrow::compute::concat(&element_arrays)?; - let list_array = array_into_list_array_nullable(concated_array); + let list_array = array_into_list_array_nullable(concated_array, None); Ok(ScalarValue::List(Arc::new(list_array))) } @@ -534,6 +534,7 @@ impl OrderSensitiveArrayAggAccumulator { StructArray::try_new(struct_field, column_wise_ordering_values, None)?; Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), + None, )))) } } diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index f3e892fa73d8..1bebdad4486a 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -427,6 +427,7 @@ impl NthValueAccumulator { Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), + None, )))) } diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 7aa3445f6858..b28a4c203c55 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -196,7 +196,7 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result { let length = arrays.iter().map(|a| a.len()).sum(); // By default Int64 let array = new_null_array(&DataType::Int64, length); - Ok(Arc::new(array_into_list_array_nullable(array))) + Ok(Arc::new(array_into_list_array_nullable(array, None))) } LargeList(..) => array_array::(arrays, data_type), _ => array_array::(arrays, data_type), diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index b9a75724bcde..cd9c88cf68e2 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -290,10 +290,12 @@ mod tests { ])); let array2d_1 = Arc::new(array_into_list_array_nullable( - Arc::clone(&array1d_1) as ArrayRef + Arc::clone(&array1d_1) as ArrayRef, + None, )) as ArrayRef; let array2d_2 = Arc::new(array_into_list_array_nullable( - Arc::clone(&array1d_2) as ArrayRef + Arc::clone(&array1d_2) as ArrayRef, + None, )) as ArrayRef; let res = align_array_dimensions::(vec![ @@ -310,8 +312,9 @@ mod tests { expected_dim ); - let array3d_1 = Arc::new(array_into_list_array_nullable(array2d_1)) as ArrayRef; - let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned()); + let array3d_1 = + Arc::new(array_into_list_array_nullable(array2d_1, None)) as ArrayRef; + let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned(), None); let res = align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2)]).unwrap(); From f0292d35898b8044ab701a3b4eae333bc1edc0f3 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 19 Nov 2024 08:54:53 -0500 Subject: [PATCH 2/4] add unit tests for list casting round trip --- datafusion/common/src/scalar/mod.rs | 45 +++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 2d9edf65ad33..ac318e3a4d0b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -5983,6 +5983,51 @@ mod tests { ScalarValue::from("larger than 12 bytes string"), DataType::Utf8View, ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = + ListBuilder::new(Int32Builder::new()).with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::List(Arc::new(builder.finish())) + }, + DataType::List(Arc::new(Field::new("element", DataType::Int64, true))), + ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1) + .with_field(element_field); + builder.values().append_value(1); + builder.append(true); + + ScalarValue::FixedSizeList(Arc::new(builder.finish())) + }, + DataType::FixedSizeList( + Arc::new(Field::new("element", DataType::Int64, true)), + 1, + ), + ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = + LargeListBuilder::new(Int32Builder::new()).with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::LargeList(Arc::new(builder.finish())) + }, + DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))), + ); } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` From bf61504de98360e761d5a5e0e15f659b9f7bcad9 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Tue, 19 Nov 2024 08:59:01 -0500 Subject: [PATCH 3/4] Documentation example was missing parameter --- datafusion/common/src/scalar/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index ac318e3a4d0b..f7cad69ebe64 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -2572,7 +2572,7 @@ impl ScalarValue { /// ]); /// /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ] - /// let list_arr = array_into_list_array_nullable(Arc::new(list_arr)); + /// let list_arr = array_into_list_array_nullable(Arc::new(list_arr), None); /// /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); From 9564d797b05452d1f99b795b30d20d91307081ff Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 23 Nov 2024 14:04:35 -0500 Subject: [PATCH 4/4] Rather than deprecate an existing function or change pub signature add in a parallel function for the small cases where we want to explicitly set the field name --- datafusion/common/src/scalar/mod.rs | 56 ++++++++------- datafusion/common/src/utils/mod.rs | 68 ++++++++++++------- .../src/aggregate/count_distinct/bytes.rs | 4 +- .../src/aggregate/count_distinct/native.rs | 4 +- .../functions-aggregate/src/array_agg.rs | 3 +- .../functions-aggregate/src/nth_value.rs | 1 - datafusion/functions-nested/src/make_array.rs | 2 +- datafusion/functions-nested/src/utils.rs | 11 ++- 8 files changed, 80 insertions(+), 69 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index f7cad69ebe64..edba0b84431f 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -40,7 +40,9 @@ use crate::cast::{ use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; use crate::utils::{ - array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array, + array_into_fixed_size_list_array_with_field_name, array_into_large_list_array, + array_into_large_list_array_with_field_name, array_into_list_array, + array_into_list_array_with_field_name, }; use arrow::compute::kernels::numeric::*; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; @@ -2090,7 +2092,7 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(array_into_list_array(values, nullable, None)) + Arc::new(array_into_list_array(values, nullable)) } /// Same as [`ScalarValue::new_list`] but with nullable set to true. @@ -2146,7 +2148,7 @@ impl ScalarValue { } else { Self::iter_to_array(values).unwrap() }; - Arc::new(array_into_list_array(values, nullable, None)) + Arc::new(array_into_list_array(values, nullable)) } /// Converts `Vec` where each element has type corresponding to @@ -2183,7 +2185,7 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(array_into_large_list_array(values, None)) + Arc::new(array_into_large_list_array(values)) } /// Converts a scalar value into an array of `size` rows. @@ -2572,7 +2574,7 @@ impl ScalarValue { /// ]); /// /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ] - /// let list_arr = array_into_list_array_nullable(Arc::new(list_arr), None); + /// let list_arr = array_into_list_array_nullable(Arc::new(list_arr)); /// /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); @@ -2663,10 +2665,10 @@ impl ScalarValue { let list_array = array.as_list::(); let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. - let arr = Arc::new(array_into_list_array( + let arr = Arc::new(array_into_list_array_with_field_name( nested_array, field.is_nullable(), - Some(field.name()), + field.name(), )); ScalarValue::List(arr) @@ -2675,9 +2677,9 @@ impl ScalarValue { let list_array = as_large_list_array(array); let nested_array = list_array.value(index); // Produces a single element `LargeListArray` with the value at `index`. - let arr = Arc::new(array_into_large_list_array( + let arr = Arc::new(array_into_large_list_array_with_field_name( nested_array, - Some(field.name()), + field.name(), )); ScalarValue::LargeList(arr) @@ -2688,10 +2690,10 @@ impl ScalarValue { let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. let list_size = nested_array.len(); - let arr = Arc::new(array_into_fixed_size_list_array( + let arr = Arc::new(array_into_fixed_size_list_array_with_field_name( nested_array, list_size, - Some(field.name()), + field.name(), )); ScalarValue::FixedSizeList(arr) @@ -4069,10 +4071,11 @@ mod tests { let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8); - let expected = array_into_list_array_nullable( - Arc::new(StringArray::from(vec!["rust", "arrow", "data-fusion"])), - None, - ); + let expected = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ + "rust", + "arrow", + "data-fusion", + ]))); assert_eq!(*result, expected); } @@ -4280,14 +4283,12 @@ mod tests { #[test] fn iter_to_array_string_test() { - let arr1 = array_into_list_array_nullable( - Arc::new(StringArray::from(vec!["foo", "bar", "baz"])), - None, - ); - let arr2 = array_into_list_array_nullable( - Arc::new(StringArray::from(vec!["rust", "world"])), - None, - ); + let arr1 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ + "foo", "bar", "baz", + ]))); + let arr2 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ + "rust", "world", + ]))); let scalars = vec![ ScalarValue::List(Arc::new(arr1)), @@ -5744,16 +5745,13 @@ mod tests { // Define list-of-structs scalars let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap(); - let nl0 = - ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array, None))); + let nl0 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array))); let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap(); - let nl1 = - ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array, None))); + let nl1 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array))); let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap(); - let nl2 = - ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array, None))); + let nl2 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array))); // iter_to_array for list-of-struct let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap(); diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 2ef754927c4f..d4c427aaa4a9 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -324,29 +324,32 @@ pub fn longest_consecutive_prefix>( /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` /// The field in the list array is nullable. -pub fn array_into_list_array_nullable( - arr: ArrayRef, - field_name: Option<&str>, -) -> ListArray { - array_into_list_array(arr, true, field_name) +pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray { + array_into_list_array(arr, true) } /// Array Utils /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -pub fn array_into_list_array( +pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray { + let offsets = OffsetBuffer::from_lengths([arr.len()]); + ListArray::new( + Arc::new(Field::new_list_field(arr.data_type().to_owned(), nullable)), + offsets, + arr, + None, + ) +} + +pub fn array_into_list_array_with_field_name( arr: ArrayRef, nullable: bool, - field_name: Option<&str>, + field_name: &str, ) -> ListArray { let offsets = OffsetBuffer::from_lengths([arr.len()]); ListArray::new( - Arc::new(Field::new( - field_name.unwrap_or("item"), - arr.data_type().to_owned(), - nullable, - )), + Arc::new(Field::new(field_name, arr.data_type().to_owned(), nullable)), offsets, arr, None, @@ -355,17 +358,23 @@ pub fn array_into_list_array( /// Wrap an array into a single element `LargeListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -pub fn array_into_large_list_array( +pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { + let offsets = OffsetBuffer::from_lengths([arr.len()]); + LargeListArray::new( + Arc::new(Field::new_list_field(arr.data_type().to_owned(), true)), + offsets, + arr, + None, + ) +} + +pub fn array_into_large_list_array_with_field_name( arr: ArrayRef, - field_name: Option<&str>, + field_name: &str, ) -> LargeListArray { let offsets = OffsetBuffer::from_lengths([arr.len()]); LargeListArray::new( - Arc::new(Field::new( - field_name.unwrap_or("item"), - arr.data_type().to_owned(), - true, - )), + Arc::new(Field::new(field_name, arr.data_type().to_owned(), true)), offsets, arr, None, @@ -375,15 +384,24 @@ pub fn array_into_large_list_array( pub fn array_into_fixed_size_list_array( arr: ArrayRef, list_size: usize, - field_name: Option<&str>, ) -> FixedSizeListArray { let list_size = list_size as i32; FixedSizeListArray::new( - Arc::new(Field::new( - field_name.unwrap_or("item"), - arr.data_type().to_owned(), - true, - )), + Arc::new(Field::new_list_field(arr.data_type().to_owned(), true)), + list_size, + arr, + None, + ) +} + +pub fn array_into_fixed_size_list_array_with_field_name( + arr: ArrayRef, + list_size: usize, + field_name: &str, +) -> FixedSizeListArray { + let list_size = list_size as i32; + FixedSizeListArray::new( + Arc::new(Field::new(field_name, arr.data_type().to_owned(), true)), list_size, arr, None, diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs index e143e839ef94..07fa4efc990e 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs @@ -49,7 +49,7 @@ impl Accumulator for BytesDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - let list = Arc::new(array_into_list_array_nullable(arr, None)); + let list = Arc::new(array_into_list_array_nullable(arr)); Ok(vec![ScalarValue::List(list)]) } @@ -109,7 +109,7 @@ impl Accumulator for BytesViewDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - let list = Arc::new(array_into_list_array_nullable(arr, None)); + let list = Arc::new(array_into_list_array_nullable(arr)); Ok(vec![ScalarValue::List(list)]) } diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs index a00735904473..405b2c2db7bd 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs @@ -73,7 +73,7 @@ where PrimitiveArray::::from_iter_values(self.values.iter().cloned()) .with_data_type(self.data_type.clone()), ); - let list = Arc::new(array_into_list_array_nullable(arr, None)); + let list = Arc::new(array_into_list_array_nullable(arr)); Ok(vec![ScalarValue::List(list)]) } @@ -160,7 +160,7 @@ where let arr = Arc::new(PrimitiveArray::::from_iter_values( self.values.iter().map(|v| v.0), )) as ArrayRef; - let list = Arc::new(array_into_list_array_nullable(arr, None)); + let list = Arc::new(array_into_list_array_nullable(arr)); Ok(vec![ScalarValue::List(list)]) } diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index e9a54caef5ca..252a07cb11d8 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -240,7 +240,7 @@ impl Accumulator for ArrayAggAccumulator { } let concated_array = arrow::compute::concat(&element_arrays)?; - let list_array = array_into_list_array_nullable(concated_array, None); + let list_array = array_into_list_array_nullable(concated_array); Ok(ScalarValue::List(Arc::new(list_array))) } @@ -534,7 +534,6 @@ impl OrderSensitiveArrayAggAccumulator { StructArray::try_new(struct_field, column_wise_ordering_values, None)?; Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), - None, )))) } } diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 1bebdad4486a..f3e892fa73d8 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -427,7 +427,6 @@ impl NthValueAccumulator { Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), - None, )))) } diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index b28a4c203c55..7aa3445f6858 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -196,7 +196,7 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result { let length = arrays.iter().map(|a| a.len()).sum(); // By default Int64 let array = new_null_array(&DataType::Int64, length); - Ok(Arc::new(array_into_list_array_nullable(array, None))) + Ok(Arc::new(array_into_list_array_nullable(array))) } LargeList(..) => array_array::(arrays, data_type), _ => array_array::(arrays, data_type), diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index cd9c88cf68e2..b9a75724bcde 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -290,12 +290,10 @@ mod tests { ])); let array2d_1 = Arc::new(array_into_list_array_nullable( - Arc::clone(&array1d_1) as ArrayRef, - None, + Arc::clone(&array1d_1) as ArrayRef )) as ArrayRef; let array2d_2 = Arc::new(array_into_list_array_nullable( - Arc::clone(&array1d_2) as ArrayRef, - None, + Arc::clone(&array1d_2) as ArrayRef )) as ArrayRef; let res = align_array_dimensions::(vec![ @@ -312,9 +310,8 @@ mod tests { expected_dim ); - let array3d_1 = - Arc::new(array_into_list_array_nullable(array2d_1, None)) as ArrayRef; - let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned(), None); + let array3d_1 = Arc::new(array_into_list_array_nullable(array2d_1)) as ArrayRef; + let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned()); let res = align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2)]).unwrap();