From 69f64b90d84e47f559eb0e19dd84ac2d2976445a Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 24 Nov 2024 19:13:10 -0500 Subject: [PATCH] Preserve field name when casting List (#13468) * Add option to pass in field name to create array to support retaining field name during cast * add unit tests for list casting round trip * Documentation example was missing parameter * Rather than deprecate an existing function or change pub signature add in a parallel function for the small cases where we want to explicitly set the field name --- datafusion/common/src/scalar/mod.rs | 72 +++++++++++++++++++++++++---- datafusion/common/src/utils/mod.rs | 41 ++++++++++++++++ 2 files changed, 105 insertions(+), 8 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 5595f4f9fa70..edba0b84431f 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -40,7 +40,9 @@ use crate::cast::{ use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; use crate::utils::{ - array_into_fixed_size_list_array, array_into_large_list_array, array_into_list_array, + array_into_fixed_size_list_array_with_field_name, array_into_large_list_array, + array_into_large_list_array_with_field_name, array_into_list_array, + array_into_list_array_with_field_name, }; use arrow::compute::kernels::numeric::*; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; @@ -2663,27 +2665,36 @@ impl ScalarValue { let list_array = array.as_list::<i32>(); let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. 
- let arr = - Arc::new(array_into_list_array(nested_array, field.is_nullable())); + let arr = Arc::new(array_into_list_array_with_field_name( + nested_array, + field.is_nullable(), + field.name(), + )); ScalarValue::List(arr) } - DataType::LargeList(_) => { + DataType::LargeList(field) => { let list_array = as_large_list_array(array); let nested_array = list_array.value(index); // Produces a single element `LargeListArray` with the value at `index`. - let arr = Arc::new(array_into_large_list_array(nested_array)); + let arr = Arc::new(array_into_large_list_array_with_field_name( + nested_array, + field.name(), + )); ScalarValue::LargeList(arr) } // TODO: There is no test for FixedSizeList now, add it later - DataType::FixedSizeList(_, _) => { + DataType::FixedSizeList(field, _) => { let list_array = as_fixed_size_list_array(array)?; let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. let list_size = nested_array.len(); - let arr = - Arc::new(array_into_fixed_size_list_array(nested_array, list_size)); + let arr = Arc::new(array_into_fixed_size_list_array_with_field_name( + nested_array, + list_size, + field.name(), + )); ScalarValue::FixedSizeList(arr) } @@ -5970,6 +5981,51 @@ mod tests { ScalarValue::from("larger than 12 bytes string"), DataType::Utf8View, ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = + ListBuilder::new(Int32Builder::new()).with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::List(Arc::new(builder.finish())) + }, + DataType::List(Arc::new(Field::new("element", DataType::Int64, true))), + ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 1) + .with_field(element_field); + builder.values().append_value(1); + builder.append(true); + + 
ScalarValue::FixedSizeList(Arc::new(builder.finish())) + }, + DataType::FixedSizeList( + Arc::new(Field::new("element", DataType::Int64, true)), + 1, + ), + ); + check_scalar_cast( + { + let element_field = + Arc::new(Field::new("element", DataType::Int32, true)); + + let mut builder = + LargeListBuilder::new(Int32Builder::new()).with_field(element_field); + builder.append_value([Some(1)]); + builder.append(true); + + ScalarValue::LargeList(Arc::new(builder.finish())) + }, + DataType::LargeList(Arc::new(Field::new("element", DataType::Int64, true))), + ); } // mimics how casting work on scalar values by `casting` `scalar` to `desired_type` diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index dacf90af9bbf..d4c427aaa4a9 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -342,6 +342,20 @@ pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray { ) } +pub fn array_into_list_array_with_field_name( + arr: ArrayRef, + nullable: bool, + field_name: &str, +) -> ListArray { + let offsets = OffsetBuffer::from_lengths([arr.len()]); + ListArray::new( + Arc::new(Field::new(field_name, arr.data_type().to_owned(), nullable)), + offsets, + arr, + None, + ) +} + /// Wrap an array into a single element `LargeListArray`. 
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { @@ -354,6 +368,19 @@ pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { ) } +pub fn array_into_large_list_array_with_field_name( + arr: ArrayRef, + field_name: &str, +) -> LargeListArray { + let offsets = OffsetBuffer::from_lengths([arr.len()]); + LargeListArray::new( + Arc::new(Field::new(field_name, arr.data_type().to_owned(), true)), + offsets, + arr, + None, + ) +} + pub fn array_into_fixed_size_list_array( arr: ArrayRef, list_size: usize, @@ -367,6 +394,20 @@ pub fn array_into_fixed_size_list_array( ) } +pub fn array_into_fixed_size_list_array_with_field_name( + arr: ArrayRef, + list_size: usize, + field_name: &str, +) -> FixedSizeListArray { + let list_size = list_size as i32; + FixedSizeListArray::new( + Arc::new(Field::new(field_name, arr.data_type().to_owned(), true)), + list_size, + arr, + None, + ) +} + /// Wrap arrays into a single element `ListArray`. /// /// Example: