From ae85263a9715b873ccc399cbd5b4adef01987a79 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 6 Feb 2024 10:05:27 -0800 Subject: [PATCH] feat: support casting FixedSizeList with new child type (#5360) --- arrow-cast/src/cast.rs | 185 +++++++++++++++++++++++++++++------------ 1 file changed, 131 insertions(+), 54 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index e321ae1b1b2..a813c5f6c87 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -133,11 +133,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { can_cast_types(list_from.data_type(), list_to.data_type()) } (List(_), _) => false, - (FixedSizeList(list_from,_), List(list_to)) => { - list_from.data_type() == list_to.data_type() - } + (FixedSizeList(list_from,_), List(list_to)) | (FixedSizeList(list_from,_), LargeList(list_to)) => { - list_from.data_type() == list_to.data_type() + can_cast_types(list_from.data_type(), list_to.data_type()) + } + (FixedSizeList(inner, size), FixedSizeList(inner_to, size_to)) if size == size_to => { + can_cast_types(inner.data_type(), inner_to.data_type()) } (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()), (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()), @@ -784,24 +785,41 @@ pub fn cast_with_options( "Cannot cast list to non-list data types".to_string(), )), }, - (FixedSizeList(list_from, _), List(list_to)) => { + (FixedSizeList(list_from, size), List(list_to)) => { if list_to.data_type() != list_from.data_type() { - Err(ArrowError::CastError( - "cannot cast fixed-size-list to list with different child data".into(), - )) + // To transform inner type, can first cast to FSL with new inner type. + let fsl_to = DataType::FixedSizeList(list_to.clone(), *size); + let array = cast_with_options(array, &fsl_to, cast_options)?; + cast_fixed_size_list_to_list::(array.as_ref()) } else { cast_fixed_size_list_to_list::(array) } } - (FixedSizeList(list_from, _), LargeList(list_to)) => { + (FixedSizeList(list_from, size), LargeList(list_to)) => { if list_to.data_type() != list_from.data_type() { - Err(ArrowError::CastError( - "cannot cast fixed-size-list to largelist with different child data".into(), - )) + // To transform inner type, can first cast to FSL with new inner type. + let fsl_to = DataType::FixedSizeList(list_to.clone(), *size); + let array = cast_with_options(array, &fsl_to, cast_options)?; + cast_fixed_size_list_to_list::(array.as_ref()) } else { cast_fixed_size_list_to_list::(array) } } + (FixedSizeList(_, size_from), FixedSizeList(list_to, size_to)) => { + if size_from != size_to { + return Err(ArrowError::CastError( + "cannot cast fixed-size-list to fixed-size-list with different size".into(), + )); + } + let array = array.as_any().downcast_ref::().unwrap(); + let values = cast_with_options(array.values(), list_to.data_type(), cast_options)?; + Ok(Arc::new(FixedSizeListArray::try_new( + list_to.clone(), + *size_from, + values, + array.nulls().cloned(), + )?)) + } (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), (_, FixedSizeList(ref to, size)) if *size == 1 => { @@ -7561,6 +7579,37 @@ mod tests { assert_eq!(946684800000, c.value(5)); } + #[test] + fn test_can_cast_fsl_to_fsl() { + let from_array = Arc::new( + FixedSizeListArray::from_iter_primitive::( + [Some([Some(1.0), Some(2.0)]), None], + 2, + ), + ) as ArrayRef; + let to_array = Arc::new( + FixedSizeListArray::from_iter_primitive::( + [ + Some([Some(f16::from_f32(1.0)), Some(f16::from_f32(2.0))]), + None, + ], + 2, + ), + ) as ArrayRef; + + assert!(can_cast_types(from_array.data_type(), to_array.data_type())); + let actual = cast(&from_array, to_array.data_type()).unwrap(); + assert_eq!(actual.data_type(), to_array.data_type()); + + let invalid_target = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Binary, true)), 2); + assert!(!can_cast_types(from_array.data_type(), &invalid_target)); + + let invalid_size = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float16, true)), 5); + assert!(!can_cast_types(from_array.data_type(), &invalid_size)); + } + #[test] fn test_can_cast_types_fixed_size_list_to_list() { // DataType::List @@ -7580,50 +7629,78 @@ mod tests { #[test] fn test_cast_fixed_size_list_to_list() { - // DataType::List - let array1 = Arc::new(make_fixed_size_list_array()) as ArrayRef; - let list_array1 = cast( - &array1, - &DataType::List(Arc::new(Field::new("", DataType::Int32, false))), - ) - .unwrap(); - let actual = list_array1.as_any().downcast_ref::().unwrap(); - let expected = array1 - .as_any() - .downcast_ref::() - .unwrap(); + // Important cases: + // 1. With/without nulls + // 2. LargeList and List + // 3. With and without inner casts + + let cases = [ + // fixed_size_list => list + ( + Arc::new(FixedSizeListArray::from_iter_primitive::( + [[1, 1].map(Some), [2, 2].map(Some)].map(Some), + 2, + )) as ArrayRef, + Arc::new(ListArray::from_iter_primitive::([ + Some([Some(1), Some(1)]), + Some([Some(2), Some(2)]), + ])) as ArrayRef, + ), + // fixed_size_list => list (nullable) + ( + Arc::new(FixedSizeListArray::from_iter_primitive::( + [None, Some([Some(2), Some(2)])], + 2, + )) as ArrayRef, + Arc::new(ListArray::from_iter_primitive::([ + None, + Some([Some(2), Some(2)]), + ])) as ArrayRef, + ), + // fixed_size_list => large_list + ( + Arc::new(FixedSizeListArray::from_iter_primitive::( + [[1, 1].map(Some), [2, 2].map(Some)].map(Some), + 2, + )) as ArrayRef, + Arc::new(LargeListArray::from_iter_primitive::([ + Some([Some(1), Some(1)]), + Some([Some(2), Some(2)]), + ])) as ArrayRef, + ), + // fixed_size_list => large_list (nullable) + ( + Arc::new(FixedSizeListArray::from_iter_primitive::( + [None, Some([Some(2), Some(2)])], + 2, + )) as ArrayRef, + Arc::new(LargeListArray::from_iter_primitive::([ + None, + Some([Some(2), Some(2)]), + ])) as ArrayRef, + ), + ]; - assert_eq!(expected.values(), actual.values()); - assert_eq!(expected.len(), actual.len()); + for (array, expected) in cases { + let array = Arc::new(array) as ArrayRef; - // DataType::LargeList - let array2 = Arc::new(make_fixed_size_list_array_for_large_list()) as ArrayRef; - let list_array2 = cast( - &array2, - &DataType::LargeList(Arc::new(Field::new("", DataType::Int64, false))), - ) - .unwrap(); - let actual = list_array2 - .as_any() - .downcast_ref::() - .unwrap(); - let expected = array2 - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(expected.values(), actual.values()); - assert_eq!(expected.len(), actual.len()); - - // Cast previous LargeList to List - let array3 = Arc::new(actual.clone()) as ArrayRef; - let list_array3 = cast( - &array3, - &DataType::List(Arc::new(Field::new("", DataType::Int64, false))), - ) - .unwrap(); - let actual = list_array3.as_any().downcast_ref::().unwrap(); - let expected = array3.as_any().downcast_ref::().unwrap(); - assert_eq!(expected.values(), actual.values()); + assert!( + can_cast_types(array.data_type(), expected.data_type()), + "can_cast_types claims we cannot cast {:?} to {:?}", + array.data_type(), + expected.data_type() + ); + + let list_array = cast(&array, expected.data_type()) + .unwrap_or_else(|_| panic!("Failed to cast {:?} to {:?}", array, expected)); + assert_eq!( + list_array.as_ref(), + &expected, + "Incorrect result from casting {:?} to {:?}", + array, + expected + ); + } } #[test]