From c3fda8f2f2d9b3c849c6b257e46ca9aa0732f3e2 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 13 Jun 2023 15:14:47 +0100 Subject: [PATCH 1/2] Remove Binary Dictionary Arithmetic Support --- .github/workflows/arrow.yml | 8 +- arrow-arith/Cargo.toml | 4 - arrow-arith/src/arithmetic.rs | 1024 ++------------------------------- arrow/Cargo.toml | 5 +- arrow/README.md | 1 - 5 files changed, 63 insertions(+), 979 deletions(-) diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml index 35e70c8f070c..279e276a7912 100644 --- a/.github/workflows/arrow.yml +++ b/.github/workflows/arrow.yml @@ -83,7 +83,7 @@ jobs: - name: Test arrow-ord with all features except SIMD run: cargo test -p arrow-ord --features dyn_cmp_dict - name: Test arrow-arith with all features except SIMD - run: cargo test -p arrow-arith --features dyn_arith_dict + run: cargo test -p arrow-arith - name: Test arrow-row with all features run: cargo test -p arrow-row --all-features - name: Test arrow-integration-test with all features @@ -91,7 +91,7 @@ jobs: - name: Test arrow with default features run: cargo test -p arrow - name: Test arrow with all features apart from simd - run: cargo test -p arrow --features=force_validate,prettyprint,ipc_compression,ffi,dyn_cmp_dict,dyn_arith_dict,chrono-tz + run: cargo test -p arrow --features=force_validate,prettyprint,ipc_compression,ffi,dyn_cmp_dict,chrono-tz - name: Run examples run: | # Test arrow examples @@ -209,11 +209,11 @@ jobs: - name: Clippy arrow-ord with all features except SIMD run: cargo clippy -p arrow-ord --all-targets --features dyn_cmp_dict -- -D warnings - name: Clippy arrow-arith with all features except SIMD - run: cargo clippy -p arrow-arith --all-targets --features dyn_arith_dict -- -D warnings + run: cargo clippy -p arrow-arith --all-targets -- -D warnings - name: Clippy arrow-row with all features run: cargo clippy -p arrow-row --all-targets --all-features -- -D warnings - name: Clippy arrow with all features except SIMD - run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict,chrono-tz --all-targets -- -D warnings + run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,chrono-tz --all-targets -- -D warnings - name: Clippy arrow-integration-test with all features run: cargo clippy -p arrow-integration-test --all-targets --all-features -- -D warnings - name: Clippy arrow-integration-testing with all features diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml index 4460d116b466..b5ea2e3c4354 100644 --- a/arrow-arith/Cargo.toml +++ b/arrow-arith/Cargo.toml @@ -44,9 +44,5 @@ num = { version = "0.4", default-features = false, features = ["std"] } [dev-dependencies] -[package.metadata.docs.rs] -features = ["dyn_arith_dict"] - [features] -dyn_arith_dict = [] simd = ["arrow-array/simd"] diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index c3c5cb864ed2..39d8adc0f308 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -89,48 +89,6 @@ where math_checked_op(left, right, op) } -/// Helper function for operations where a valid `0` on the right array should -/// result in an [ArrowError::DivideByZero], namely the division and modulo operations -/// -/// # Errors -/// -/// This function errors if: -/// * the arrays have different lengths -/// * there is an element where both left and right values are valid and the right value is `0` -#[cfg(feature = "dyn_arith_dict")] -fn math_checked_divide_op_on_iters( - left: impl Iterator>, - right: impl Iterator>, - op: F, - nulls: Option, -) -> Result, ArrowError> -where - T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, -{ - let buffer = if nulls.is_some() { - let values = left.zip(right).map(|(left, right)| { - if let (Some(l), Some(r)) = (left, right) { - op(l, r) - } else { - Ok(T::default_value()) - } - }); - // Safety: Iterator comes from a PrimitiveArray which reports its size correctly - unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) } - } else { - // no value is null - let values = left - .map(|l| l.unwrap()) - .zip(right.map(|r| r.unwrap())) - .map(|(left, right)| op(left, right)); - // Safety: Iterator comes from a PrimitiveArray which reports its size correctly - unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) } - }?; - - Ok(PrimitiveArray::new(buffer.into(), nulls)) -} - /// Calculates the modulus operation `left % right` on two SIMD inputs. /// The lower-most bits of `valid_mask` specify which vector lanes are considered as valid. /// @@ -358,290 +316,6 @@ where Ok(PrimitiveArray::new(result.into(), nulls)) } -/// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the same) key type $KT -#[cfg(feature = "dyn_arith_dict")] -macro_rules! typed_dict_op { - ($LEFT: expr, $RIGHT: expr, $OP: expr, $KT: tt, $MATH_OP: ident) => {{ - match ($LEFT.value_type(), $RIGHT.value_type()) { - (DataType::Int8, DataType::Int8) => { - let array = $MATH_OP::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Int16, DataType::Int16) => { - let array = $MATH_OP::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Int32, DataType::Int32) => { - let array = $MATH_OP::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Int64, DataType::Int64) => { - let array = $MATH_OP::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::UInt8, DataType::UInt8) => { - let array = $MATH_OP::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::UInt16, DataType::UInt16) => { - let array = $MATH_OP::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::UInt32, DataType::UInt32) => { - let array = $MATH_OP::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::UInt64, DataType::UInt64) => { - let array = $MATH_OP::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Float32, DataType::Float32) => { - let array = $MATH_OP::<$KT, Float32Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Float64, DataType::Float64) => { - let array = $MATH_OP::<$KT, Float64Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Decimal128(_, s1), DataType::Decimal128(_, s2)) if s1 == s2 => { - let array = $MATH_OP::<$KT, Decimal128Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (DataType::Decimal256(_, s1), DataType::Decimal256(_, s2)) if s1 == s2 => { - let array = $MATH_OP::<$KT, Decimal256Type, _>($LEFT, $RIGHT, $OP)?; - Ok(Arc::new(array)) - } - (t1, t2) => Err(ArrowError::CastError(format!( - "Cannot perform arithmetic operation on two dictionary arrays of different value types ({} and {})", - t1, t2 - ))), - } - }}; -} - -#[cfg(feature = "dyn_arith_dict")] -macro_rules! typed_dict_math_op { - // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are `DictionaryArray` - ($LEFT: expr, $RIGHT: expr, $OP: expr, $MATH_OP: ident) => {{ - match ($LEFT.data_type(), $RIGHT.data_type()) { - (DataType::Dictionary(left_key_type, _), DataType::Dictionary(right_key_type, _))=> { - match (left_key_type.as_ref(), right_key_type.as_ref()) { - (DataType::Int8, DataType::Int8) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, Int8Type, $MATH_OP) - } - (DataType::Int16, DataType::Int16) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, Int16Type, $MATH_OP) - } - (DataType::Int32, DataType::Int32) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, Int32Type, $MATH_OP) - } - (DataType::Int64, DataType::Int64) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, Int64Type, $MATH_OP) - } - (DataType::UInt8, DataType::UInt8) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, UInt8Type, $MATH_OP) - } - (DataType::UInt16, DataType::UInt16) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, UInt16Type, $MATH_OP) - } - (DataType::UInt32, DataType::UInt32) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, UInt32Type, $MATH_OP) - } - (DataType::UInt64, DataType::UInt64) => { - let left = as_dictionary_array::($LEFT); - let right = as_dictionary_array::($RIGHT); - typed_dict_op!(left, right, $OP, UInt64Type, $MATH_OP) - } - (t1, t2) => Err(ArrowError::CastError(format!( - "Cannot perform arithmetic operation on two dictionary arrays of different key types ({} and {})", - t1, t2 - ))), - } - } - (t1, t2) => Err(ArrowError::CastError(format!( - "Cannot perform arithmetic operation on dictionary array with non-dictionary array ({} and {})", - t1, t2 - ))), - } - }}; -} - -#[cfg(not(feature = "dyn_arith_dict"))] -macro_rules! typed_dict_math_op { - // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are `DictionaryArray` - ($LEFT: expr, $RIGHT: expr, $OP: expr, $MATH_OP: ident) => {{ - Err(ArrowError::CastError(format!( - "Arithmetic on arrays of type {} with array of type {} requires \"dyn_arith_dict\" feature", - $LEFT.data_type(), $RIGHT.data_type() - ))) - }}; -} - -/// Perform given operation on two `DictionaryArray`s. -/// Returns an error if the two arrays have different value type -#[cfg(feature = "dyn_arith_dict")] -fn math_op_dict( - left: &DictionaryArray, - right: &DictionaryArray, - op: F, -) -> Result, ArrowError> -where - K: ArrowDictionaryKeyType + ArrowNumericType, - T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> T::Native, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError(format!( - "Cannot perform operation on arrays of different length ({}, {})", - left.len(), - right.len() - ))); - } - - // Safety justification: Since the inputs are valid Arrow arrays, all values are - // valid indexes into the dictionary (which is verified during construction) - - let left_iter = unsafe { - left.values() - .as_any() - .downcast_ref::>() - .unwrap() - .take_iter_unchecked(left.keys_iter()) - }; - - let right_iter = unsafe { - right - .values() - .as_any() - .downcast_ref::>() - .unwrap() - .take_iter_unchecked(right.keys_iter()) - }; - - let result = left_iter - .zip(right_iter) - .map(|(left_value, right_value)| { - if let (Some(left), Some(right)) = (left_value, right_value) { - Some(op(left, right)) - } else { - None - } - }) - .collect(); - - Ok(result) -} - -/// Perform given operation on two `DictionaryArray`s. -/// Returns an error if the two arrays have different value type -#[cfg(feature = "dyn_arith_dict")] -fn math_checked_op_dict( - left: &DictionaryArray, - right: &DictionaryArray, - op: F, -) -> Result, ArrowError> -where - K: ArrowDictionaryKeyType + ArrowNumericType, - T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, -{ - // left and right's value types are supposed to be same as guaranteed by the caller macro now. - if left.value_type() != T::DATA_TYPE { - return Err(ArrowError::NotYetImplemented(format!( - "Cannot perform provided operation on dictionary array of value type {}", - left.value_type() - ))); - } - - let left = left.downcast_dict::>().unwrap(); - let right = right.downcast_dict::>().unwrap(); - - try_binary(left, right, op) -} - -/// Helper function for operations where a valid `0` on the right array should -/// result in an [ArrowError::DivideByZero], namely the division and modulo operations -/// -/// # Errors -/// -/// This function errors if: -/// * the arrays have different lengths -/// * there is an element where both left and right values are valid and the right value is `0` -#[cfg(feature = "dyn_arith_dict")] -fn math_divide_checked_op_dict( - left: &DictionaryArray, - right: &DictionaryArray, - op: F, -) -> Result, ArrowError> -where - K: ArrowDictionaryKeyType + ArrowNumericType, - T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError(format!( - "Cannot perform operation on arrays of different length ({}, {})", - left.len(), - right.len() - ))); - } - - let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls()); - - // Safety justification: Since the inputs are valid Arrow arrays, all values are - // valid indexes into the dictionary (which is verified during construction) - - let left_iter = unsafe { - left.values() - .as_any() - .downcast_ref::>() - .unwrap() - .take_iter_unchecked(left.keys_iter()) - }; - - let right_iter = unsafe { - right - .values() - .as_any() - .downcast_ref::>() - .unwrap() - .take_iter_unchecked(right.keys_iter()) - }; - - math_checked_divide_op_on_iters(left_iter, right_iter, op, nulls) -} - -#[cfg(feature = "dyn_arith_dict")] -fn math_divide_safe_op_dict( - left: &DictionaryArray, - right: &DictionaryArray, - op: F, -) -> Result -where - K: ArrowDictionaryKeyType + ArrowNumericType, - T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Option, -{ - let left = left.downcast_dict::>().unwrap(); - let right = right.downcast_dict::>().unwrap(); - let array: PrimitiveArray = binary_opt::<_, _, _, T>(left, right, op)?; - Ok(Arc::new(array) as ArrayRef) -} - fn math_safe_divide_op( left: &PrimitiveArray, right: &PrimitiveArray, @@ -687,9 +361,6 @@ pub fn add_checked( /// For an overflow-checking variant, use `add_dyn_checked` instead. pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!(left, right, |a, b| a.add_wrapping(b), math_op_dict) - } DataType::Date32 => { let l = left.as_primitive::(); match right.data_type() { @@ -870,14 +541,6 @@ pub fn add_dyn_checked( right: &dyn Array, ) -> Result { match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| a.add_checked(b), - math_checked_op_dict - ) - } DataType::Date32 => { let l = left.as_primitive::(); match right.data_type() { @@ -1027,9 +690,6 @@ pub fn subtract_checked( /// For an overflow-checking variant, use `subtract_dyn_checked` instead. pub fn subtract_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!(left, right, |a, b| a.sub_wrapping(b), math_op_dict) - } DataType::Date32 => { let l = left.as_primitive::(); match right.data_type() { @@ -1218,14 +878,6 @@ pub fn subtract_dyn_checked( right: &dyn Array, ) -> Result { match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| a.sub_checked(b), - math_checked_op_dict - ) - } DataType::Date32 => { let l = left.as_primitive::(); match right.data_type() { @@ -1445,22 +1097,15 @@ pub fn multiply_checked( /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `multiply_dyn_checked` instead. pub fn multiply_dyn(left: &dyn Array, right: &dyn Array) -> Result { - match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!(left, right, |a, b| a.mul_wrapping(b), math_op_dict) + downcast_primitive_array!( + (left, right) => { + math_op(left, right, |a, b| a.mul_wrapping(b)).map(|a| Arc::new(a) as ArrayRef) } - _ => { - downcast_primitive_array!( - (left, right) => { - math_op(left, right, |a, b| a.mul_wrapping(b)).map(|a| Arc::new(a) as ArrayRef) - } - _ => Err(ArrowError::CastError(format!( - "Unsupported data type {}, {}", - left.data_type(), right.data_type() - ))) - ) - } - } + _ => Err(ArrowError::CastError(format!( + "Unsupported data type {}, {}", + left.data_type(), right.data_type() + ))) + ) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -1473,14 +1118,6 @@ pub fn multiply_dyn_checked( right: &dyn Array, ) -> Result { match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| a.mul_checked(b), - math_checked_op_dict - ) - } _ => { downcast_primitive_array!( (left, right) => { @@ -1495,16 +1132,6 @@ pub fn multiply_dyn_checked( } } -#[cfg(feature = "dyn_arith_dict")] -fn get_precision_scale(dt: &DataType) -> Result<(u8, i8), ArrowError> { - match dt { - DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), - _ => Err(ArrowError::ComputeError( - "Cannot get precision and scale from non-decimal type".to_string(), - )), - } -} - /// Returns the precision and scale of the result of a multiplication of two decimal types, /// and the divisor for fixed point multiplication. fn get_fixed_point_info( @@ -1528,7 +1155,6 @@ fn get_fixed_point_info( Ok((precision, product_scale, divisor)) } -#[cfg(feature = "dyn_arith_dict")] /// Perform `left * right` operation on two decimal arrays. If either left or right value is /// null then the result is also null. /// @@ -1549,45 +1175,6 @@ pub fn multiply_fixed_point_dyn( required_scale: i8, ) -> Result { match (left.data_type(), right.data_type()) { - ( - DataType::Dictionary(_, lhs_value_type), - DataType::Dictionary(_, rhs_value_type), - ) if matches!(lhs_value_type.as_ref(), &DataType::Decimal128(_, _)) - && matches!(rhs_value_type.as_ref(), &DataType::Decimal128(_, _)) => - { - downcast_dictionary_array!( - left => match left.values().data_type() { - DataType::Decimal128(_, _) => { - let lhs_precision_scale = get_precision_scale(lhs_value_type.as_ref())?; - let rhs_precision_scale = get_precision_scale(rhs_value_type.as_ref())?; - - let (precision, product_scale, divisor) = get_fixed_point_info(lhs_precision_scale, rhs_precision_scale, required_scale)?; - - let right = as_dictionary_array::<_>(right); - - if required_scale == product_scale { - let mul = multiply_dyn(left, right)?; - let array = mul.as_any().downcast_ref::().unwrap(); - let array = array.clone().with_precision_and_scale(precision, required_scale)?; - return Ok(Arc::new(array)) - } - - let array = math_op_dict::<_, Decimal128Type, _>(left, right, |a, b| { - let a = i256::from_i128(a); - let b = i256::from_i128(b); - - let mut mul = a.wrapping_mul(b); - mul = divide_and_round::(mul, divisor); - mul.as_i128() - }).and_then(|a| a.with_precision_and_scale(precision, required_scale))?; - - Ok(Arc::new(array)) - } - t => unreachable!("Unsupported dictionary value type {}", t), - }, - t => unreachable!("Unsupported data type {}", t), - ) - } (DataType::Decimal128(_, _), DataType::Decimal128(_, _)) => { let left = left.as_any().downcast_ref::().unwrap(); let right = right.as_any().downcast_ref::().unwrap(); @@ -1782,39 +1369,21 @@ pub fn modulus( /// then the result is also null. If any right hand value is zero then the result of this /// operation will be `Err(ArrowError::DivideByZero)`. pub fn modulus_dyn(left: &dyn Array, right: &dyn Array) -> Result { - match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| { - if b.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(a.mod_wrapping(b)) - } - }, - math_divide_checked_op_dict - ) + downcast_primitive_array!( + (left, right) => { + math_checked_divide_op(left, right, |a, b| { + if b.is_zero() { + Err(ArrowError::DivideByZero) + } else { + Ok(a.mod_wrapping(b)) + } + }).map(|a| Arc::new(a) as ArrayRef) } - _ => { - downcast_primitive_array!( - (left, right) => { - math_checked_divide_op(left, right, |a, b| { - if b.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(a.mod_wrapping(b)) - } - }).map(|a| Arc::new(a) as ArrayRef) - } - _ => Err(ArrowError::CastError(format!( - "Unsupported data type {}, {}", - left.data_type(), right.data_type() - ))) - ) - } - } + _ => Err(ArrowError::CastError(format!( + "Unsupported data type {}, {}", + left.data_type(), right.data_type() + ))) + ) } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1869,39 +1438,21 @@ pub fn divide_opt( /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `divide_dyn_checked` instead. pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { - match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| { - if b.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(a.div_wrapping(b)) - } - }, - math_divide_checked_op_dict - ) - } - _ => { - downcast_primitive_array!( - (left, right) => { - math_checked_divide_op(left, right, |a, b| { - if b.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(a.div_wrapping(b)) - } - }).map(|a| Arc::new(a) as ArrayRef) - } - _ => Err(ArrowError::CastError(format!( - "Unsupported data type {}, {}", - left.data_type(), right.data_type() - ))) - ) + downcast_primitive_array!( + (left, right) => { + math_checked_divide_op(left, right, |a, b| { + if b.is_zero() { + Err(ArrowError::DivideByZero) + } else { + Ok(a.div_wrapping(b)) + } + }).map(|a| Arc::new(a) as ArrayRef) } - } + _ => Err(ArrowError::CastError(format!( + "Unsupported data type {}, {}", + left.data_type(), right.data_type() + ))) + ) } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1914,27 +1465,15 @@ pub fn divide_dyn_checked( left: &dyn Array, right: &dyn Array, ) -> Result { - match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| a.div_checked(b), - math_divide_checked_op_dict - ) + downcast_primitive_array!( + (left, right) => { + math_checked_divide_op(left, right, |a, b| a.div_checked(b)).map(|a| Arc::new(a) as ArrayRef) } - _ => { - downcast_primitive_array!( - (left, right) => { - math_checked_divide_op(left, right, |a, b| a.div_checked(b)).map(|a| Arc::new(a) as ArrayRef) - } - _ => Err(ArrowError::CastError(format!( - "Unsupported data type {}, {}", - left.data_type(), right.data_type() - ))) - ) - } - } + _ => Err(ArrowError::CastError(format!( + "Unsupported data type {}, {}", + left.data_type(), right.data_type() + ))) + ) } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1950,39 +1489,21 @@ pub fn divide_dyn_opt( left: &dyn Array, right: &dyn Array, ) -> Result { - match left.data_type() { - DataType::Dictionary(_, _) => { - typed_dict_math_op!( - left, - right, - |a, b| { - if b.is_zero() { - None - } else { - Some(a.div_wrapping(b)) - } - }, - math_divide_safe_op_dict - ) - } - _ => { - downcast_primitive_array!( - (left, right) => { - math_safe_divide_op(left, right, |a, b| { - if b.is_zero() { - None - } else { - Some(a.div_wrapping(b)) - } - }) - } - _ => Err(ArrowError::CastError(format!( - "Unsupported data type {}, {}", - left.data_type(), right.data_type() - ))) - ) + downcast_primitive_array!( + (left, right) => { + math_safe_divide_op(left, right, |a, b| { + if b.is_zero() { + None + } else { + Some(a.div_wrapping(b)) + } + }) } - } + _ => Err(ArrowError::CastError(format!( + "Unsupported data type {}, {}", + left.data_type(), right.data_type() + ))) + ) } /// Perform `left / right` operation on two arrays without checking for @@ -2279,34 +1800,6 @@ mod tests { assert_eq!(17, c.value(4)); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_primitive_array_add_dyn_dict() { - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(5).unwrap(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append(9).unwrap(); - let a = builder.finish(); - - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append_null(); - builder.append(10).unwrap(); - let b = builder.finish(); - - let c = add_dyn(&a, &b).unwrap(); - let c = c.as_any().downcast_ref::().unwrap(); - assert_eq!(11, c.value(0)); - assert_eq!(13, c.value(1)); - assert_eq!(15, c.value(2)); - assert!(c.is_null(3)); - assert_eq!(19, c.value(4)); - } - #[test] fn test_primitive_array_add_scalar_dyn() { let a = Int32Array::from(vec![Some(5), Some(6), Some(7), None, Some(9)]); @@ -2452,34 +1945,6 @@ mod tests { ); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_primitive_array_subtract_dyn_dict() { - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(15).unwrap(); - builder.append(8).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append(20).unwrap(); - let a = builder.finish(); - - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append_null(); - builder.append(10).unwrap(); - let b = builder.finish(); - - let c = subtract_dyn(&a, &b).unwrap(); - let c = c.as_any().downcast_ref::().unwrap(); - assert_eq!(9, c.value(0)); - assert_eq!(1, c.value(1)); - assert_eq!(-1, c.value(2)); - assert!(c.is_null(3)); - assert_eq!(10, c.value(4)); - } - #[test] fn test_primitive_array_subtract_scalar_dyn() { let a = Int32Array::from(vec![Some(5), Some(6), Some(7), None, Some(9)]); @@ -2531,34 +1996,6 @@ mod tests { assert_eq!(72, c.value(4)); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_primitive_array_multiply_dyn_dict() { - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(5).unwrap(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append(9).unwrap(); - let a = builder.finish(); - - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(6).unwrap(); - builder.append(7).unwrap(); - builder.append(8).unwrap(); - builder.append_null(); - builder.append(10).unwrap(); - let b = builder.finish(); - - let c = multiply_dyn(&a, &b).unwrap(); - let c = c.as_any().downcast_ref::().unwrap(); - assert_eq!(30, c.value(0)); - assert_eq!(42, c.value(1)); - assert_eq!(56, c.value(2)); - assert!(c.is_null(3)); - assert_eq!(90, c.value(4)); - } - #[test] fn test_primitive_array_divide_dyn() { let a = Int32Array::from(vec![Some(15), Some(6), Some(1), Some(8), Some(9)]); @@ -2572,34 +2009,6 @@ mod tests { assert_eq!(3, c.value(4)); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_primitive_array_divide_dyn_dict() { - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(15).unwrap(); - builder.append(6).unwrap(); - builder.append(1).unwrap(); - builder.append(8).unwrap(); - builder.append(9).unwrap(); - let a = builder.finish(); - - let mut builder = PrimitiveDictionaryBuilder::::new(); - builder.append(5).unwrap(); - builder.append(3).unwrap(); - builder.append(1).unwrap(); - builder.append_null(); - builder.append(3).unwrap(); - let b = builder.finish(); - - let c = divide_dyn(&a, &b).unwrap(); - let c = c.as_any().downcast_ref::().unwrap(); - assert_eq!(3, c.value(0)); - assert_eq!(2, c.value(1)); - assert_eq!(1, c.value(2)); - assert!(c.is_null(3)); - assert_eq!(3, c.value(4)); - } - #[test] fn test_primitive_array_multiply_scalar_dyn() { let a = Int32Array::from(vec![Some(5), Some(6), Some(7), None, Some(9)]); @@ -3154,40 +2563,6 @@ mod tests { divide_dyn(&a, &b).unwrap(); } - #[test] - #[should_panic(expected = "DivideByZero")] - #[cfg(feature = "dyn_arith_dict")] - fn test_int_array_divide_dyn_by_zero_dict() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(15).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(0).unwrap(); - let b = builder.finish(); - - divide_dyn(&a, &b).unwrap(); - } - - #[test] - #[should_panic(expected = "DivideByZero")] - #[cfg(feature = "dyn_arith_dict")] - fn test_f32_dict_array_divide_dyn_by_zero() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(1.5).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(0.0).unwrap(); - let b = builder.finish(); - - divide_dyn(&a, &b).unwrap(); - } - #[test] #[should_panic(expected = "DivideByZero")] fn test_i32_array_modulus_by_zero() { @@ -3449,30 +2824,6 @@ mod tests { overflow.expect_err("overflow should be detected"); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_dictionary_add_dyn_wrapping_overflow() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(2, 2); - builder.append(i32::MAX).unwrap(); - builder.append(i32::MIN).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(2, 2); - builder.append(1).unwrap(); - builder.append(1).unwrap(); - let b = builder.finish(); - - let wrapped = add_dyn(&a, &b).unwrap(); - let expected = - Arc::new(Int32Array::from(vec![-2147483648, -2147483647])) as ArrayRef; - assert_eq!(&expected, &wrapped); - - let overflow = add_dyn_checked(&a, &b); - overflow.expect_err("overflow should be detected"); - } - #[test] fn test_primitive_subtract_dyn_wrapping_overflow() { let a = Int32Array::from(vec![-2]); @@ -3486,27 +2837,6 @@ mod tests { overflow.expect_err("overflow should be detected"); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_dictionary_subtract_dyn_wrapping_overflow() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(-2).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(i32::MAX).unwrap(); - let b = builder.finish(); - - let wrapped = subtract_dyn(&a, &b).unwrap(); - let expected = Arc::new(Int32Array::from(vec![i32::MAX])) as ArrayRef; - assert_eq!(&expected, &wrapped); - - let overflow = subtract_dyn_checked(&a, &b); - overflow.expect_err("overflow should be detected"); - } - #[test] fn test_primitive_mul_dyn_wrapping_overflow() { let a = Int32Array::from(vec![10]); @@ -3520,27 +2850,6 @@ mod tests { overflow.expect_err("overflow should be detected"); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_dictionary_mul_dyn_wrapping_overflow() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(10).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(i32::MAX).unwrap(); - let b = builder.finish(); - - let wrapped = multiply_dyn(&a, &b).unwrap(); - let expected = Arc::new(Int32Array::from(vec![-10])) as ArrayRef; - assert_eq!(&expected, &wrapped); - - let overflow = multiply_dyn_checked(&a, &b); - overflow.expect_err("overflow should be detected"); - } - #[test] fn test_primitive_div_dyn_wrapping_overflow() { let a = Int32Array::from(vec![i32::MIN]); @@ -3616,51 +2925,6 @@ mod tests { assert_eq!(e, r); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_dictionary_div_dyn_wrapping_overflow() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(i32::MIN).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(-1).unwrap(); - let b = builder.finish(); - - let wrapped = divide_dyn(&a, &b).unwrap(); - let expected = Arc::new(Int32Array::from(vec![-2147483648])) as ArrayRef; - assert_eq!(&expected, &wrapped); - - let overflow = divide_dyn_checked(&a, &b); - overflow.expect_err("overflow should be detected"); - } - - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_div_dyn_opt_overflow_division_by_zero() { - let a = Int32Array::from(vec![i32::MIN]); - let b = Int32Array::from(vec![0]); - - let division_by_zero = divide_dyn_opt(&a, &b); - let expected = Arc::new(Int32Array::from(vec![None])) as ArrayRef; - assert_eq!(&expected, &division_by_zero.unwrap()); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(i32::MIN).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(0).unwrap(); - let b = builder.finish(); - - let division_by_zero = divide_dyn_opt(&a, &b); - assert_eq!(&expected, &division_by_zero.unwrap()); - } - #[test] fn test_div_scalar_dyn_opt_overflow_division_by_zero() { let a = Int32Array::from(vec![i32::MIN]); @@ -3802,74 +3066,6 @@ mod tests { let _ = overflow.unwrap().expect_err("overflow should be detected"); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_dict_decimal() { - let values = Decimal128Array::from_iter_values([0, 1, 2, 3, 4, 5]); - let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]); - let array1 = DictionaryArray::new(keys, Arc::new(values)); - - let values = Decimal128Array::from_iter_values([7, -3, 4, 3, 5]); - let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]); - let array2 = DictionaryArray::new(keys, Arc::new(values)); - - let result = add_dyn(&array1, &array2).unwrap(); - let expected = - Arc::new(Decimal128Array::from(vec![8, 9, 2, 8, 6, 5])) as ArrayRef; - assert_eq!(&result, &expected); - - let result = subtract_dyn(&array1, &array2).unwrap(); - let expected = - Arc::new(Decimal128Array::from(vec![-6, -5, 8, 0, 0, -5])) as ArrayRef; - assert_eq!(&result, &expected); - - let values = Decimal256Array::from_iter_values([ - i256::from_i128(0), - i256::from_i128(1), - i256::from_i128(2), - i256::from_i128(3), - i256::from_i128(4), - i256::from_i128(5), - ]); - let keys = - Int8Array::from(vec![Some(1_i8), None, Some(5), Some(4), Some(3), None]); - let array1 = DictionaryArray::new(keys, Arc::new(values)); - - let values = Decimal256Array::from_iter_values([ - i256::from_i128(7), - i256::from_i128(-3), - i256::from_i128(4), - i256::from_i128(3), - i256::from_i128(5), - ]); - let keys = - Int8Array::from(vec![Some(0_i8), Some(0), None, Some(2), Some(3), Some(4)]); - let array2 = DictionaryArray::new(keys, Arc::new(values)); - - let result = add_dyn(&array1, &array2).unwrap(); - let expected = Arc::new(Decimal256Array::from(vec![ - Some(i256::from_i128(8)), - None, - None, - Some(i256::from_i128(8)), - Some(i256::from_i128(6)), - None, - ])) as ArrayRef; - - assert_eq!(&result, &expected); - - let result = subtract_dyn(&array1, &array2).unwrap(); - let expected = Arc::new(Decimal256Array::from(vec![ - Some(i256::from_i128(-6)), - None, - None, - Some(i256::from_i128(0)), - Some(i256::from_i128(0)), - None, - ])) as ArrayRef; - assert_eq!(&result, &expected); - } - #[test] fn test_decimal_add_scalar_dyn() { let a = Decimal128Array::from(vec![100, 210, 320]) @@ -4047,110 +3243,6 @@ mod tests { ); } - #[test] - #[cfg(feature = "dyn_arith_dict")] - fn test_decimal_multiply_fixed_point_dyn() { - // [123456789] - let a = Decimal128Array::from(vec![123456789000000000000000000]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // [10] - let b = Decimal128Array::from(vec![10000000000000000000]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // Avoid overflow by reducing the scale. - let result = multiply_fixed_point_dyn(&a, &b, 28).unwrap(); - // [1234567890] - let expected = Arc::new( - Decimal128Array::from(vec![12345678900000000000000000000000000000]) - .with_precision_and_scale(38, 28) - .unwrap(), - ) as ArrayRef; - - assert_eq!(&expected, &result); - assert_eq!( - result.as_primitive::().value_as_string(0), - "1234567890.0000000000000000000000000000" - ); - - // [123456789, 10] - let a = Decimal128Array::from(vec![ - 123456789000000000000000000, - 10000000000000000000, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // [10, 123456789, 12] - let b = Decimal128Array::from(vec![ - 10000000000000000000, - 123456789000000000000000000, - 12000000000000000000, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); - - let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1), None]); - let array1 = DictionaryArray::new(keys, Arc::new(a)); - let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(2), None]); - let array2 = DictionaryArray::new(keys, Arc::new(b)); - - let result = multiply_fixed_point_dyn(&array1, &array2, 28).unwrap(); - let expected = Arc::new( - Decimal128Array::from(vec![ - Some(12345678900000000000000000000000000000), - Some(12345678900000000000000000000000000000), - Some(1200000000000000000000000000000), - None, - ]) - .with_precision_and_scale(38, 28) - .unwrap(), - ) as ArrayRef; - - assert_eq!(&expected, &result); - assert_eq!( - result.as_primitive::().value_as_string(0), - "1234567890.0000000000000000000000000000" - ); - assert_eq!( - result.as_primitive::().value_as_string(1), - "1234567890.0000000000000000000000000000" - ); - assert_eq!( - result.as_primitive::().value_as_string(2), - "120.0000000000000000000000000000" - ); - - // Required scale is same as the product of the input scales. Behavior is same as multiply_dyn. - let a = Decimal128Array::from(vec![123, 100]) - .with_precision_and_scale(3, 2) - .unwrap(); - - let b = Decimal128Array::from(vec![100, 123, 120]) - .with_precision_and_scale(3, 2) - .unwrap(); - - let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1), None]); - let array1 = DictionaryArray::new(keys, Arc::new(a)); - let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(2), None]); - let array2 = DictionaryArray::new(keys, Arc::new(b)); - - let result = multiply_fixed_point_dyn(&array1, &array2, 4).unwrap(); - let expected = multiply_dyn(&array1, &array2).unwrap(); - let expected = Arc::new( - expected - .as_any() - .downcast_ref::() - .unwrap() - .clone() - .with_precision_and_scale(7, 4) - .unwrap(), - ) as ArrayRef; - assert_eq!(&expected, &result); - } - #[test] fn test_timestamp_second_add_interval() { // timestamp second + interval year month diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 998d077fa105..bc126a2f4c2d 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -63,7 +63,7 @@ rand = { version = "0.8", default-features = false, features = ["std", "std_rng" pyo3 = { version = "0.19", default-features = false, optional = true } [package.metadata.docs.rs] -features = ["prettyprint", "ipc_compression", "dyn_cmp_dict", "dyn_arith_dict", "ffi", "pyarrow"] +features = ["prettyprint", "ipc_compression", "dyn_cmp_dict", "ffi", "pyarrow"] [features] default = ["csv", "ipc", "json"] @@ -88,9 +88,6 @@ ffi = ["arrow-schema/ffi", "arrow-data/ffi"] # Enable dyn-comparison of dictionary arrays with other arrays # Note: this does not impact comparison against scalars dyn_cmp_dict = ["arrow-string/dyn_cmp_dict", "arrow-ord/dyn_cmp_dict"] -# Enable dyn-arithmetic kernels for dictionary arrays -# Note: this does not impact arithmetic with scalars -dyn_arith_dict = ["arrow-arith/dyn_arith_dict"] chrono-tz = ["arrow-array/chrono-tz"] [dev-dependencies] diff --git a/arrow/README.md b/arrow/README.md index fde71607246e..5f611bfec1aa 100644 --- a/arrow/README.md +++ b/arrow/README.md @@ -53,7 +53,6 @@ The `arrow` crate provides the following features which may be enabled in your ` - `ffi` - bindings for the Arrow C [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html) - `pyarrow` - bindings for pyo3 to call arrow-rs from python - `dyn_cmp_dict` - enables comparison of dictionary arrays within dyn comparison kernels -- `dyn_arith_dict` - enables arithmetic on dictionary arrays within dyn arithmetic kernels ## Arrow Feature Status From 30e3a8fbefb6dea20740eb3106814f98990a6b97 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 26 Jun 2023 16:21:50 +0100 Subject: [PATCH 2/2] Clippy --- arrow-arith/src/arithmetic.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index 39d8adc0f308..8e7ab44042cf 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -1117,19 +1117,15 @@ pub fn multiply_dyn_checked( left: &dyn Array, right: &dyn Array, ) -> Result { - match left.data_type() { - _ => { - downcast_primitive_array!( - (left, right) => { - math_checked_op(left, right, |a, b| a.mul_checked(b)).map(|a| Arc::new(a) as ArrayRef) - } - _ => Err(ArrowError::CastError(format!( - "Unsupported data type {}, {}", - left.data_type(), right.data_type() - ))) - ) + downcast_primitive_array!( + (left, right) => { + math_checked_op(left, right, |a, b| a.mul_checked(b)).map(|a| Arc::new(a) as ArrayRef) } - } + _ => Err(ArrowError::CastError(format!( + "Unsupported data type {}, {}", + left.data_type(), right.data_type() + ))) + ) } /// Returns the precision and scale of the result of a multiplication of two decimal types,