From 154f8a5a12d169f35a1a411f064ec539410b679d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 2 Sep 2022 22:23:11 -0700 Subject: [PATCH 01/11] Add overflow-checking variant for add kernel and explicitly define overflow behavior for add --- arrow/src/compute/kernels/arithmetic.rs | 86 +++++++++++++++++++++++-- arrow/src/datatypes/native.rs | 39 +++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index fff687e18b3c..775fc53611e6 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -35,8 +35,9 @@ use crate::compute::unary_dyn; use crate::compute::util::combine_option_bitmap; use crate::datatypes; use crate::datatypes::{ - ArrowNumericType, DataType, Date32Type, Date64Type, IntervalDayTimeType, - IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, + ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, DataType, Date32Type, + Date64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, + IntervalYearMonthType, }; use crate::datatypes::{ Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, @@ -103,6 +104,51 @@ where Ok(PrimitiveArray::::from(data)) } +/// This is similar to `math_op` as it performs given operation between two input primitive arrays. +/// But the given can return `None` if overflow is detected. For the case, this function returns an +/// `Err`. +fn math_checked_op( + left: &PrimitiveArray, + right: &PrimitiveArray, + op: F, +) -> Result> +where + LT: ArrowNumericType, + RT: ArrowNumericType, + F: Fn(LT::Native, RT::Native) -> Option, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let left_iter = ArrayIter::new(left); + let right_iter = ArrayIter::new(right); + + let values: Result::Native>>> = left_iter + .into_iter() + .zip(right_iter.into_iter()) + .map(|(l, r)| { + if let (Some(l), Some(r)) = (l, r) { + let result = op(l, r); + if let Some(r) = result { + Ok(Some(r)) + } else { + // Overflow + Err(ArrowError::ComputeError("Overflow happened".to_string())) + } + } else { + Ok(None) + } + }) + .collect(); + + let values = values?; + + Ok(PrimitiveArray::::from_iter(values)) +} + /// Helper function for operations where a valid `0` on the right array should /// result in an [ArrowError::DivideByZero], namely the division and modulo operations /// @@ -760,15 +806,34 @@ where /// Perform `left + right` operation on two arrays. If either left or right value is null /// then the result is also null. +/// +/// This doesn't detect overflow. Once overflowing, the result will wrap around. +/// For an overflow-checking variant, use `add_checked` instead. pub fn add( left: &PrimitiveArray, right: &PrimitiveArray, ) -> Result> where T: ArrowNumericType, - T::Native: Add, + T::Native: ArrowNativeTypeOp, +{ + math_op(left, right, |a, b| a.wrapping_add_if_applied(b)) +} + +/// Perform `left + right` operation on two arrays. If either left or right value is null +/// then the result is also null. Once +/// +/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, +/// use `add` instead. +pub fn add_checked( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: ArrowNumericType, + T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a + b) + math_checked_op(left, right, |a, b| a.checked_add_if_applied(b)) } /// Perform `left + right` operation on two arrays. If either left or right value is null @@ -2019,4 +2084,17 @@ mod tests { let expected = Float64Array::from(vec![Some(1.0), None, Some(9.0)]); assert_eq!(expected, actual); } + + #[test] + fn test_primitive_add_wrapping_overflow() { + let a = Int32Array::from(vec![i32::MAX, i32::MIN]); + let b = Int32Array::from(vec![1, 1]); + + let wrapped = add(&a, &b); + let expected = Int32Array::from(vec![-2147483648, -2147483647]); + assert_eq!(expected, wrapped.unwrap()); + + let overflow = add_checked(&a, &b); + overflow.expect_err("overflow should be detected"); + } } diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 207e8cb40330..2163184137a4 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -17,6 +17,7 @@ use super::DataType; use half::f16; +use std::ops::Add; mod private { pub trait Sealed {} @@ -114,6 +115,44 @@ pub trait ArrowPrimitiveType: 'static { } } +/// Trait for ArrowNativeType to provide overflow-aware operations. +pub trait ArrowNativeTypeOp: ArrowNativeType + Add { + fn checked_add_if_applied(self, rhs: Self) -> Option { + Some(self + rhs) + } + + fn wrapping_add_if_applied(self, rhs: Self) -> Self { + self + rhs + } +} + +macro_rules! native_type_op { + ($t:tt) => { + impl ArrowNativeTypeOp for $t { + fn checked_add_if_applied(self, rhs: Self) -> Option { + self.checked_add(rhs) + } + + fn wrapping_add_if_applied(self, rhs: Self) -> Self { + self.wrapping_add(rhs) + } + } + }; +} + +native_type_op!(i8); +native_type_op!(i16); +native_type_op!(i32); +native_type_op!(i64); +native_type_op!(u8); +native_type_op!(u16); +native_type_op!(u32); +native_type_op!(u64); + +impl ArrowNativeTypeOp for f16 {} +impl ArrowNativeTypeOp for f32 {} +impl ArrowNativeTypeOp for f64 {} + impl private::Sealed for i8 {} impl ArrowNativeType for i8 { #[inline] From 02a2a807852bd8f66411d40c1ea31c004d3a9a0a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 2 Sep 2022 23:13:41 -0700 Subject: [PATCH 02/11] For subtract, multiply, divide --- arrow/benches/arithmetic_kernels.rs | 4 +- arrow/src/compute/kernels/arithmetic.rs | 105 +++++++++++++++++++++--- arrow/src/datatypes/native.rs | 58 ++++++++++++- 3 files changed, 152 insertions(+), 15 deletions(-) diff --git a/arrow/benches/arithmetic_kernels.rs b/arrow/benches/arithmetic_kernels.rs index 4be4a26933aa..10af0b5432ef 100644 --- a/arrow/benches/arithmetic_kernels.rs +++ b/arrow/benches/arithmetic_kernels.rs @@ -55,13 +55,13 @@ fn bench_multiply(arr_a: &ArrayRef, arr_b: &ArrayRef) { fn bench_divide(arr_a: &ArrayRef, arr_b: &ArrayRef) { let arr_a = arr_a.as_any().downcast_ref::().unwrap(); let arr_b = arr_b.as_any().downcast_ref::().unwrap(); - criterion::black_box(divide(arr_a, arr_b).unwrap()); + criterion::black_box(divide_checked(arr_a, arr_b).unwrap()); } fn bench_divide_unchecked(arr_a: &ArrayRef, arr_b: &ArrayRef) { let arr_a = arr_a.as_any().downcast_ref::().unwrap(); let arr_b = arr_b.as_any().downcast_ref::().unwrap(); - criterion::black_box(divide_unchecked(arr_a, arr_b).unwrap()); + criterion::black_box(divide(arr_a, arr_b).unwrap()); } fn bench_divide_scalar(array: &ArrayRef, divisor: f32) { diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 775fc53611e6..3660fbf52e77 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -921,15 +921,34 @@ where /// Perform `left - right` operation on two arrays. If either left or right value is null /// then the result is also null. +/// +/// This doesn't detect overflow. Once overflowing, the result will wrap around. +/// For an overflow-checking variant, use `subtract_checked` instead. pub fn subtract( left: &PrimitiveArray, right: &PrimitiveArray, ) -> Result> where T: datatypes::ArrowNumericType, - T::Native: Sub, + T::Native: ArrowNativeTypeOp, +{ + math_op(left, right, |a, b| a.wrapping_sub_if_applied(b)) +} + +/// Perform `left - right` operation on two arrays. If either left or right value is null +/// then the result is also null. +/// +/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, +/// use `subtract` instead. +pub fn subtract_checked( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: datatypes::ArrowNumericType, + T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a - b) + math_checked_op(left, right, |a, b| a.checked_sub_if_applied(b)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -998,15 +1017,34 @@ where /// Perform `left * right` operation on two arrays. If either left or right value is null /// then the result is also null. +/// +/// This doesn't detect overflow. Once overflowing, the result will wrap around. +/// For an overflow-checking variant, use `multiply_check` instead. pub fn multiply( left: &PrimitiveArray, right: &PrimitiveArray, ) -> Result> where T: datatypes::ArrowNumericType, - T::Native: Mul, + T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a * b) + math_op(left, right, |a, b| a.wrapping_mul_if_applied(b)) +} + +/// Perform `left * right` operation on two arrays. If either left or right value is null +/// then the result is also null. +/// +/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, +/// use `multiply` instead. +pub fn multiply_check( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: datatypes::ArrowNumericType, + T::Native: ArrowNativeTypeOp, +{ + math_checked_op(left, right, |a, b| a.checked_mul_if_applied(b)) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -1078,18 +1116,21 @@ where /// Perform `left / right` operation on two arrays. If either left or right value is null /// then the result is also null. If any right hand value is zero then the result of this /// operation will be `Err(ArrowError::DivideByZero)`. -pub fn divide( +/// +/// When `simd` feature is not enabled. This detects overflow and returns an `Err` for that. +/// For an non-overflow-checking variant, use `divide` instead. +pub fn divide_checked( left: &PrimitiveArray, right: &PrimitiveArray, ) -> Result> where T: datatypes::ArrowNumericType, - T::Native: Div + Zero + One, + T::Native: ArrowNativeTypeOp + Zero + One, { #[cfg(feature = "simd")] return simd_checked_divide_op(&left, &right, simd_checked_divide::, |a, b| a / b); #[cfg(not(feature = "simd"))] - return math_checked_divide_op(left, right, |a, b| a / b); + return math_checked_op(left, right, |a, b| a.checked_div_if_applied(b)); } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1107,15 +1148,18 @@ pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { /// Perform `left / right` operation on two arrays without checking for division by zero. /// The result of dividing by zero follows normal floating point rules. /// If either left or right value is null then the result is also null. If any right hand value is zero then the result of this -pub fn divide_unchecked( +/// +/// This doesn't detect overflow. Once overflowing, the result will wrap around. +/// For an overflow-checking variant, use `divide_checked` instead. +pub fn divide( left: &PrimitiveArray, right: &PrimitiveArray, ) -> Result> where - T: datatypes::ArrowFloatNumericType, - T::Native: Div, + T: datatypes::ArrowNumericType, + T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a / b) + math_op(left, right, |a, b| a.wrapping_div_if_applied(b)) } /// Modulus every value in an array by a scalar. If any value in the array is null then the @@ -2097,4 +2141,43 @@ mod tests { let overflow = add_checked(&a, &b); overflow.expect_err("overflow should be detected"); } + + #[test] + fn test_primitive_subtract_wrapping_overflow() { + let a = Int32Array::from(vec![-2]); + let b = Int32Array::from(vec![i32::MAX]); + + let wrapped = subtract(&a, &b); + let expected = Int32Array::from(vec![i32::MAX]); + assert_eq!(expected, wrapped.unwrap()); + + let overflow = subtract_checked(&a, &b); + overflow.expect_err("overflow should be detected"); + } + + #[test] + fn test_primitive_mul_wrapping_overflow() { + let a = Int32Array::from(vec![10]); + let b = Int32Array::from(vec![i32::MAX]); + + let wrapped = multiply(&a, &b); + let expected = Int32Array::from(vec![-10]); + assert_eq!(expected, wrapped.unwrap()); + + let overflow = multiply_check(&a, &b); + overflow.expect_err("overflow should be detected"); + } + + #[test] + fn test_primitive_div_wrapping_overflow() { + let a = Int32Array::from(vec![i32::MIN]); + let b = Int32Array::from(vec![-1]); + + let wrapped = divide(&a, &b); + let expected = Int32Array::from(vec![-2147483648]); + assert_eq!(expected, wrapped.unwrap()); + + let overflow = divide_checked(&a, &b); + overflow.expect_err("overflow should be detected"); + } } diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 2163184137a4..e31998e5ebbf 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -17,7 +17,7 @@ use super::DataType; use half::f16; -use std::ops::Add; +use std::ops::{Add, Div, Mul, Sub}; mod private { pub trait Sealed {} @@ -116,7 +116,13 @@ pub trait ArrowPrimitiveType: 'static { } /// Trait for ArrowNativeType to provide overflow-aware operations. -pub trait ArrowNativeTypeOp: ArrowNativeType + Add { +pub trait ArrowNativeTypeOp: + ArrowNativeType + + Add + + Sub + + Mul + + Div +{ fn checked_add_if_applied(self, rhs: Self) -> Option { Some(self + rhs) } @@ -124,6 +130,30 @@ pub trait ArrowNativeTypeOp: ArrowNativeType + Add { fn wrapping_add_if_applied(self, rhs: Self) -> Self { self + rhs } + + fn checked_sub_if_applied(self, rhs: Self) -> Option { + Some(self + rhs) + } + + fn wrapping_sub_if_applied(self, rhs: Self) -> Self { + self + rhs + } + + fn checked_mul_if_applied(self, rhs: Self) -> Option { + Some(self * rhs) + } + + fn wrapping_mul_if_applied(self, rhs: Self) -> Self { + self * rhs + } + + fn checked_div_if_applied(self, rhs: Self) -> Option { + Some(self / rhs) + } + + fn wrapping_div_if_applied(self, rhs: Self) -> Self { + self / rhs + } } macro_rules! native_type_op { @@ -136,6 +166,30 @@ macro_rules! native_type_op { fn wrapping_add_if_applied(self, rhs: Self) -> Self { self.wrapping_add(rhs) } + + fn checked_sub_if_applied(self, rhs: Self) -> Option { + self.checked_sub(rhs) + } + + fn wrapping_sub_if_applied(self, rhs: Self) -> Self { + self.wrapping_sub(rhs) + } + + fn checked_mul_if_applied(self, rhs: Self) -> Option { + self.checked_mul(rhs) + } + + fn wrapping_mul_if_applied(self, rhs: Self) -> Self { + self.wrapping_mul(rhs) + } + + fn checked_div_if_applied(self, rhs: Self) -> Option { + self.checked_div(rhs) + } + + fn wrapping_div_if_applied(self, rhs: Self) -> Self { + self.wrapping_div(rhs) + } } }; } From 74d8cc975387dc4e594a4762e39024eaadd090ee Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 00:18:49 -0700 Subject: [PATCH 03/11] Fix tests --- arrow/src/compute/kernels/arithmetic.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 3660fbf52e77..9e731f3f653e 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1878,7 +1878,7 @@ mod tests { fn test_primitive_array_divide_with_nulls() { let a = Int32Array::from(vec![Some(15), None, Some(8), Some(1), Some(9), None]); let b = Int32Array::from(vec![Some(5), Some(6), Some(8), Some(9), None, None]); - let c = divide(&a, &b).unwrap(); + let c = divide_checked(&a, &b).unwrap(); assert_eq!(3, c.value(0)); assert!(c.is_null(1)); assert_eq!(1, c.value(2)); @@ -1963,7 +1963,7 @@ mod tests { let b = b.slice(8, 6); let b = b.as_any().downcast_ref::().unwrap(); - let c = divide(a, b).unwrap(); + let c = divide_checked(a, b).unwrap(); assert_eq!(6, c.len()); assert_eq!(3, c.value(0)); assert!(c.is_null(1)); @@ -2027,11 +2027,11 @@ mod tests { } #[test] - #[should_panic(expected = "DivideByZero")] + #[should_panic(expected = "Overflow happened")] fn test_primitive_array_divide_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); - divide(&a, &b).unwrap(); + divide_checked(&a, &b).unwrap(); } #[test] @@ -2169,6 +2169,7 @@ mod tests { } #[test] + #[cfg(not(feature = "simd"))] fn test_primitive_div_wrapping_overflow() { let a = Int32Array::from(vec![i32::MIN]); let b = Int32Array::from(vec![-1]); From 314886f3b02eb36388799b2d8ed4fe7d0d7e1cce Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 00:44:53 -0700 Subject: [PATCH 04/11] Fix different error message --- arrow/src/compute/kernels/arithmetic.rs | 56 +++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 9e731f3f653e..b1e07e7dbb7d 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -105,8 +105,8 @@ where } /// This is similar to `math_op` as it performs given operation between two input primitive arrays. -/// But the given can return `None` if overflow is detected. For the case, this function returns an -/// `Err`. +/// But the given operation can return `None` if overflow is detected. For the case, this function +/// returns an `Err`. fn math_checked_op( left: &PrimitiveArray, right: &PrimitiveArray, @@ -149,6 +149,54 @@ where Ok(PrimitiveArray::::from_iter(values)) } +/// This is similar to `math_checked_op` but just for divide op. +fn math_checked_divide( + left: &PrimitiveArray, + right: &PrimitiveArray, + op: F, +) -> Result> +where + LT: ArrowNumericType, + RT: ArrowNumericType, + RT::Native: One + Zero, + F: Fn(LT::Native, RT::Native) -> Option, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform math operation on arrays of different length".to_string(), + )); + } + + let left_iter = ArrayIter::new(left); + let right_iter = ArrayIter::new(right); + + let values: Result::Native>>> = left_iter + .into_iter() + .zip(right_iter.into_iter()) + .map(|(l, r)| { + if let (Some(l), Some(r)) = (l, r) { + let result = op(l, r); + if let Some(r) = result { + Ok(Some(r)) + } else { + if r.is_zero() { + Err(ArrowError::ComputeError("DivideByZero".to_string())) + } else { + // Overflow + Err(ArrowError::ComputeError("Overflow happened".to_string())) + } + } + } else { + Ok(None) + } + }) + .collect(); + + let values = values?; + + Ok(PrimitiveArray::::from_iter(values)) +} + /// Helper function for operations where a valid `0` on the right array should /// result in an [ArrowError::DivideByZero], namely the division and modulo operations /// @@ -1130,7 +1178,7 @@ where #[cfg(feature = "simd")] return simd_checked_divide_op(&left, &right, simd_checked_divide::, |a, b| a / b); #[cfg(not(feature = "simd"))] - return math_checked_op(left, right, |a, b| a.checked_div_if_applied(b)); + return math_checked_divide(left, right, |a, b| a.checked_div_if_applied(b)); } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -2027,7 +2075,7 @@ mod tests { } #[test] - #[should_panic(expected = "Overflow happened")] + #[should_panic(expected = "DivideByZero")] fn test_primitive_array_divide_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); From de42e04198b5ef0b4b92ab8d38d0641dfeac0ae5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 09:20:27 -0700 Subject: [PATCH 05/11] Fix typo --- arrow/src/datatypes/native.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index e31998e5ebbf..89d60c3210e7 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -132,7 +132,7 @@ pub trait ArrowNativeTypeOp: } fn checked_sub_if_applied(self, rhs: Self) -> Option { - Some(self + rhs) + Some(self - rhs) } fn wrapping_sub_if_applied(self, rhs: Self) -> Self { From e8218c444be9a58da3a38a8f2049c04172c48f69 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 17:35:34 -0700 Subject: [PATCH 06/11] Rename APIs and add more comments. Print values in error message. --- arrow/src/compute/kernels/arithmetic.rs | 35 ++++++++++++------- arrow/src/datatypes/native.rs | 45 +++++++++++++++---------- 2 files changed, 49 insertions(+), 31 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index b1e07e7dbb7d..c0f0d2d961ff 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -136,7 +136,10 @@ where Ok(Some(r)) } else { // Overflow - Err(ArrowError::ComputeError("Overflow happened".to_string())) + Err(ArrowError::ComputeError(format!( + "Overflow happened on: {:?}, {:?}", + l, r + ))) } } else { Ok(None) @@ -180,10 +183,16 @@ where Ok(Some(r)) } else { if r.is_zero() { - Err(ArrowError::ComputeError("DivideByZero".to_string())) + Err(ArrowError::ComputeError(format!( + "DivideByZero on: {:?}, {:?}", + l, r + ))) } else { // Overflow - Err(ArrowError::ComputeError("Overflow happened".to_string())) + Err(ArrowError::ComputeError(format!( + "Overflow happened on: {:?}, {:?}", + l, r + ))) } } } else { @@ -865,7 +874,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a.wrapping_add_if_applied(b)) + math_op(left, right, |a, b| a.add_wrapping(b)) } /// Perform `left + right` operation on two arrays. If either left or right value is null @@ -881,7 +890,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.checked_add_if_applied(b)) + math_checked_op(left, right, |a, b| a.add_checked(b)) } /// Perform `left + right` operation on two arrays. If either left or right value is null @@ -980,7 +989,7 @@ where T: datatypes::ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a.wrapping_sub_if_applied(b)) + math_op(left, right, |a, b| a.sub_wrapping(b)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -996,7 +1005,7 @@ where T: datatypes::ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.checked_sub_if_applied(b)) + math_checked_op(left, right, |a, b| a.sub_checked(b)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -1076,7 +1085,7 @@ where T: datatypes::ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a.wrapping_mul_if_applied(b)) + math_op(left, right, |a, b| a.mul_wrapping(b)) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -1084,7 +1093,7 @@ where /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `multiply` instead. -pub fn multiply_check( +pub fn multiply_checked( left: &PrimitiveArray, right: &PrimitiveArray, ) -> Result> @@ -1092,7 +1101,7 @@ where T: datatypes::ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.checked_mul_if_applied(b)) + math_checked_op(left, right, |a, b| a.mul_checked(b)) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -1178,7 +1187,7 @@ where #[cfg(feature = "simd")] return simd_checked_divide_op(&left, &right, simd_checked_divide::, |a, b| a / b); #[cfg(not(feature = "simd"))] - return math_checked_divide(left, right, |a, b| a.checked_div_if_applied(b)); + return math_checked_divide(left, right, |a, b| a.div_checked(b)); } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1207,7 +1216,7 @@ where T: datatypes::ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_op(left, right, |a, b| a.wrapping_div_if_applied(b)) + math_op(left, right, |a, b| a.div_wrapping(b)) } /// Modulus every value in an array by a scalar. If any value in the array is null then the @@ -2212,7 +2221,7 @@ mod tests { let expected = Int32Array::from(vec![-10]); assert_eq!(expected, wrapped.unwrap()); - let overflow = multiply_check(&a, &b); + let overflow = multiply_checked(&a, &b); overflow.expect_err("overflow should be detected"); } diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 89d60c3210e7..f7584fe419cb 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -115,7 +115,16 @@ pub trait ArrowPrimitiveType: 'static { } } -/// Trait for ArrowNativeType to provide overflow-aware operations. +/// Trait for ArrowNativeType to provide overflow-checking and non-overflow-checking +/// variants for arithmetic operations. For floating point types, this provides some +/// default implementations. Integer types that need to deal with overflow can implement +/// this trait. +/// +/// The APIs with `wrapping` suffix are the variant of non-overflow-checking. If overflow +/// occurred, they will supposedly wrap around the boundary of the type. +/// +/// The APIs with `_check` suffix are the variant of overflow-checking which return `None` +/// if overflow occurred. pub trait ArrowNativeTypeOp: ArrowNativeType + Add @@ -123,35 +132,35 @@ pub trait ArrowNativeTypeOp: + Mul + Div { - fn checked_add_if_applied(self, rhs: Self) -> Option { + fn add_checked(self, rhs: Self) -> Option { Some(self + rhs) } - fn wrapping_add_if_applied(self, rhs: Self) -> Self { + fn add_wrapping(self, rhs: Self) -> Self { self + rhs } - fn checked_sub_if_applied(self, rhs: Self) -> Option { + fn sub_checked(self, rhs: Self) -> Option { Some(self - rhs) } - fn wrapping_sub_if_applied(self, rhs: Self) -> Self { - self + rhs + fn sub_wrapping(self, rhs: Self) -> Self { + self - rhs } - fn checked_mul_if_applied(self, rhs: Self) -> Option { + fn mul_checked(self, rhs: Self) -> Option { Some(self * rhs) } - fn wrapping_mul_if_applied(self, rhs: Self) -> Self { + fn mul_wrapping(self, rhs: Self) -> Self { self * rhs } - fn checked_div_if_applied(self, rhs: Self) -> Option { + fn div_checked(self, rhs: Self) -> Option { Some(self / rhs) } - fn wrapping_div_if_applied(self, rhs: Self) -> Self { + fn div_wrapping(self, rhs: Self) -> Self { self / rhs } } @@ -159,35 +168,35 @@ pub trait ArrowNativeTypeOp: macro_rules! native_type_op { ($t:tt) => { impl ArrowNativeTypeOp for $t { - fn checked_add_if_applied(self, rhs: Self) -> Option { + fn add_checked(self, rhs: Self) -> Option { self.checked_add(rhs) } - fn wrapping_add_if_applied(self, rhs: Self) -> Self { + fn add_wrapping(self, rhs: Self) -> Self { self.wrapping_add(rhs) } - fn checked_sub_if_applied(self, rhs: Self) -> Option { + fn sub_checked(self, rhs: Self) -> Option { self.checked_sub(rhs) } - fn wrapping_sub_if_applied(self, rhs: Self) -> Self { + fn sub_wrapping(self, rhs: Self) -> Self { self.wrapping_sub(rhs) } - fn checked_mul_if_applied(self, rhs: Self) -> Option { + fn mul_checked(self, rhs: Self) -> Option { self.checked_mul(rhs) } - fn wrapping_mul_if_applied(self, rhs: Self) -> Self { + fn mul_wrapping(self, rhs: Self) -> Self { self.wrapping_mul(rhs) } - fn checked_div_if_applied(self, rhs: Self) -> Option { + fn div_checked(self, rhs: Self) -> Option { self.checked_div(rhs) } - fn wrapping_div_if_applied(self, rhs: Self) -> Self { + fn div_wrapping(self, rhs: Self) -> Self { self.wrapping_div(rhs) } } From 06f4ce3bf3f11904f54710a37475c4b9cc6b7d7f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 17:42:18 -0700 Subject: [PATCH 07/11] Add one more test to distinct divide_by_zero behavior on divide. --- arrow/src/compute/kernels/arithmetic.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index c0f0d2d961ff..6f7036db2d23 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -2085,12 +2085,20 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_by_zero() { + fn test_primitive_array_divide_by_zero_with_checked() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide_checked(&a, &b).unwrap(); } + #[test] + #[should_panic(expected = "attempt to divide by zero")] + fn test_primitive_array_divide_by_zero() { + let a = Int32Array::from(vec![15]); + let b = Int32Array::from(vec![0]); + divide(&a, &b).unwrap(); + } + #[test] #[should_panic(expected = "DivideByZero")] fn test_primitive_array_divide_dyn_by_zero() { From 4aa443215c8552629083f74ddb40449ea371e7ab Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 18:14:25 -0700 Subject: [PATCH 08/11] Fix clippy --- arrow/src/compute/kernels/arithmetic.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 6f7036db2d23..ca84b0c1e308 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -181,19 +181,17 @@ where let result = op(l, r); if let Some(r) = result { Ok(Some(r)) + } else if r.is_zero() { + Err(ArrowError::ComputeError(format!( + "DivideByZero on: {:?}, {:?}", + l, r + ))) } else { - if r.is_zero() { - Err(ArrowError::ComputeError(format!( - "DivideByZero on: {:?}, {:?}", - l, r - ))) - } else { - // Overflow - Err(ArrowError::ComputeError(format!( - "Overflow happened on: {:?}, {:?}", - l, r - ))) - } + // Overflow + Err(ArrowError::ComputeError(format!( + "Overflow happened on: {:?}, {:?}", + l, r + ))) } } else { Ok(None) From 3d98affffeeec746b9aa7eb79ecfb55fe90b784d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 18:56:47 -0700 Subject: [PATCH 09/11] Update divide doc with dividing by zero behavior for other numeric types. --- arrow/src/compute/kernels/arithmetic.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index ca84b0c1e308..9bd6047f508a 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1201,7 +1201,8 @@ pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { } /// Perform `left / right` operation on two arrays without checking for division by zero. -/// The result of dividing by zero follows normal floating point rules. +/// For floating point types, the result of dividing by zero follows normal floating point +/// rules. For other numeric types, dividing by zero will panic, /// If either left or right value is null then the result is also null. If any right hand value is zero then the result of this /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. From c930f85a13cfcb037e2255bacea1a70a7a1ea0a1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Sep 2022 22:38:59 -0700 Subject: [PATCH 10/11] Hide ArrowNativeTypeOp --- arrow/src/compute/kernels/arithmetic.rs | 4 +- arrow/src/datatypes/native.rs | 94 +++++++++++++------------ 2 files changed, 51 insertions(+), 47 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 9bd6047f508a..53f48570d927 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -35,8 +35,8 @@ use crate::compute::unary_dyn; use crate::compute::util::combine_option_bitmap; use crate::datatypes; use crate::datatypes::{ - ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, DataType, Date32Type, - Date64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, + native_op::ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, DataType, + Date32Type, Date64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, }; use crate::datatypes::{ diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index f7584fe419cb..63a8dfdc6fbd 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -17,7 +17,6 @@ use super::DataType; use half::f16; -use std::ops::{Add, Div, Mul, Sub}; mod private { pub trait Sealed {} @@ -115,59 +114,64 @@ pub trait ArrowPrimitiveType: 'static { } } -/// Trait for ArrowNativeType to provide overflow-checking and non-overflow-checking -/// variants for arithmetic operations. For floating point types, this provides some -/// default implementations. Integer types that need to deal with overflow can implement -/// this trait. -/// -/// The APIs with `wrapping` suffix are the variant of non-overflow-checking. If overflow -/// occurred, they will supposedly wrap around the boundary of the type. -/// -/// The APIs with `_check` suffix are the variant of overflow-checking which return `None` -/// if overflow occurred. -pub trait ArrowNativeTypeOp: - ArrowNativeType - + Add - + Sub - + Mul - + Div -{ - fn add_checked(self, rhs: Self) -> Option { - Some(self + rhs) - } +pub(crate) mod native_op { + use super::ArrowNativeType; + use std::ops::{Add, Div, Mul, Sub}; - fn add_wrapping(self, rhs: Self) -> Self { - self + rhs - } + /// Trait for ArrowNativeType to provide overflow-checking and non-overflow-checking + /// variants for arithmetic operations. For floating point types, this provides some + /// default implementations. Integer types that need to deal with overflow can implement + /// this trait. + /// + /// The APIs with `wrapping` suffix are the variant of non-overflow-checking. If overflow + /// occurred, they will supposedly wrap around the boundary of the type. + /// + /// The APIs with `_checked` suffix are the variant of overflow-checking which return `None` + /// if overflow occurred. + pub trait ArrowNativeTypeOp: + ArrowNativeType + + Add + + Sub + + Mul + + Div + { + fn add_checked(self, rhs: Self) -> Option { + Some(self + rhs) + } - fn sub_checked(self, rhs: Self) -> Option { - Some(self - rhs) - } + fn add_wrapping(self, rhs: Self) -> Self { + self + rhs + } - fn sub_wrapping(self, rhs: Self) -> Self { - self - rhs - } + fn sub_checked(self, rhs: Self) -> Option { + Some(self - rhs) + } - fn mul_checked(self, rhs: Self) -> Option { - Some(self * rhs) - } + fn sub_wrapping(self, rhs: Self) -> Self { + self - rhs + } - fn mul_wrapping(self, rhs: Self) -> Self { - self * rhs - } + fn mul_checked(self, rhs: Self) -> Option { + Some(self * rhs) + } - fn div_checked(self, rhs: Self) -> Option { - Some(self / rhs) - } + fn mul_wrapping(self, rhs: Self) -> Self { + self * rhs + } - fn div_wrapping(self, rhs: Self) -> Self { - self / rhs + fn div_checked(self, rhs: Self) -> Option { + Some(self / rhs) + } + + fn div_wrapping(self, rhs: Self) -> Self { + self / rhs + } } } macro_rules! native_type_op { ($t:tt) => { - impl ArrowNativeTypeOp for $t { + impl native_op::ArrowNativeTypeOp for $t { fn add_checked(self, rhs: Self) -> Option { self.checked_add(rhs) } @@ -212,9 +216,9 @@ native_type_op!(u16); native_type_op!(u32); native_type_op!(u64); -impl ArrowNativeTypeOp for f16 {} -impl ArrowNativeTypeOp for f32 {} -impl ArrowNativeTypeOp for f64 {} +impl native_op::ArrowNativeTypeOp for f16 {} +impl native_op::ArrowNativeTypeOp for f32 {} +impl native_op::ArrowNativeTypeOp for f64 {} impl private::Sealed for i8 {} impl ArrowNativeType for i8 { From 34ea894f2f38b3809de72434cfde3692556dad81 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 4 Sep 2022 01:05:22 -0700 Subject: [PATCH 11/11] Fix a typo --- arrow/src/datatypes/native.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 63a8dfdc6fbd..444f2b27dce6 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -123,7 +123,7 @@ pub(crate) mod native_op { /// default implementations. Integer types that need to deal with overflow can implement /// this trait. /// - /// The APIs with `wrapping` suffix are the variant of non-overflow-checking. If overflow + /// The APIs with `_wrapping` suffix are the variant of non-overflow-checking. If overflow /// occurred, they will supposedly wrap around the boundary of the type. /// /// The APIs with `_checked` suffix are the variant of overflow-checking which return `None`