From b8f9bc2c7bb1267b88fb24d11c1f09be603f00fa Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Thu, 3 Feb 2022 10:21:13 +0100 Subject: [PATCH] fix decimal add because arrow2 doesn't include decimal add in arithmetics::add --- datafusion/benches/data_utils/mod.rs | 2 +- .../src/physical_plan/expressions/binary.rs | 70 ++++++++++++------- .../src/physical_plan/expressions/cast.rs | 18 ++--- .../src/physical_plan/expressions/try_cast.rs | 17 ++--- datafusion/src/test_util.rs | 37 ++++++++++ 5 files changed, 100 insertions(+), 44 deletions(-) diff --git a/datafusion/benches/data_utils/mod.rs b/datafusion/benches/data_utils/mod.rs index ce7bb2b47e64..7d2885e380ae 100644 --- a/datafusion/benches/data_utils/mod.rs +++ b/datafusion/benches/data_utils/mod.rs @@ -123,7 +123,7 @@ fn create_record_batch( schema, vec![ Arc::new(Utf8Array::::from_slice(keys)), - Arc::new(Float32Array::from_slice(&[i as f32; batch_size])), + Arc::new(Float32Array::from_slice(vec![i as f32; batch_size])), Arc::new(Float64Array::from(values)), Arc::new(UInt64Array::from(integer_values_wide)), Arc::new(UInt64Array::from_slice(integer_values_narrow)), diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs index 673a254f692c..732fa1d8c676 100644 --- a/datafusion/src/physical_plan/expressions/binary.rs +++ b/datafusion/src/physical_plan/expressions/binary.rs @@ -20,6 +20,7 @@ use std::{any::Any, convert::TryInto, sync::Arc}; use crate::record_batch::RecordBatch; use arrow::array::*; use arrow::compute; +use arrow::datatypes::DataType::Decimal; use arrow::datatypes::{DataType, Schema}; use crate::error::{DataFusionError, Result}; @@ -247,9 +248,24 @@ fn evaluate_regex_case_insensitive( fn evaluate(lhs: &dyn Array, op: &Operator, rhs: &dyn Array) -> Result> { use Operator::*; - if matches!(op, Plus | Minus | Divide | Multiply | Modulo | BitwiseAnd) { + if matches!(op, Plus) { + let arr: ArrayRef = match (lhs.data_type(), rhs.data_type()) { + (Decimal(p1, s1), Decimal(p2, s2)) => { + let left_array = + lhs.as_any().downcast_ref::>().unwrap(); + let right_array = + rhs.as_any().downcast_ref::>().unwrap(); + Arc::new(if *p1 == *p2 && *s1 == *s2 { + compute::arithmetics::decimal::add(left_array, right_array) + } else { + compute::arithmetics::decimal::adaptive_add(left_array, right_array)? + }) + } + _ => compute::arithmetics::add(lhs, rhs).into(), + }; + Ok(arr) + } else if matches!(op, Minus | Divide | Multiply | Modulo) { let arr = match op { - Operator::Plus => compute::arithmetics::add(lhs, rhs), Operator::Minus => compute::arithmetics::sub(lhs, rhs), Operator::Divide => compute::arithmetics::div(lhs, rhs), Operator::Multiply => compute::arithmetics::mul(lhs, rhs), @@ -828,6 +844,7 @@ mod tests { use crate::error::Result; use crate::field_util::SchemaExt; use crate::physical_plan::expressions::{col, lit}; + use crate::test_util::create_decimal_array; use arrow::datatypes::{Field, SchemaRef}; use arrow::error::ArrowError; @@ -1015,7 +1032,11 @@ mod tests { } fn add_decimal(left: &Int128Array, right: &Int128Array) -> Result { - let mut decimal_builder = Int128Vec::with_capacity(left.len()); + let mut decimal_builder = Int128Vec::from_data( + left.data_type().clone(), + Vec::::with_capacity(left.len()), + None, + ); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { decimal_builder.push(None); @@ -1027,7 +1048,11 @@ mod tests { } fn subtract_decimal(left: &Int128Array, right: &Int128Array) -> Result { - let mut decimal_builder = Int128Vec::with_capacity(left.len()); + let mut decimal_builder = Int128Vec::from_data( + left.data_type().clone(), + Vec::::with_capacity(left.len()), + None, + ); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { decimal_builder.push(None); @@ -1043,7 +1068,11 @@ mod tests { right: &Int128Array, scale: u32, ) -> Result { - let mut decimal_builder = Int128Vec::with_capacity(left.len()); + let mut decimal_builder = Int128Vec::from_data( + left.data_type().clone(), + Vec::::with_capacity(left.len()), + None, + ); let divide = 10_i128.pow(scale); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { @@ -1061,7 +1090,11 @@ mod tests { right: &Int128Array, scale: i32, ) -> Result { - let mut decimal_builder = Int128Vec::with_capacity(left.len()); + let mut decimal_builder = Int128Vec::from_data( + left.data_type().clone(), + Vec::::with_capacity(left.len()), + None, + ); let mul = 10_f64.powi(scale); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { @@ -1081,7 +1114,11 @@ mod tests { } fn modulus_decimal(left: &Int128Array, right: &Int128Array) -> Result { - let mut decimal_builder = Int128Vec::with_capacity(left.len()); + let mut decimal_builder = Int128Vec::from_data( + left.data_type().clone(), + Vec::::with_capacity(left.len()), + None, + ); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { decimal_builder.push(None); @@ -2135,25 +2172,6 @@ mod tests { assert_eq!(result.as_ref(), &expected as &dyn Array); } - fn create_decimal_array( - array: &[Option], - _precision: usize, - _scale: usize, - ) -> Result { - let mut decimal_builder = Int128Vec::with_capacity(array.len()); - for value in array { - match value { - None => { - decimal_builder.push(None); - } - Some(v) => { - decimal_builder.try_push(Some(*v))?; - } - } - } - Ok(decimal_builder.into()) - } - #[test] fn comparison_decimal_op_test() -> Result<()> { let value_i128: i128 = 123; diff --git a/datafusion/src/physical_plan/expressions/cast.rs b/datafusion/src/physical_plan/expressions/cast.rs index 8b7fbbd311cb..90fb359921f4 100644 --- a/datafusion/src/physical_plan/expressions/cast.rs +++ b/datafusion/src/physical_plan/expressions/cast.rs @@ -105,7 +105,6 @@ pub fn cast_with_error( ) -> Result> { let result = cast::cast(array, cast_type, options)?; if result.null_count() != array.null_count() { - println!("{result:?} : {array:?}"); let casted_valids = result.validity().unwrap(); let failed_casts = match array.validity() { Some(valids) => valids ^ casted_valids, @@ -192,6 +191,7 @@ mod tests { use crate::error::Result; use crate::field_util::SchemaExt; use crate::physical_plan::expressions::col; + use crate::test_util::create_decimal_array_from_slice; use arrow::{array::*, datatypes::*}; type StringArray = Utf8Array; @@ -298,7 +298,7 @@ mod tests { #[test] fn test_cast_decimal_to_decimal() -> Result<()> { let array: Vec = vec![1234, 2222, 3, 4000, 5000]; - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -315,7 +315,7 @@ mod tests { DEFAULT_DATAFUSION_CAST_OPTIONS ); - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -339,7 +339,7 @@ mod tests { fn test_cast_decimal_to_numeric() -> Result<()> { let array: Vec = vec![1, 2, 3, 4, 5]; // decimal to i8 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -356,7 +356,7 @@ mod tests { DEFAULT_DATAFUSION_CAST_OPTIONS ); // decimal to i16 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -373,7 +373,7 @@ mod tests { DEFAULT_DATAFUSION_CAST_OPTIONS ); // decimal to i32 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -390,7 +390,7 @@ mod tests { DEFAULT_DATAFUSION_CAST_OPTIONS ); // decimal to i64 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -408,7 +408,7 @@ mod tests { ); // decimal to float32 let array: Vec = vec![1234, 2222, 3, 4000, 5000]; - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -425,7 +425,7 @@ mod tests { DEFAULT_DATAFUSION_CAST_OPTIONS ); // decimal to float64 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(20, 6), diff --git a/datafusion/src/physical_plan/expressions/try_cast.rs b/datafusion/src/physical_plan/expressions/try_cast.rs index d47270c8a3a9..a2e74bbac798 100644 --- a/datafusion/src/physical_plan/expressions/try_cast.rs +++ b/datafusion/src/physical_plan/expressions/try_cast.rs @@ -129,6 +129,7 @@ mod tests { use crate::error::Result; use crate::field_util::SchemaExt; use crate::physical_plan::expressions::col; + use crate::test_util::create_decimal_array_from_slice; use arrow::{array::*, datatypes::*}; type StringArray = Utf8Array; @@ -234,7 +235,7 @@ mod tests { fn test_try_cast_decimal_to_decimal() -> Result<()> { // try cast one decimal data type to another decimal data type let array: Vec = vec![1234, 2222, 3, 4000, 5000]; - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -250,7 +251,7 @@ mod tests { ] ); - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -274,7 +275,7 @@ mod tests { // TODO we should add function to create Int128Array with value and metadata // https://github.com/apache/arrow-rs/issues/1009 let array: Vec = vec![1, 2, 3, 4, 5]; - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; // decimal to i8 generic_decimal_to_other_test_cast!( decimal_array, @@ -292,7 +293,7 @@ mod tests { ); // decimal to i16 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -309,7 +310,7 @@ mod tests { ); // decimal to i32 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -326,7 +327,7 @@ mod tests { ); // decimal to i64 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -344,7 +345,7 @@ mod tests { // decimal to float32 let array: Vec = vec![1234, 2222, 3, 4000, 5000]; - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -360,7 +361,7 @@ mod tests { ] ); // decimal to float64 - let decimal_array = Int128Array::from_slice(&array); + let decimal_array = create_decimal_array_from_slice(&array, 10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(20, 6), diff --git a/datafusion/src/test_util.rs b/datafusion/src/test_util.rs index 02ea07a33d9d..edb0f60c6d53 100644 --- a/datafusion/src/test_util.rs +++ b/datafusion/src/test_util.rs @@ -334,3 +334,40 @@ mod tests { assert!(PathBuf::from(res).is_dir()); } } + +#[cfg(test)] +pub fn create_decimal_array( + array: &[Option], + precision: usize, + scale: usize, +) -> crate::error::Result { + use arrow::array::{Int128Vec, TryPush}; + let mut decimal_builder = Int128Vec::from_data( + DataType::Decimal(precision, scale), + Vec::::with_capacity(array.len()), + None, + ); + + for value in array { + match value { + None => { + decimal_builder.push(None); + } + Some(v) => { + decimal_builder.try_push(Some(*v))?; + } + } + } + Ok(decimal_builder.into()) +} + +#[cfg(test)] +pub fn create_decimal_array_from_slice( + array: &[i128], + precision: usize, + scale: usize, +) -> crate::error::Result { + let decimal_array_values: Vec> = + array.into_iter().map(|v| Some(*v)).collect(); + create_decimal_array(&decimal_array_values, precision, scale) +}