-
Notifications
You must be signed in to change notification settings - Fork 784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optionally disable `validate_decimal_precision` check in `DecimalBuilder.append_value` for interop test
#1767
Optionally disable `validate_decimal_precision` check in `DecimalBuilder.append_value` for interop test
#1767
Changes from 5 commits
6367da4
e2ad586
da44930
3a221ce
b289f11
2d8a69c
5665071
d6eeba5
48798f0
7a6be90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1486,7 +1486,7 @@ mod tests { | |
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, | ||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, | ||
]; | ||
let array_data = ArrayData::builder(DataType::Decimal(23, 6)) | ||
let array_data = ArrayData::builder(DataType::Decimal(38, 6)) | ||
.len(2) | ||
.add_buffer(Buffer::from(&values[..])) | ||
.build() | ||
|
@@ -1501,34 +1501,23 @@ mod tests { | |
fn test_decimal_append_error_value() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I recommend we change this test to show that it is OK to store these values in a decimal rather than removing it completely (i.e., change the test to validate that there is no error). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changed the test. Since I moved the precision check to ArrayData full validation, I added another test for that too. |
||
let mut decimal_builder = DecimalBuilder::new(10, 5, 3); | ||
let mut result = decimal_builder.append_value(123456); | ||
let mut error = result.unwrap_err(); | ||
assert_eq!( | ||
"Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999", | ||
error.to_string() | ||
); | ||
assert!(result.is_ok()); | ||
decimal_builder.append_value(12345).unwrap(); | ||
let arr = decimal_builder.finish(); | ||
assert_eq!("12.345", arr.value_as_string(0)); | ||
assert_eq!("12.345", arr.value_as_string(1)); | ||
|
||
decimal_builder = DecimalBuilder::new(10, 2, 1); | ||
result = decimal_builder.append_value(100); | ||
error = result.unwrap_err(); | ||
assert_eq!( | ||
"Invalid argument error: 100 is too large to store in a Decimal of precision 2. Max is 99", | ||
error.to_string() | ||
); | ||
assert!(result.is_ok()); | ||
decimal_builder.append_value(99).unwrap(); | ||
result = decimal_builder.append_value(-100); | ||
error = result.unwrap_err(); | ||
assert_eq!( | ||
"Invalid argument error: -100 is too small to store in a Decimal of precision 2. Min is -99", | ||
error.to_string() | ||
); | ||
assert!(result.is_ok()); | ||
decimal_builder.append_value(-99).unwrap(); | ||
let arr = decimal_builder.finish(); | ||
assert_eq!("9.9", arr.value_as_string(0)); | ||
assert_eq!("-9.9", arr.value_as_string(1)); | ||
assert_eq!("9.9", arr.value_as_string(1)); | ||
assert_eq!("-9.9", arr.value_as_string(3)); | ||
} | ||
|
||
#[test] | ||
fn test_decimal_from_iter_values() { | ||
let array = DecimalArray::from_iter_values(vec![-100, 0, 101].into_iter()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ | |
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates | ||
//! common attributes and operations for Arrow array. | ||
|
||
use crate::datatypes::{DataType, IntervalUnit, UnionMode}; | ||
use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode}; | ||
use crate::error::{ArrowError, Result}; | ||
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; | ||
use crate::{ | ||
|
@@ -999,6 +999,27 @@ impl ArrayData { | |
|
||
pub fn validate_dictionary_offset(&self) -> Result<()> { | ||
match &self.data_type { | ||
DataType::Decimal(p, _) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. C++ ArrayData full validation performs the precision check for the decimal type. I think this is necessary to add even if we don't remove There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree this code is necessary in |
||
let values_buffer = &self.buffers[0]; | ||
|
||
for pos in 0..values_buffer.len() { | ||
let raw_val = unsafe { | ||
std::slice::from_raw_parts( | ||
values_buffer.as_ptr().offset(pos as isize), | ||
16 as usize, | ||
) | ||
}; | ||
let as_array = raw_val.try_into(); | ||
match as_array { | ||
Ok(v) if raw_val.len() == 16 => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For some reason this code seems overly complicated -- I wonder if you could call i128::from_le_bytes directly on a slice like for pos in 0..values_buffer.len() {
let v = value_buffer.data[pos..pos+16];
let value = i128::from_le_bytes(v)
} or something 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I simplified it as: for pos in 0..values_buffer.len() {
let raw_val = unsafe {
std::slice::from_raw_parts(
values_buffer.as_ptr().add(pos),
16_usize,
)
};
let value = i128::from_le_bytes(raw_val.try_into().unwrap());
validate_decimal_precision(value, *p)?;
} |
||
let value = i128::from_le_bytes(v); | ||
validate_decimal_precision(value, *p)?; | ||
}, | ||
_ => panic!("The elements of ArrayData with Decimal type are not 128bit integers."), | ||
} | ||
} | ||
Ok(()) | ||
} | ||
DataType::Utf8 => self.validate_utf8::<i32>(), | ||
DataType::LargeUtf8 => self.validate_utf8::<i64>(), | ||
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()), | ||
|
@@ -1492,8 +1513,9 @@ mod tests { | |
use std::ptr::NonNull; | ||
|
||
use crate::array::{ | ||
make_array, Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array, | ||
StringArray, StructBuilder, UInt64Array, | ||
make_array, Array, BooleanBuilder, DecimalBuilder, FixedSizeListBuilder, | ||
Int32Array, Int32Builder, Int64Array, StringArray, StructBuilder, UInt64Array, | ||
UInt8Builder, | ||
}; | ||
use crate::buffer::Buffer; | ||
use crate::datatypes::Field; | ||
|
@@ -2707,4 +2729,35 @@ mod tests { | |
|
||
assert_eq!(array, &expected); | ||
} | ||
|
||
#[test] | ||
fn test_decimal_full_validation() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
let values_builder = UInt8Builder::new(10); | ||
let byte_width = 16; | ||
let mut fixed_size_builder = | ||
FixedSizeListBuilder::new(values_builder, byte_width); | ||
let value_as_bytes = DecimalBuilder::from_i128_to_fixed_size_bytes( | ||
123456, | ||
fixed_size_builder.value_length() as usize, | ||
) | ||
.unwrap(); | ||
fixed_size_builder | ||
.values() | ||
.append_slice(value_as_bytes.as_slice()) | ||
.unwrap(); | ||
fixed_size_builder.append(true).unwrap(); | ||
let fixed_size_array = fixed_size_builder.finish(); | ||
|
||
// Build ArrayData for Decimal | ||
let builder = ArrayData::builder(DataType::Decimal(5, 3)) | ||
.len(fixed_size_array.len()) | ||
.add_buffer(fixed_size_array.data_ref().child_data()[0].buffers()[0].clone()); | ||
let array_data = unsafe { builder.build_unchecked() }; | ||
let validation_result = array_data.validate_full(); | ||
let error = validation_result.unwrap_err(); | ||
assert_eq!( | ||
"Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999", | ||
error.to_string() | ||
); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The decimal check in full validation caught this invalid decimal value. Increased the precision so that it passes.