Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally disable validate_decimal_precision check in DecimalBuilder.append_value for interop test #1767

Merged
merged 10 commits into from
Jun 3, 2022
28 changes: 19 additions & 9 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1486,7 +1486,7 @@ mod tests {
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
];
let array_data = ArrayData::builder(DataType::Decimal(23, 6))
let array_data = ArrayData::builder(DataType::Decimal(38, 6))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The decimal check in full validation caught this invalid decimal value. Increasing the precision so that it passes.

.len(2)
.add_buffer(Buffer::from(&values[..]))
.build()
Expand All @@ -1498,6 +1498,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_append_error_value() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend we change this test to show that it is ok to store these values in a decimal rather than removing it completely (aka change the test to validate there is no error).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the test. Since I moved the precision check into ArrayData full validation, I added another test for that too.

let mut decimal_builder = DecimalBuilder::new(10, 5, 3);
let mut result = decimal_builder.append_value(123456);
Expand All @@ -1506,9 +1507,15 @@ mod tests {
"Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999",
error.to_string()
);

unsafe {
decimal_builder.disable_value_validation();
}
result = decimal_builder.append_value(123456);
assert!(result.is_ok());
decimal_builder.append_value(12345).unwrap();
let arr = decimal_builder.finish();
assert_eq!("12.345", arr.value_as_string(0));
assert_eq!("12.345", arr.value_as_string(1));

decimal_builder = DecimalBuilder::new(10, 2, 1);
result = decimal_builder.append_value(100);
Expand All @@ -1517,18 +1524,21 @@ mod tests {
"Invalid argument error: 100 is too large to store in a Decimal of precision 2. Max is 99",
error.to_string()
);

unsafe {
decimal_builder.disable_value_validation();
}
result = decimal_builder.append_value(100);
assert!(result.is_ok());
decimal_builder.append_value(99).unwrap();
result = decimal_builder.append_value(-100);
error = result.unwrap_err();
assert_eq!(
"Invalid argument error: -100 is too small to store in a Decimal of precision 2. Min is -99",
error.to_string()
);
assert!(result.is_ok());
decimal_builder.append_value(-99).unwrap();
let arr = decimal_builder.finish();
assert_eq!("9.9", arr.value_as_string(0));
assert_eq!("-9.9", arr.value_as_string(1));
assert_eq!("9.9", arr.value_as_string(1));
assert_eq!("-9.9", arr.value_as_string(3));
}

#[test]
fn test_decimal_from_iter_values() {
let array = DecimalArray::from_iter_values(vec![-100, 0, 101].into_iter());
Expand Down
28 changes: 24 additions & 4 deletions arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,10 @@ pub struct DecimalBuilder {
builder: FixedSizeListBuilder<UInt8Builder>,
precision: usize,
scale: usize,

/// Should i128 values be validated for compatibility with scale and precision?
/// defaults to true
value_validation: bool,
}

impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericBinaryBuilder<OffsetSize> {
Expand Down Expand Up @@ -1453,16 +1457,32 @@ impl DecimalBuilder {
builder: FixedSizeListBuilder::new(values_builder, byte_width),
precision,
scale,
value_validation: true,
}
}

/// Disables value validation for this builder.
///
/// Validation is on by default; once it is disabled, `append_value` no longer
/// checks values against the builder's precision and stores them as given.
///
/// # Safety
///
/// After disabling validation, the caller must ensure that appended values are
/// compatible with the specified precision and scale.
pub unsafe fn disable_value_validation(&mut self) {
self.value_validation = false;
}

/// Appends a decimal value, given as an `i128`, into the builder.
///
/// The value is converted to its fixed-size byte representation, and the `append`
/// method is called automatically to delimit those bytes as a distinct array element.
#[inline]
pub fn append_value(&mut self, value: i128) -> Result<()> {
let value = validate_decimal_precision(value, self.precision)?;
let value = if self.value_validation {
validate_decimal_precision(value, self.precision)?
} else {
value
};

let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
value,
self.builder.value_length() as usize,
Expand All @@ -1478,7 +1498,7 @@ impl DecimalBuilder {
self.builder.append(true)
}

fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
pub(crate) fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
if size > 16 {
return Err(ArrowError::InvalidArgumentError(
"DecimalBuilder only supports values up to 16 bytes.".to_string(),
Expand Down Expand Up @@ -3418,14 +3438,14 @@ mod tests {

#[test]
fn test_decimal_builder() {
let mut builder = DecimalBuilder::new(30, 23, 6);
let mut builder = DecimalBuilder::new(30, 38, 6);

builder.append_value(8_887_000_000).unwrap();
builder.append_null().unwrap();
builder.append_value(-8_887_000_000).unwrap();
let decimal_array: DecimalArray = builder.finish();

assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type());
assert_eq!(3, decimal_array.len());
assert_eq!(1, decimal_array.null_count());
assert_eq!(32, decimal_array.value_offset(2));
Expand Down
54 changes: 51 additions & 3 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
//! common attributes and operations for Arrow array.

use crate::datatypes::{DataType, IntervalUnit, UnionMode};
use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode};
use crate::error::{ArrowError, Result};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
Expand Down Expand Up @@ -999,6 +999,21 @@ impl ArrayData {

pub fn validate_dictionary_offset(&self) -> Result<()> {
match &self.data_type {
DataType::Decimal(p, _) => {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

C++ ArrayData full validation performs the precision check for the decimal type. I think this check is necessary to add even if we don't remove validate_decimal_precision from DecimalBuilder.append_value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this code is necessary in validate 👍

// Each decimal is decoded below as a 16-byte little-endian `i128`, so the
// buffer must be walked one whole value at a time. Stepping one *byte* at a
// time would decode overlapping, misaligned windows as bogus decimals and
// read up to 15 bytes past the end of the buffer.
const DECIMAL_BYTE_WIDTH: usize = 16;
let values_buffer = &self.buffers[0];
// Number of complete 16-byte values held by the buffer.
let value_count = values_buffer.len() / DECIMAL_BYTE_WIDTH;

// NOTE(review): this checks every complete value present in the buffer, not
// only the array's logical range -- confirm that is the intended scope.
for pos in 0..value_count {
    // SAFETY: `pos < value_count`, hence
    // `pos * DECIMAL_BYTE_WIDTH + DECIMAL_BYTE_WIDTH <= values_buffer.len()`
    // and the 16-byte read stays inside the buffer.
    let raw_val = unsafe {
        std::slice::from_raw_parts(
            values_buffer.as_ptr().add(pos * DECIMAL_BYTE_WIDTH),
            DECIMAL_BYTE_WIDTH,
        )
    };
    // The slice is exactly 16 bytes long, so the array conversion cannot fail.
    let value = i128::from_le_bytes(raw_val.try_into().unwrap());
    // Fails with an error if the value does not fit in precision `p`.
    validate_decimal_precision(value, *p)?;
}
Ok(())
}
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
Expand Down Expand Up @@ -1492,8 +1507,9 @@ mod tests {
use std::ptr::NonNull;

use crate::array::{
make_array, Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array,
StringArray, StructBuilder, UInt64Array,
make_array, Array, BooleanBuilder, DecimalBuilder, FixedSizeListBuilder,
Int32Array, Int32Builder, Int64Array, StringArray, StructBuilder, UInt64Array,
UInt8Builder,
};
use crate::buffer::Buffer;
use crate::datatypes::Field;
Expand Down Expand Up @@ -2707,4 +2723,36 @@ mod tests {

assert_eq!(array, &expected);
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_full_validation() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

let values_builder = UInt8Builder::new(10);
let byte_width = 16;
let mut fixed_size_builder =
FixedSizeListBuilder::new(values_builder, byte_width);
let value_as_bytes = DecimalBuilder::from_i128_to_fixed_size_bytes(
123456,
fixed_size_builder.value_length() as usize,
)
.unwrap();
fixed_size_builder
.values()
.append_slice(value_as_bytes.as_slice())
.unwrap();
fixed_size_builder.append(true).unwrap();
let fixed_size_array = fixed_size_builder.finish();

// Build ArrayData for Decimal
let builder = ArrayData::builder(DataType::Decimal(5, 3))
.len(fixed_size_array.len())
.add_buffer(fixed_size_array.data_ref().child_data()[0].buffers()[0].clone());
let array_data = unsafe { builder.build_unchecked() };
let validation_result = array_data.validate_full();
let error = validation_result.unwrap_err();
assert_eq!(
"Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999",
error.to_string()
);
}
}
3 changes: 3 additions & 0 deletions arrow/src/array/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal() {
let decimal_array =
create_decimal_array(&[Some(1), Some(2), None, Some(3)], 10, 3);
Expand All @@ -734,6 +735,7 @@ mod tests {
assert_eq!(array, expected);
}
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_offset() {
let decimal_array =
create_decimal_array(&[Some(1), Some(2), None, Some(3)], 10, 3);
Expand All @@ -748,6 +750,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_null_offset_nulls() {
let decimal_array =
create_decimal_array(&[Some(1), Some(2), None, Some(3)], 10, 3);
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1203,8 +1203,8 @@ mod tests {
fn test_csv_reader_with_decimal() {
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Decimal(26, 6), false),
Field::new("lng", DataType::Decimal(26, 6), false),
Field::new("lat", DataType::Decimal(38, 6), false),
Field::new("lng", DataType::Decimal(38, 6), false),
]);

let file = File::open("test/data/decimal_test.csv").unwrap();
Expand Down
1 change: 1 addition & 0 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_round_trip() -> Result<()> {
// create an array natively
let original_array = [Some(12345_i128), Some(-12345_i128), None]
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/ipc/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,7 @@ mod tests {
use crate::{datatypes, util::integration_util::*};

#[test]
#[cfg(not(feature = "force_validate"))]
fn read_generated_files_014() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "0.14.1";
Expand Down Expand Up @@ -1156,6 +1157,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn read_generated_streams_014() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "0.14.1";
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/ipc/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn read_and_rewrite_generated_files_014() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "0.14.1";
Expand Down Expand Up @@ -1130,6 +1131,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these tests use the same 0.14.1 decimal file too.

fn read_and_rewrite_generated_streams_014() {
let testdata = crate::util::test_util::arrow_test_data();
let version = "0.14.1";
Expand Down
4 changes: 4 additions & 0 deletions integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,10 @@ fn array_from_json(
}
DataType::Decimal(precision, scale) => {
let mut b = DecimalBuilder::new(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
}
for (is_valid, value) in json_col
.validity
.as_ref()
Expand Down