Skip to content

Commit

Permalink
Speed up Decimal256 validation based on bytes comparison and add be…
Browse files Browse the repository at this point in the history
…nchmark test (#2360)
  • Loading branch information
liukun4515 authored Aug 12, 2022
1 parent e7dcfbc commit b235173
Show file tree
Hide file tree
Showing 8 changed files with 789 additions and 159 deletions.
4 changes: 4 additions & 0 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,7 @@ required-features = ["test_utils"]
[[bench]]
name = "array_data_validate"
harness = false

[[bench]]
name = "decimal_validate"
harness = false
71 changes: 71 additions & 0 deletions arrow/benches/decimal_validate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;

use arrow::array::{Array, Decimal128Array, Decimal256Array, Decimal256Builder};
use criterion::Criterion;

extern crate arrow;

use arrow::util::decimal::Decimal256;

fn validate_decimal128_array(array: Decimal128Array) {
array.with_precision_and_scale(35, 0).unwrap();
}

fn validate_decimal256_array(array: Decimal256Array) {
array.with_precision_and_scale(35, 0).unwrap();
}

fn validate_decimal128_benchmark(c: &mut Criterion) {
let decimal_array = Decimal128Array::from_iter_values(vec![12324; 20000]);
let data = decimal_array.into_data();
c.bench_function("validate_decimal128_array 20000", |b| {
b.iter(|| {
let array = Decimal128Array::from(data.clone());
validate_decimal128_array(array);
})
});
}

fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut decimal_builder = Decimal256Builder::new(20000, 76, 0);
let mut bytes = vec![0; 32];
bytes[0..16].clone_from_slice(&12324_i128.to_le_bytes());
for _ in 0..20000 {
decimal_builder
.append_value(&Decimal256::new(76, 0, &bytes))
.unwrap();
}
let decimal_array256_data = decimal_builder.finish();
let data = decimal_array256_data.into_data();
c.bench_function("validate_decimal256_array 20000", |b| {
b.iter(|| {
let array = Decimal256Array::from(data.clone());
validate_decimal256_array(array);
})
});
}

criterion_group!(
benches,
validate_decimal128_benchmark,
validate_decimal256_benchmark,
);
criterion_main!(benches);
49 changes: 29 additions & 20 deletions arrow/src/array/array_decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@ use super::{BasicDecimalIter, BooleanBufferBuilder, FixedSizeBinaryArray};
#[allow(deprecated)]
pub use crate::array::DecimalIter;
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::DataType;
use crate::datatypes::{validate_decimal256_precision_with_lt_bytes, DataType};
use crate::datatypes::{
validate_decimal256_precision, validate_decimal_precision, DECIMAL256_MAX_PRECISION,
DECIMAL_DEFAULT_SCALE,
validate_decimal_precision, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal256};
Expand Down Expand Up @@ -271,13 +270,11 @@ impl Decimal128Array {
Decimal128Array::from(data)
}

/// Validates decimal values in this array can be properly interpreted
/// with the specified precision.
pub fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
if precision < self.precision {
for v in self.iter().flatten() {
validate_decimal_precision(v.as_i128(), precision)?;
}
// Validates decimal values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
for v in self.iter().flatten() {
validate_decimal_precision(v.as_i128(), precision)?;
}
Ok(())
}
Expand Down Expand Up @@ -317,7 +314,9 @@ impl Decimal128Array {
// Ensure that all values are within the requested
// precision. For performance, only check if the precision is
// decreased
self.validate_decimal_precision(precision)?;
if precision < self.precision {
self.validate_decimal_precision(precision)?;
}

let data_type = Self::TYPE_CONSTRUCTOR(self.precision, self.scale);
assert_eq!(self.data().data_type(), &data_type);
Expand All @@ -330,15 +329,23 @@ impl Decimal128Array {
}

impl Decimal256Array {
/// Validates decimal values in this array can be properly interpreted
/// with the specified precision.
pub fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
if precision < self.precision {
for v in self.iter().flatten() {
validate_decimal256_precision(&v.to_big_int(), precision)?;
// Validates decimal values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let raw_val = unsafe {
let pos = self.value_offset(idx);
std::slice::from_raw_parts(
self.raw_value_data_ptr().offset(pos as isize),
Self::VALUE_LENGTH as usize,
)
};
validate_decimal256_precision_with_lt_bytes(raw_val, precision)
} else {
Ok(())
}
}
Ok(())
})
}

/// Returns a Decimal array with the same data as self, with the
Expand Down Expand Up @@ -376,7 +383,9 @@ impl Decimal256Array {
// Ensure that all values are within the requested
// precision. For performance, only check if the precision is
// decreased
self.validate_decimal_precision(precision)?;
if precision < self.precision {
self.validate_decimal_precision(precision)?;
}

let data_type = Self::TYPE_CONSTRUCTOR(self.precision, self.scale);
assert_eq!(self.data().data_type(), &data_type);
Expand Down
10 changes: 5 additions & 5 deletions arrow/src/array/builder/decimal_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.

use num::BigInt;
use std::any::Any;
use std::sync::Arc;

Expand All @@ -26,7 +25,9 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder};

use crate::error::{ArrowError, Result};

use crate::datatypes::{validate_decimal256_precision, validate_decimal_precision};
use crate::datatypes::{
validate_decimal256_precision_with_lt_bytes, validate_decimal_precision,
};
use crate::util::decimal::Decimal256;

/// Array Builder for [`Decimal128Array`]
Expand Down Expand Up @@ -201,8 +202,7 @@ impl Decimal256Builder {
pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
let value = if self.value_validation {
let raw_bytes = value.raw_value();
let integer = BigInt::from_signed_bytes_le(raw_bytes);
validate_decimal256_precision(&integer, self.precision)?;
validate_decimal256_precision_with_lt_bytes(raw_bytes, self.precision)?;
value
} else {
value
Expand Down Expand Up @@ -255,7 +255,7 @@ impl Decimal256Builder {
#[cfg(test)]
mod tests {
use super::*;
use num::Num;
use num::{BigInt, Num};

use crate::array::array_decimal::Decimal128Array;
use crate::array::{array_decimal, Array};
Expand Down
8 changes: 3 additions & 5 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
//! common attributes and operations for Arrow array.
use crate::datatypes::{
validate_decimal256_precision, validate_decimal_precision, DataType, IntervalUnit,
UnionMode,
validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, DataType,
IntervalUnit, UnionMode,
};
use crate::error::{ArrowError, Result};
use crate::util::bit_iterator::BitSliceIterator;
Expand All @@ -30,7 +30,6 @@ use crate::{
util::bit_util,
};
use half::f16;
use num::BigInt;
use std::convert::TryInto;
use std::mem;
use std::ops::Range;
Expand Down Expand Up @@ -1049,8 +1048,7 @@ impl ArrayData {
for pos in 0..self.len() {
let offset = pos * 32;
let raw_bytes = &values[offset..offset + 32];
let integer = BigInt::from_signed_bytes_le(raw_bytes);
validate_decimal256_precision(&integer, *p)?;
validate_decimal256_precision_with_lt_bytes(raw_bytes, *p)?;
}
Ok(())
}
Expand Down
Loading

0 comments on commit b235173

Please sign in to comment.