From de9a90b1e09a43a6e8d2d3f3375f02a755d0cde1 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Thu, 23 Mar 2023 22:01:35 +0000 Subject: [PATCH] Cleanup uses of Array::data_ref (#3880) (#3918) * Cleanup uses of Array::data_ref (#3880) * Further cleanup and fixes --- arrow-arith/src/arithmetic.rs | 54 +-- arrow-arith/src/boolean.rs | 351 +++++++----------- arrow-array/src/array/dictionary_array.rs | 2 +- .../src/array/fixed_size_binary_array.rs | 2 +- .../src/array/fixed_size_list_array.rs | 2 +- arrow-array/src/array/list_array.rs | 2 +- arrow-array/src/array/null_array.rs | 2 +- arrow-ord/src/sort.rs | 21 +- arrow-select/src/filter.rs | 22 +- arrow/benches/mutable_array.rs | 3 +- arrow/tests/array_equal.rs | 6 +- .../src/arrow/array_reader/struct_array.rs | 2 +- 12 files changed, 170 insertions(+), 299 deletions(-) diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index de4b0ccb8858..7d60d131bf52 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -103,14 +103,13 @@ fn math_checked_divide_op_on_iters( left: impl Iterator>, right: impl Iterator>, op: F, - len: usize, - null_bit_buffer: Option, + nulls: Option, ) -> Result, ArrowError> where T: ArrowNumericType, F: Fn(T::Native, T::Native) -> Result, { - let buffer = if null_bit_buffer.is_some() { + let buffer = if nulls.is_some() { let values = left.zip(right).map(|(left, right)| { if let (Some(l), Some(r)) = (left, right) { op(l, r) @@ -130,18 +129,7 @@ where unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) } }?; - let data = unsafe { - arrow_data::ArrayData::new_unchecked( - T::DATA_TYPE, - len, - None, - null_bit_buffer, - 0, - vec![buffer], - vec![], - ) - }; - Ok(PrimitiveArray::::from(data)) + Ok(PrimitiveArray::new(T::DATA_TYPE, buffer.into(), nulls)) } /// Calculates the modulus operation `left % right` on two SIMD inputs. @@ -284,20 +272,16 @@ where } // Create the combined `Bitmap` - let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap( - &[left.data_ref(), right.data_ref()], - left.len(), - ); + let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls()); let lanes = T::lanes(); let buffer_size = left.len() * std::mem::size_of::(); let mut result = arrow_buffer::MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - match &null_bit_buffer { + match &nulls { Some(b) => { - // combine_option_bitmap returns a slice or new buffer starting at 0 - let valid_chunks = b.bit_chunks(0, left.len()); + let valid_chunks = b.inner().bit_chunks(); // process data in chunks of 64 elements since we also get 64 bits of validity information at a time @@ -372,18 +356,7 @@ where } } - let data = unsafe { - arrow_data::ArrayData::new_unchecked( - T::DATA_TYPE, - left.len(), - None, - null_bit_buffer, - 0, - vec![result.into()], - vec![], - ) - }; - Ok(PrimitiveArray::::from(data)) + Ok(PrimitiveArray::new(T::DATA_TYPE, result.into(), nulls)) } /// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the same) key type $KT @@ -628,10 +601,7 @@ where ))); } - let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap( - &[left.data_ref(), right.data_ref()], - left.len(), - ); + let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls()); // Safety justification: Since the inputs are valid Arrow arrays, all values are // valid indexes into the dictionary (which is verified during construction) @@ -653,13 +623,7 @@ where .take_iter_unchecked(right.keys_iter()) }; - math_checked_divide_op_on_iters( - left_iter, - right_iter, - op, - left.len(), - null_bit_buffer, - ) + math_checked_divide_op_on_iters(left_iter, right_iter, op, nulls) } #[cfg(feature = "dyn_arith_dict")] diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs index 3e21c2f1b484..eaef1378258b 100644 --- a/arrow-arith/src/boolean.rs +++ b/arrow-arith/src/boolean.rs @@ -24,30 +24,55 @@ use arrow_array::*; use arrow_buffer::bit_util::ceil; -use arrow_buffer::buffer::{ - bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or, - buffer_unary_not, -}; -use arrow_buffer::{Buffer, MutableBuffer}; -use arrow_data::bit_mask::combine_option_bitmap; +use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper}; +use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer}; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; -/// Updates null buffer based on data buffer and null buffer of the operand at other side -/// in boolean AND kernel with Kleene logic. In short, because for AND kernel, null AND false -/// results false. So we cannot simply AND two null buffers. This function updates null buffer -/// of one side if other side is a false value. -pub(crate) fn build_null_buffer_for_and_kleene( - left_data: &ArrayData, - right_data: &ArrayData, -) -> Option { - let left_buffer = &left_data.buffers()[0]; - let right_buffer = &right_data.buffers()[0]; - - let left_null_buffer = left_data.nulls(); - let right_null_buffer = right_data.nulls(); - - match (left_null_buffer, right_null_buffer) { +/// Logical 'and' boolean values with Kleene logic +/// +/// # Behavior +/// +/// This function behaves as follows with nulls: +/// +/// * `true` and `null` = `null` +/// * `null` and `true` = `null` +/// * `false` and `null` = `false` +/// * `null` and `false` = `false` +/// * `null` and `null` = `null` +/// +/// In other words, in this context a null value really means \"unknown\", +/// and an unknown value 'and' false is always false. +/// For a different null behavior, see function \"and\". +/// +/// # Example +/// +/// ```rust +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::and_kleene; +/// let a = BooleanArray::from(vec![Some(true), Some(false), None]); +/// let b = BooleanArray::from(vec![None, None, None]); +/// let and_ab = and_kleene(&a, &b).unwrap(); +/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None])); +/// ``` +/// +/// # Fails +/// +/// If the operands have different lengths +pub fn and_kleene( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform bitwise operation on arrays of different length".to_string(), + )); + } + + let left_values = left.values(); + let right_values = right.values(); + + let buffer = match (left.nulls(), right.nulls()) { (None, None) => None, (Some(left_null_buffer), None) => { // The right side has no null values. @@ -57,9 +82,9 @@ pub(crate) fn build_null_buffer_for_and_kleene( Some(bitwise_bin_op_helper( left_null_buffer.buffer(), left_null_buffer.offset(), - right_buffer, - right_data.offset(), - left_data.len(), + right_values.inner(), + right_values.offset(), + left.len(), |a, b| a | !b, )) } @@ -68,9 +93,9 @@ pub(crate) fn build_null_buffer_for_and_kleene( Some(bitwise_bin_op_helper( right_null_buffer.buffer(), right_null_buffer.offset(), - left_buffer, - left_data.offset(), - left_data.len(), + left_values.inner(), + left_values.offset(), + left.len(), |a, b| a | !b, )) } @@ -83,44 +108,69 @@ pub(crate) fn build_null_buffer_for_and_kleene( Some(bitwise_quaternary_op_helper( [ left_null_buffer.buffer(), - left_buffer, + left_values.inner(), right_null_buffer.buffer(), - right_buffer, + right_values.inner(), ], [ left_null_buffer.offset(), - left_data.offset(), + left_values.offset(), right_null_buffer.offset(), - right_data.offset(), + right_values.offset(), ], - left_data.len(), + left.len(), |a, b, c, d| (a | (c & !d)) & (c | (a & !b)), )) } - } + }; + let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len()))); + Ok(BooleanArray::new(left_values & right_values, nulls)) } -/// For AND/OR kernels, the result of null buffer is simply a bitwise `and` operation. -pub(crate) fn build_null_buffer_for_and_or( - left_data: &ArrayData, - right_data: &ArrayData, -) -> Option { - // `arrays` are not empty, so safely do `unwrap` directly. - combine_option_bitmap(&[left_data, right_data], left_data.len()) -} +/// Logical 'or' boolean values with Kleene logic +/// +/// # Behavior +/// +/// This function behaves as follows with nulls: +/// +/// * `true` or `null` = `true` +/// * `null` or `true` = `true` +/// * `false` or `null` = `null` +/// * `null` or `false` = `null` +/// * `null` or `null` = `null` +/// +/// In other words, in this context a null value really means \"unknown\", +/// and an unknown value 'or' true is always true. +/// For a different null behavior, see function \"or\". +/// +/// # Example +/// +/// ```rust +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::or_kleene; +/// let a = BooleanArray::from(vec![Some(true), Some(false), None]); +/// let b = BooleanArray::from(vec![None, None, None]); +/// let or_ab = or_kleene(&a, &b).unwrap(); +/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None])); +/// ``` +/// +/// # Fails +/// +/// If the operands have different lengths +pub fn or_kleene( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform bitwise operation on arrays of different length".to_string(), + )); + } + + let left_values = left.values(); + let right_values = right.values(); -/// Updates null buffer based on data buffer and null buffer of the operand at other side -/// in boolean OR kernel with Kleene logic. In short, because for OR kernel, null OR true -/// results true. So we cannot simply AND two null buffers. This function updates null -/// buffer of one side if other side is a true value. -pub(crate) fn build_null_buffer_for_or_kleene( - left_data: &ArrayData, - right_data: &ArrayData, -) -> Option { - let left_buffer = &left_data.buffers()[0]; - let right_buffer = &right_data.buffers()[0]; - - match (left_data.nulls(), right_data.nulls()) { + let buffer = match (left.nulls(), right.nulls()) { (None, None) => None, (Some(left_nulls), None) => { // The right side has no null values. @@ -130,9 +180,9 @@ pub(crate) fn build_null_buffer_for_or_kleene( Some(bitwise_bin_op_helper( left_nulls.buffer(), left_nulls.offset(), - right_buffer, - right_data.offset(), - right_data.len(), + right_values.inner(), + right_values.offset(), + left.len(), |a, b| a | b, )) } @@ -141,9 +191,9 @@ pub(crate) fn build_null_buffer_for_or_kleene( Some(bitwise_bin_op_helper( right_nulls.buffer(), right_nulls.offset(), - left_buffer, - left_data.offset(), - left_data.len(), + left_values.inner(), + left_values.offset(), + left.len(), |a, b| a | b, )) } @@ -156,33 +206,34 @@ pub(crate) fn build_null_buffer_for_or_kleene( Some(bitwise_quaternary_op_helper( [ left_nulls.buffer(), - left_buffer, + left_values.inner(), right_nulls.buffer(), - right_buffer, + right_values.inner(), ], [ left_nulls.offset(), - left_data.offset(), + left_values.offset(), right_nulls.offset(), - right_data.offset(), + right_values.offset(), ], - left_data.len(), + left.len(), |a, b, c, d| (a | (c & d)) & (c | (a & b)), )) } - } + }; + + let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len()))); + Ok(BooleanArray::new(left_values | right_values, nulls)) } /// Helper function to implement binary kernels -pub(crate) fn binary_boolean_kernel( +pub(crate) fn binary_boolean_kernel( left: &BooleanArray, right: &BooleanArray, op: F, - null_op: U, ) -> Result where - F: Fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer, - U: Fn(&ArrayData, &ArrayData) -> Option, + F: Fn(&BooleanBuffer, &BooleanBuffer) -> BooleanBuffer, { if left.len() != right.len() { return Err(ArrowError::ComputeError( @@ -190,32 +241,9 @@ where )); } - let len = left.len(); - - let left_data = left.data_ref(); - let right_data = right.data_ref(); - - let left_buffer = &left_data.buffers()[0]; - let right_buffer = &right_data.buffers()[0]; - let left_offset = left.offset(); - let right_offset = right.offset(); - - let null_bit_buffer = null_op(left_data, right_data); - - let values = op(left_buffer, left_offset, right_buffer, right_offset, len); - - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - len, - None, - null_bit_buffer, - 0, - vec![values], - vec![], - ) - }; - Ok(BooleanArray::from(data)) + let nulls = NullBuffer::union(left.nulls(), right.nulls()); + let values = op(left.values(), right.values()); + Ok(BooleanArray::new(values, nulls)) } /// Performs `AND` operation on two arrays. If either left or right value is null then the @@ -235,49 +263,7 @@ pub fn and( left: &BooleanArray, right: &BooleanArray, ) -> Result { - binary_boolean_kernel(left, right, buffer_bin_and, build_null_buffer_for_and_or) -} - -/// Logical 'and' boolean values with Kleene logic -/// -/// # Behavior -/// -/// This function behaves as follows with nulls: -/// -/// * `true` and `null` = `null` -/// * `null` and `true` = `null` -/// * `false` and `null` = `false` -/// * `null` and `false` = `false` -/// * `null` and `null` = `null` -/// -/// In other words, in this context a null value really means \"unknown\", -/// and an unknown value 'and' false is always false. -/// For a different null behavior, see function \"and\". -/// -/// # Example -/// -/// ```rust -/// # use arrow_array::BooleanArray; -/// # use arrow_arith::boolean::and_kleene; -/// let a = BooleanArray::from(vec![Some(true), Some(false), None]); -/// let b = BooleanArray::from(vec![None, None, None]); -/// let and_ab = and_kleene(&a, &b).unwrap(); -/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None])); -/// ``` -/// -/// # Fails -/// -/// If the operands have different lengths -pub fn and_kleene( - left: &BooleanArray, - right: &BooleanArray, -) -> Result { - binary_boolean_kernel( - left, - right, - buffer_bin_and, - build_null_buffer_for_and_kleene, - ) + binary_boolean_kernel(left, right, |a, b| a & b) } /// Performs `OR` operation on two arrays. If either left or right value is null then the @@ -294,44 +280,7 @@ pub fn and_kleene( /// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None])); /// ``` pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { - binary_boolean_kernel(left, right, buffer_bin_or, build_null_buffer_for_and_or) -} - -/// Logical 'or' boolean values with Kleene logic -/// -/// # Behavior -/// -/// This function behaves as follows with nulls: -/// -/// * `true` or `null` = `true` -/// * `null` or `true` = `true` -/// * `false` or `null` = `null` -/// * `null` or `false` = `null` -/// * `null` or `null` = `null` -/// -/// In other words, in this context a null value really means \"unknown\", -/// and an unknown value 'or' true is always true. -/// For a different null behavior, see function \"or\". -/// -/// # Example -/// -/// ```rust -/// # use arrow_array::BooleanArray; -/// # use arrow_arith::boolean::or_kleene; -/// let a = BooleanArray::from(vec![Some(true), Some(false), None]); -/// let b = BooleanArray::from(vec![None, None, None]); -/// let or_ab = or_kleene(&a, &b).unwrap(); -/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None])); -/// ``` -/// -/// # Fails -/// -/// If the operands have different lengths -pub fn or_kleene( - left: &BooleanArray, - right: &BooleanArray, -) -> Result { - binary_boolean_kernel(left, right, buffer_bin_or, build_null_buffer_for_or_kleene) + binary_boolean_kernel(left, right, |a, b| a | b) } /// Performs unary `NOT` operation on an arrays. If value is null then the result is also @@ -347,26 +296,9 @@ pub fn or_kleene( /// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None])); /// ``` pub fn not(left: &BooleanArray) -> Result { - let left_offset = left.offset(); - let len = left.len(); - - let data = left.data_ref(); - let null_bit_buffer = data.nulls().map(|b| b.inner().sliced()); - - let values = buffer_unary_not(data.buffers()[0], left_offset, len); - - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - len, - None, - null_bit_buffer, - 0, - vec![values], - vec![], - ) - }; - Ok(BooleanArray::from(data)) + let nulls = left.nulls().cloned(); + let values = !left.values(); + Ok(BooleanArray::new(values, nulls)) } /// Returns a non-null [BooleanArray] with whether each value of the array is null. @@ -381,29 +313,12 @@ pub fn not(left: &BooleanArray) -> Result { /// assert_eq!(a_is_null, BooleanArray::from(vec![false, false, true])); /// ``` pub fn is_null(input: &dyn Array) -> Result { - let len = input.len(); - - let output = match input.data_ref().nulls() { - None => { - let len_bytes = ceil(len, 8); - MutableBuffer::from_len_zeroed(len_bytes).into() - } - Some(nulls) => buffer_unary_not(nulls.buffer(), nulls.offset(), nulls.len()), + let values = match input.nulls() { + None => NullBuffer::new_null(input.len()).into_inner(), + Some(nulls) => !nulls.inner(), }; - let data = unsafe { - ArrayData::new_unchecked( - DataType::Boolean, - len, - None, - None, - 0, - vec![output], - vec![], - ) - }; - - Ok(BooleanArray::from(data)) + Ok(BooleanArray::new(values, None)) } /// Returns a non-null [BooleanArray] with whether each value of the array is not null. @@ -420,7 +335,7 @@ pub fn is_null(input: &dyn Array) -> Result { pub fn is_not_null(input: &dyn Array) -> Result { let len = input.len(); - let output = match input.data_ref().nulls() { + let output = match input.nulls() { None => { let len_bytes = ceil(len, 8); MutableBuffer::new(len_bytes) diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 0862230a499e..49a184369801 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -294,7 +294,7 @@ impl DictionaryArray { /// Returns a clone of the value type of this list. pub fn value_type(&self) -> DataType { - self.values.data_ref().data_type().clone() + self.values.data_type().clone() } /// The length of the dictionary is the length of the keys array. diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 062961a20abb..af51ff787722 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -425,7 +425,7 @@ impl From for FixedSizeBinaryArray { .len(v.len()) .offset(v.offset()) .add_buffer(child_data.buffers()[0].slice(child_data.offset())) - .nulls(v.data_ref().nulls().cloned()); + .nulls(v.nulls().cloned()); let data = unsafe { builder.build_unchecked() }; Self::from(data) diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 7d65927cdeec..0910e2944f76 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -76,7 +76,7 @@ impl FixedSizeListArray { /// Returns a clone of the value type of this list. pub fn value_type(&self) -> DataType { - self.values.data_ref().data_type().clone() + self.values.data_type().clone() } /// Returns ith value of this list array. diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index dca256008db2..c7e2a817ba33 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -85,7 +85,7 @@ impl GenericListArray { /// Returns a clone of the value type of this list. pub fn value_type(&self) -> DataType { - self.values.data_ref().data_type().clone() + self.values.data_type().clone() } /// Returns ith value of this list array. diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs index fba6e41e871d..3d65e9e9ebad 100644 --- a/arrow-array/src/array/null_array.rs +++ b/arrow-array/src/array/null_array.rs @@ -97,7 +97,7 @@ impl Array for NullArray { /// Returns the total number of null values in this array. /// The null count of a `NullArray` always equals its length. fn null_count(&self) -> usize { - self.data_ref().len() + self.len() } } diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index ab6460e835f9..c6fedb960345 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -22,7 +22,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::cast::*; use arrow_array::types::*; use arrow_array::*; -use arrow_buffer::{ArrowNativeType, MutableBuffer}; +use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer}; use arrow_data::ArrayData; use arrow_data::ArrayDataBuilder; use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit}; @@ -1145,9 +1145,9 @@ where } type LexicographicalCompareItem<'a> = ( - &'a ArrayData, // data - DynComparator, // comparator - SortOptions, // sort_option + Option<&'a NullBuffer>, // nulls + DynComparator, // comparator + SortOptions, // sort_option ); /// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data @@ -1159,8 +1159,13 @@ pub struct LexicographicalComparator<'a> { impl LexicographicalComparator<'_> { /// lexicographically compare values at the wrapped columns with given indices. pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering { - for (data, comparator, sort_option) in &self.compare_items { - match (data.is_valid(a_idx), data.is_valid(b_idx)) { + for (nulls, comparator, sort_option) in &self.compare_items { + let (lhs_valid, rhs_valid) = match nulls { + Some(n) => (n.is_valid(a_idx), n.is_valid(b_idx)), + None => (true, true), + }; + + match (lhs_valid, rhs_valid) { (true, true) => { match (comparator)(a_idx, b_idx) { // equal, move on to next column @@ -1205,11 +1210,9 @@ impl LexicographicalComparator<'_> { .iter() .map(|column| { // flatten and convert build comparators - // use ArrayData for is_valid checks later to avoid dynamic call let values = column.values.as_ref(); - let data = values.data_ref(); Ok(( - data, + values.nulls(), build_compare(values, values)?, column.options.unwrap_or_default(), )) diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index 14fd5d9d1d32..567aaa58e8bf 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -53,11 +53,7 @@ pub struct SlicesIterator<'a>(BitSliceIterator<'a>); impl<'a> SlicesIterator<'a> { pub fn new(filter: &'a BooleanArray) -> Self { - let values = &filter.data_ref().buffers()[0]; - let len = filter.len(); - let offset = filter.offset(); - - Self(BitSliceIterator::new(values, offset, len)) + Self(filter.values().set_slices()) } } @@ -149,18 +145,9 @@ pub fn build_filter(filter: &BooleanArray) -> Result { /// Remove null values by do a bitmask AND operation with null bits and the boolean bits. pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray { - let array_data = filter.data_ref(); - let nulls = array_data.nulls().unwrap(); + let nulls = filter.nulls().unwrap(); let mask = filter.values() & nulls.inner(); - - let array_data = ArrayData::builder(DataType::Boolean) - .len(mask.len()) - .offset(mask.offset()) - .add_buffer(mask.into_inner()); - - let array_data = unsafe { array_data.build_unchecked() }; - - BooleanArray::from(array_data) + BooleanArray::new(mask, None) } /// Filters an [Array], returning elements matching the filter (i.e. where the values are true). @@ -365,9 +352,10 @@ fn filter_array( t => unimplemented!("Filter not supported for dictionary type {:?}", t) } _ => { + let data = values.to_data(); // fallback to using MutableArrayData let mut mutable = MutableArrayData::new( - vec![values.data_ref()], + vec![&data], false, predicate.count, ); diff --git a/arrow/benches/mutable_array.rs b/arrow/benches/mutable_array.rs index 3a42ec1be3c3..b04e5cd84926 100644 --- a/arrow/benches/mutable_array.rs +++ b/arrow/benches/mutable_array.rs @@ -39,7 +39,8 @@ fn create_slices(size: usize) -> Vec<(usize, usize)> { } fn bench(v1: &T, slices: &[(usize, usize)]) { - let mut mutable = MutableArrayData::new(vec![v1.data_ref()], false, 5); + let data = v1.to_data(); + let mut mutable = MutableArrayData::new(vec![&data], false, 5); for (start, end) in slices { mutable.extend(0, *start, *end) } diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs index b6f81f6a4c1a..af81b17e4aa8 100644 --- a/arrow/tests/array_equal.rs +++ b/arrow/tests/array_equal.rs @@ -856,7 +856,7 @@ fn test_struct_equal_null() { )])) .null_bit_buffer(Some(Buffer::from(vec![0b00011110]))) .len(5) - .add_child_data(a.data_ref().clone()) + .add_child_data(a.to_data()) .build() .unwrap(); let a = make_array(a); @@ -920,7 +920,7 @@ fn test_struct_equal_null_variable_size() { )])) .null_bit_buffer(Some(Buffer::from(vec![0b00001010]))) .len(5) - .add_child_data(strings1.data_ref().clone()) + .add_child_data(strings1.to_data()) .build() .unwrap(); let a = make_array(a); @@ -932,7 +932,7 @@ fn test_struct_equal_null_variable_size() { )])) .null_bit_buffer(Some(Buffer::from(vec![0b00001010]))) .len(5) - .add_child_data(strings2.data_ref().clone()) + .add_child_data(strings2.to_data()) .build() .unwrap(); let b = make_array(b); diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs index b470be5ad408..91e839fc1890 100644 --- a/parquet/src/arrow/array_reader/struct_array.rs +++ b/parquet/src/arrow/array_reader/struct_array.rs @@ -257,7 +257,7 @@ mod tests { assert_eq!( vec![true, false, false, false, false], (0..5) - .map(|idx| struct_array.data_ref().is_null(idx)) + .map(|idx| struct_array.is_null(idx)) .collect::>() ); assert_eq!(