diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs
index ccce3cda9989..be861474f659 100644
--- a/arrow-array/src/array/binary_array.rs
+++ b/arrow-array/src/array/binary_array.rs
@@ -683,7 +683,7 @@ mod tests {
         let data = vec![None];
         let array = BinaryArray::from(data);
         array
-            .data()
+            .into_data()
            .validate_full()
             .expect("All null array has valid array data");
     }
@@ -693,7 +693,7 @@ mod tests {
         let data = vec![None];
         let array = LargeBinaryArray::from(data);
         array
-            .data()
+            .into_data()
             .validate_full()
             .expect("All null array has valid array data");
     }
diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs
index 343fed76846a..04e40b5e6f3e 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -252,11 +252,11 @@ impl DictionaryArray {
         // Note: This use the ArrayDataBuilder::build_unchecked and afterwards
         // call the new function which only validates that the keys are in bounds.
-        let data = keys.data().clone();
+        let data = keys.to_data();
         let builder = data
             .into_builder()
             .data_type(dict_data_type)
-            .add_child_data(values.data().clone());
+            .add_child_data(values.to_data());

         // Safety: `validate` ensures key type is correct, and
         // `validate_values` ensures all offsets are within range
@@ -397,7 +397,7 @@ impl DictionaryArray {
                 Box::new(K::DATA_TYPE),
                 Box::new(values.data_type().clone()),
             ))
-            .child_data(vec![values.data().clone()]);
+            .child_data(vec![values.to_data()]);

         // SAFETY:
         // Offsets were valid before and verified length is greater than or equal
@@ -1076,7 +1076,7 @@ mod tests {
         let boxed: ArrayRef = Arc::new(dict_array);

         let col: DictionaryArray =
-            DictionaryArray::::from(boxed.data().clone());
+            DictionaryArray::::from(boxed.to_data());
         let err = col.into_primitive_dict_builder::();

         let returned = err.unwrap_err();
diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs
index 3d78387cdf50..30cd0dec455c 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -189,7 +189,7 @@ impl MapArray {
         let entry_struct = StructArray::from(vec![
             (keys_field, Arc::new(keys_data) as ArrayRef),
-            (values_field, make_array(values.data().clone())),
+            (values_field, make_array(values.to_data())),
         ]);

         let map_data_type = DataType::Map(
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index 589cf1eaf4aa..41d5c8bebe29 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -290,7 +290,7 @@ impl Array for ArrayRef {
     }

     fn into_data(self) -> ArrayData {
-        self.data().clone()
+        self.to_data()
     }

     #[allow(deprecated)]
@@ -357,7 +357,7 @@ impl<'a, T: Array> Array for &'a T {
     }

     fn into_data(self) -> ArrayData {
-        self.data().clone()
+        self.to_data()
     }

     #[allow(deprecated)]
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index f857e26c7f89..e8ad1fec5ca4 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -1185,7 +1185,7 @@ impl PrimitiveArray {
     pub fn precision(&self) -> u8 {
         match T::BYTE_LENGTH {
             16 => {
-                if let DataType::Decimal128(p, _) = self.data().data_type() {
+                if let DataType::Decimal128(p, _) = self.data_type() {
                     *p
                 } else {
                     unreachable!(
@@ -1195,7 +1195,7 @@ impl PrimitiveArray {
                 }
             }
             32 => {
-                if let DataType::Decimal256(p, _) = self.data().data_type() {
+                if let DataType::Decimal256(p, _) = self.data_type() {
                     *p
                 } else {
                     unreachable!(
@@ -1212,7 +1212,7 @@ impl PrimitiveArray {
     pub fn scale(&self) -> i8 {
         match T::BYTE_LENGTH {
             16 => {
-                if let DataType::Decimal128(_, s) = self.data().data_type() {
+                if let DataType::Decimal128(_, s) = self.data_type() {
                     *s
                 } else {
                     unreachable!(
@@ -1222,7 +1222,7 @@ impl PrimitiveArray {
                 }
             }
             32 => {
-                if let DataType::Decimal256(_, s) = self.data().data_type() {
+                if let DataType::Decimal256(_, s) = self.data_type() {
                     *s
                 } else {
                     unreachable!(
@@ -1874,7 +1874,7 @@ mod tests {
         let array = PrimitiveArray::::from(values.clone());
         assert_eq!(array.values(), &values);

-        let array = PrimitiveArray::::from(array.data().clone());
+        let array = PrimitiveArray::::from(array.to_data());
         assert_eq!(array.values(), &values);
     }
@@ -1894,7 +1894,7 @@ mod tests {
         let array = PrimitiveArray::::from(values.clone());
         assert_eq!(array.values(), &values);

-        let array = PrimitiveArray::::from(array.data().clone());
+        let array = PrimitiveArray::::from(array.to_data());
         assert_eq!(array.values(), &values);
     }
@@ -2190,7 +2190,7 @@ mod tests {
         let boxed: ArrayRef = Arc::new(array);

-        let col: Int32Array = PrimitiveArray::<Int32Type>::from(boxed.data().clone());
+        let col: Int32Array = PrimitiveArray::<Int32Type>::from(boxed.to_data());
         let err = col.into_builder();

         match err {
diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs
index ada34b47f8a5..287c9efa0195 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -104,8 +104,8 @@ impl RunArray {
         let len = RunArray::logical_len(run_ends);
         let builder = ArrayDataBuilder::new(ree_array_type)
             .len(len)
-            .add_child_data(run_ends.data().clone())
-            .add_child_data(values.data().clone());
+            .add_child_data(run_ends.to_data())
+            .add_child_data(values.to_data());

         // `build_unchecked` is used to avoid recursive validation of child arrays.
         let array_data = unsafe { builder.build_unchecked() };
diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index 304f0ab3eee9..e042f29c22d1 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -456,7 +456,7 @@ mod tests {
         let data: Vec> = vec![None];
         let array = StringArray::from(data);
         array
-            .data()
+            .into_data()
             .validate_full()
             .expect("All null array has valid array data");
     }
@@ -466,7 +466,7 @@ mod tests {
         let data: Vec> = vec![None];
         let array = LargeStringArray::from(data);
         array
-            .data()
+            .into_data()
             .validate_full()
             .expect("All null array has valid array data");
     }
diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs
index 5f726a5b121c..adaabe9a9e40 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -17,7 +17,7 @@
 use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BufferBuilder};
-use crate::{ArrayRef, GenericListArray, OffsetSizeTrait};
+use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
 use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::Field;
@@ -228,7 +228,7 @@ where
     pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
         let len = self.len();
         let values_arr = self.values_builder.finish();
-        let values_data = values_arr.data();
+        let values_data = values_arr.to_data();

         let offset_buffer = self.offsets_builder.finish();
         let null_bit_buffer = self.null_buffer_builder.finish();
@@ -242,7 +242,7 @@ where
         let array_data_builder = ArrayData::builder(data_type)
             .len(len)
             .add_buffer(offset_buffer)
-            .add_child_data(values_data.clone())
+            .add_child_data(values_data)
             .null_bit_buffer(null_bit_buffer);

         let array_data = unsafe { array_data_builder.build_unchecked() };
@@ -254,7 +254,7 @@ where
     pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
         let len = self.len();
         let values_arr = self.values_builder.finish_cloned();
-        let values_data = values_arr.data();
+        let values_data = values_arr.to_data();

         let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
         let null_bit_buffer = self
@@ -270,7 +270,7 @@ where
         let array_data_builder = ArrayData::builder(data_type)
             .len(len)
             .add_buffer(offset_buffer)
-            .add_child_data(values_data.clone())
+            .add_child_data(values_data)
             .null_bit_buffer(null_bit_buffer);

         let array_data = unsafe { array_data_builder.build_unchecked() };
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 499ae183f3e9..6471811d82d6 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -233,7 +233,7 @@ impl StructBuilder {
         let mut child_data = Vec::with_capacity(self.field_builders.len());
         for f in &mut self.field_builders {
             let arr = f.finish();
-            child_data.push(arr.data().clone());
+            child_data.push(arr.to_data());
         }
         let length = self.len();
         let null_bit_buffer = self.null_buffer_builder.finish();
@@ -254,7 +254,7 @@ impl StructBuilder {
         let mut child_data = Vec::with_capacity(self.field_builders.len());
         for f in &self.field_builders {
             let arr = f.finish_cloned();
-            child_data.push(arr.data().clone());
+            child_data.push(arr.to_data());
         }
         let length = self.len();
         let null_bit_buffer = self
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 5d7bea0e9d0f..0ea6332a7ea5 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -634,7 +634,7 @@ pub fn cast_with_options(
     let from_type = array.data_type();
     // clone array if types are the same
     if from_type == to_type {
-        return Ok(make_array(array.data().clone()));
+        return Ok(make_array(array.to_data()));
     }
     match (from_type, to_type) {
         (
@@ -3108,7 +3108,7 @@ fn dictionary_cast(
         })?;

         let keys_array: ArrayRef =
-            Arc::new(PrimitiveArray::<K>::from(dict_array.keys().data().clone()));
+            Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
         let values_array = dict_array.values();
         let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
         let cast_values =
@@ -3182,7 +3182,7 @@ where
     // Note take requires first casting the indices to u32
     let keys_array: ArrayRef =
-        Arc::new(PrimitiveArray::<K>::from(dict_array.keys().data().clone()));
+        Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
     let indices = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?;

     let u32_indices = indices
@@ -3379,7 +3379,7 @@ fn cast_list_inner(
     to_type: &DataType,
     cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError> {
-    let data = array.data().clone();
+    let data = array.to_data();
     let underlying_array = make_array(data.child_data()[0].clone());
     let cast_array =
         cast_with_options(underlying_array.as_ref(), to.data_type(), cast_options)?;
diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index e68e064c775d..c290cb7d0265 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -1192,7 +1192,7 @@ where
 {
     // TODO: Use take_boolean (#2967)
     let array = take(&dict_comparison, dict.keys(), None)?;
-    Ok(BooleanArray::from(array.data().clone()))
+    Ok(BooleanArray::from(array.to_data()))
 }

 /// Helper function to perform boolean lambda function on values from two arrays using
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index db1fff6d3e2f..bfe74d9e3e7a 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -17,6 +17,7 @@
 //! Contains functions and function factories to compare arrays.
+use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
@@ -33,21 +34,21 @@ fn compare_primitives(
 where
     T::Native: ArrowNativeTypeOp,
 {
-    let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone());
-    let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone());
+    let left: PrimitiveArray<T> = PrimitiveArray::from(left.to_data());
+    let right: PrimitiveArray<T> = PrimitiveArray::from(right.to_data());
     Box::new(move |i, j| left.value(i).compare(right.value(j)))
 }

 fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator {
-    let left: BooleanArray = BooleanArray::from(left.data().clone());
-    let right: BooleanArray = BooleanArray::from(right.data().clone());
+    let left: BooleanArray = BooleanArray::from(left.to_data());
+    let right: BooleanArray = BooleanArray::from(right.to_data());

     Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
 }

 fn compare_string(left: &dyn Array, right: &dyn Array) -> DynComparator {
-    let left: StringArray = StringArray::from(left.data().clone());
-    let right: StringArray = StringArray::from(right.data().clone());
+    let left: StringArray = StringArray::from(left.to_data());
+    let right: StringArray = StringArray::from(right.to_data());

     Box::new(move |i, j| left.value(i).cmp(right.value(j)))
 }
@@ -58,15 +59,13 @@ where
     V: ArrowPrimitiveType,
     V::Native: ArrowNativeTypeOp,
 {
-    let left = left.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
-    let right = right.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
+    let left = left.as_dictionary::<K>();
+    let right = right.as_dictionary::<K>();

-    let left_keys: PrimitiveArray<K> = PrimitiveArray::from(left.keys().data().clone());
-    let right_keys: PrimitiveArray<K> = PrimitiveArray::from(right.keys().data().clone());
-    let left_values: PrimitiveArray<V> =
-        PrimitiveArray::from(left.values().data().clone());
-    let right_values: PrimitiveArray<V> =
-        PrimitiveArray::from(right.values().data().clone());
+    let left_keys: PrimitiveArray<K> = PrimitiveArray::from(left.keys().to_data());
+    let right_keys: PrimitiveArray<K> = PrimitiveArray::from(right.keys().to_data());
+    let left_values: PrimitiveArray<V> = left.values().to_data().into();
+    let right_values: PrimitiveArray<V> = right.values().to_data().into();

     Box::new(move |i: usize, j: usize| {
         let key_left = left_keys.value(i).as_usize();
@@ -81,13 +80,13 @@ fn compare_dict_string(left: &dyn Array, right: &dyn Array) -> DynComparator
 where
     T: ArrowDictionaryKeyType,
 {
-    let left = left.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
-    let right = right.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
+    let left = left.as_dictionary::<T>();
+    let right = right.as_dictionary::<T>();

-    let left_keys: PrimitiveArray<T> = PrimitiveArray::from(left.keys().data().clone());
-    let right_keys: PrimitiveArray<T> = PrimitiveArray::from(right.keys().data().clone());
-    let left_values = StringArray::from(left.values().data().clone());
-    let right_values = StringArray::from(right.values().data().clone());
+    let left_keys: PrimitiveArray<T> = PrimitiveArray::from(left.keys().to_data());
+    let right_keys: PrimitiveArray<T> = PrimitiveArray::from(right.keys().to_data());
+    let left_values = StringArray::from(left.values().to_data());
+    let right_values = StringArray::from(right.values().to_data());

     Box::new(move |i: usize, j: usize| {
         let key_left = left_keys.value(i).as_usize();
@@ -264,10 +263,8 @@ pub fn build_compare(
             }
         }
         (FixedSizeBinary(_), FixedSizeBinary(_)) => {
-            let left: FixedSizeBinaryArray =
-                FixedSizeBinaryArray::from(left.data().clone());
-            let right: FixedSizeBinaryArray =
-                FixedSizeBinaryArray::from(right.data().clone());
+            let left: FixedSizeBinaryArray = left.to_data().into();
+            let right: FixedSizeBinaryArray = right.to_data().into();

             Box::new(move |i, j| left.value(i).cmp(right.value(j)))
         }
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 56b3ec2b36b0..18d18606015f 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -2171,7 +2171,7 @@ mod tests {
             .into_data()
             .into_builder()
             .data_type(data_type)
-            .add_child_data(values.data().clone())
+            .add_child_data(values.to_data())
             .build()
             .unwrap();
diff --git a/arrow-row/src/list.rs b/arrow-row/src/list.rs
index e232e717c9e8..e4ff878dd135 100644
--- a/arrow-row/src/list.rs
+++ b/arrow-row/src/list.rs
@@ -164,7 +164,7 @@ pub unsafe fn decode(
     let child = converter.convert_raw(&mut child_rows, validate_utf8)?;
     assert_eq!(child.len(), 1);
-    let child_data = child[0].data().clone();
+    let child_data = child[0].to_data();

     let builder = ArrayDataBuilder::new(field.data_type.clone())
         .len(rows.len())
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index 2befcd05447a..e2f3630bdfbc 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -2086,7 +2086,7 @@ mod tests {
             .downcast_ref::>()
             .unwrap();

-        let result_values: StringArray = result.values().data().clone().into();
+        let result_values: StringArray = result.values().to_data().into();

         // dictionary values should stay the same
         let expected_values = StringArray::from(vec!["foo", "bar", ""]);
diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index 7b6c7d50cac3..383ac5fd11c6 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -152,7 +152,7 @@ pub fn $fn_name(
             let dict_comparison = $fn_name(left.values().as_ref(), right)?;
             // TODO: Use take_boolean (#2967)
             let array = take(&dict_comparison, left.keys(), None)?;
-            Ok(BooleanArray::from(array.data().clone()))
+            Ok(BooleanArray::from(array.to_data()))
         }
         t => Err(ArrowError::ComputeError(format!(
             "Should be DictionaryArray but got: {}",
             t
diff --git a/arrow/benches/array_data_validate.rs b/arrow/benches/array_data_validate.rs
index 68fc66a635bc..529205e7e28f 100644
--- a/arrow/benches/array_data_validate.rs
+++ b/arrow/benches/array_data_validate.rs
@@ -37,8 +37,8 @@ fn create_binary_array_data(length: i32) -> ArrayData {
     .unwrap()
 }

-fn validate_utf8_array(arr: &StringArray) {
-    arr.data().validate_values().unwrap();
+fn validate_utf8_array(arr: &ArrayData) {
+    arr.validate_values().unwrap();
 }

 fn validate_benchmark(c: &mut Criterion) {
@@ -48,7 +48,7 @@ fn validate_benchmark(c: &mut Criterion) {
     });

     //Utf8 Array
-    let str_arr = StringArray::from(vec!["test"; 20000]);
+    let str_arr = StringArray::from(vec!["test"; 20000]).to_data();
     c.bench_function("validate_utf8_array_data 20000", |b| {
         b.iter(|| validate_utf8_array(&str_arr))
     });
diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
index 0249a70d168f..f40a04e82667 100644
--- a/arrow/src/array/ffi.rs
+++ b/arrow/src/array/ffi.rs
@@ -57,8 +57,8 @@ pub unsafe fn export_array_into_raw(
     out_array: *mut ffi::FFI_ArrowArray,
     out_schema: *mut ffi::FFI_ArrowSchema,
 ) -> Result<()> {
-    let data = src.data();
-    let array = ffi::FFI_ArrowArray::new(data);
+    let data = src.to_data();
+    let array = ffi::FFI_ArrowArray::new(&data);
     let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?;

     std::ptr::write_unaligned(out_array, array);
@@ -101,22 +101,22 @@ mod tests {
     #[test]
     fn test_u32() -> Result<()> {
         let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
     fn test_u64() -> Result<()> {
         let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
     fn test_i64() -> Result<()> {
         let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
@@ -169,8 +169,8 @@ mod tests {
         ]);
         let array = DictionaryArray::try_new(&keys, &values)?;

-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
@@ -178,8 +178,8 @@ mod tests {
         let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
         let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;

-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
@@ -195,8 +195,8 @@ mod tests {
         let array =
             FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;

-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
@@ -214,8 +214,8 @@ mod tests {
             .build()?;
         let array = FixedSizeListArray::from(list_data);

-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
@@ -240,8 +240,8 @@ mod tests {
             .build()?;
         let array = FixedSizeListArray::from(list_data);

-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }

     #[test]
@@ -278,7 +278,7 @@ mod tests {
         let array = FixedSizeListArray::from(list_data);

-        let data = array.data();
-        test_round_trip(data)
+        let data = array.into_data();
+        test_round_trip(&data)
     }
 }
diff --git a/arrow/src/compute/kernels/limit.rs b/arrow/src/compute/kernels/limit.rs
index 74cbd2096bfd..097b8e949443 100644
--- a/arrow/src/compute/kernels/limit.rs
+++ b/arrow/src/compute/kernels/limit.rs
@@ -172,8 +172,8 @@ mod tests {
         assert_eq!(5, struct_array.len());
         assert_eq!(1, struct_array.null_count());
-        assert_eq!(&boolean_data, struct_array.column(0).data());
-        assert_eq!(&int_data, struct_array.column(1).data());
+        assert_eq!(boolean_data, struct_array.column(0).to_data());
+        assert_eq!(int_data, struct_array.column(1).to_data());

         let array: ArrayRef = Arc::new(struct_array);
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 7b26cf7f25a5..0af1b1111ca4 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -1104,7 +1104,7 @@ mod tests {
         )]);

         // export it
-        let array = ArrowArray::try_from(struct_array.data().clone())?;
+        let array = ArrowArray::try_from(struct_array.to_data())?;

         // (simulate consumer) import it
         let data = ArrayData::try_from(array)?;
@@ -1128,7 +1128,7 @@ mod tests {
         let union = builder.build().unwrap();

         // export it
-        let array = ArrowArray::try_from(union.data().clone())?;
+        let array = ArrowArray::try_from(union.to_data())?;

         // (simulate consumer) import it
         let data = ArrayData::try_from(array)?;
diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs
index 0b0a06875432..c1094b127bba 100644
--- a/arrow/src/util/data_gen.rs
+++ b/arrow/src/util/data_gen.rs
@@ -289,8 +289,8 @@ mod tests {
         }
         // Test that the list's child values are non-null
         let b_array = batch.column(1);
-        let list_array = b_array.as_any().downcast_ref::<ListArray>().unwrap();
-        let child_array = make_array(list_array.data().child_data()[0].clone());
+        let list_array = b_array.as_list::<i32>();
+        let child_array = list_array.values();
         assert_eq!(child_array.null_count(), 0);
         // There should be more values than the list, to show that it's a list
         assert!(child_array.len() > list_array.len());
diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs
index 37968ec6a055..93296c3b0e43 100644
--- a/arrow/tests/array_equal.rs
+++ b/arrow/tests/array_equal.rs
@@ -372,7 +372,7 @@ fn test_empty_offsets_list_equal() {
         ))))
         .len(0)
         .add_buffer(Buffer::from(&empty_offsets))
-        .add_child_data(values.data().clone())
+        .add_child_data(values.to_data())
         .null_bit_buffer(Some(Buffer::from(&empty_offsets)))
         .build()
         .unwrap()
     }
@@ -385,7 +385,7 @@ fn test_empty_offsets_list_equal() {
         ))))
         .len(0)
         .add_buffer(Buffer::from(&empty_offsets))
-        .add_child_data(values.data().clone())
+        .add_child_data(values.to_data())
         .null_bit_buffer(Some(Buffer::from(&empty_offsets)))
         .build()
         .unwrap()
     }
@@ -400,11 +400,7 @@ fn test_empty_offsets_list_equal() {
         ))))
         .len(0)
         .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice()))
-        .add_child_data(
-            Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4])
-                .data()
-                .clone(),
-        )
+        .add_child_data(Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]).into_data())
         .null_bit_buffer(Some(Buffer::from(vec![0b00001001])))
         .build()
         .unwrap()
diff --git a/parquet/src/arrow/array_reader/byte_array_dictionary.rs b/parquet/src/arrow/array_reader/byte_array_dictionary.rs
index c4ed7e9070cc..9d24344bf14b 100644
--- a/parquet/src/arrow/array_reader/byte_array_dictionary.rs
+++ b/parquet/src/arrow/array_reader/byte_array_dictionary.rs
@@ -355,7 +355,8 @@ where
         assert_eq!(dict.data_type(), &self.value_type);

-        let dict_buffers = dict.data().buffers();
+        let data = dict.data();
+        let dict_buffers = data.buffers();
         let dict_offsets = dict_buffers[0].typed_data::();
         let dict_values = dict_buffers[1].as_slice();
@@ -391,8 +392,8 @@ where
 #[cfg(test)]
 mod tests {
-    use arrow_array::{Array, StringArray};
     use arrow::compute::cast;
+    use arrow_array::{Array, StringArray};

     use crate::arrow::array_reader::test_util::{
         byte_array_all_encodings, encode_dictionary, utf8_column,
diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index 504591c0ca89..a6b354f902df 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -143,11 +143,9 @@ impl ArrayReader for ListArrayReader {
         let mut skipped = 0;

         // Builder used to construct the filtered child data, skipping empty lists and nulls
-        let mut child_data_builder = MutableArrayData::new(
-            vec![next_batch_array.data()],
-            false,
-            next_batch_array.len(),
-        );
+        let data = next_batch_array.to_data();
+        let mut child_data_builder =
+            MutableArrayData::new(vec![&data], false, next_batch_array.len());

         def_levels.iter().zip(rep_levels).try_for_each(|(d, r)| {
             match r.cmp(&self.rep_level) {
@@ -201,7 +199,7 @@ impl ArrayReader for ListArrayReader {
         let child_data = if skipped == 0 {
             // No filtered values - can reuse original array
-            next_batch_array.data().clone()
+            next_batch_array.to_data()
         } else {
             // One or more filtered values - must build new array
             if let Some(start) = filter_start.take() {
diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs
index d7645a593505..9bfc047322a7 100644
--- a/parquet/src/arrow/array_reader/map_array.rs
+++ b/parquet/src/arrow/array_reader/map_array.rs
@@ -96,7 +96,7 @@ impl ArrayReader for MapArrayReader {
         // A MapArray is just a ListArray with a StructArray child
         // we can therefore just alter the ArrayData
         let array = self.reader.consume_batch().unwrap();
-        let data = array.data().clone();
+        let data = array.to_data();
         let builder = data.into_builder().data_type(self.data_type.clone());

         // SAFETY - we can assume that ListArrayReader produces valid ListArray
diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs
index 0670701a0375..11e019f29a59 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -17,7 +17,7 @@
 use crate::arrow::array_reader::ArrayReader;
 use crate::errors::{ParquetError, Result};
-use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray};
+use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray, Array};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::DataType as ArrowType;
 use std::any::Any;
@@ -130,7 +130,7 @@ impl ArrayReader for StructArrayReader {
             .child_data(
                 children_array
                     .iter()
-                    .map(|x| x.data().clone())
+                    .map(|x| x.to_data())
                     .collect::>(),
             );
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 4b88a33f3a25..57741283a2f9 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -1761,7 +1761,7 @@ mod tests {
         let b = Arc::clone(batch.column(0));

         assert_eq!(a.data_type(), b.data_type());
-        assert_eq!(a.data(), b.data(), "{:#?} vs {:#?}", a.data(), b.data());
+        assert_eq!(a.to_data(), b.to_data());
         assert_eq!(
             a.as_any().type_id(),
             b.as_any().type_id(),
@@ -1960,7 +1960,7 @@ mod tests {
         let batch = reader.into_iter().next().unwrap().unwrap();
         assert_eq!(batch.schema().as_ref(), &expected_schema);
         assert_eq!(batch.num_rows(), 4);
-        assert_eq!(batch.column(0).data().null_count(), 2);
+        assert_eq!(batch.column(0).null_count(), 2);
     }

     #[test]
@@ -2077,7 +2077,7 @@ mod tests {
         );

         let get_dict =
-            |batch: &RecordBatch| batch.column(0).data().child_data()[0].clone();
+            |batch: &RecordBatch| batch.column(0).to_data().child_data()[0].clone();

         // First and second batch in same row group -> same dictionary
         assert_eq!(get_dict(&batches[0]), get_dict(&batches[1]));
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 4239f3fba59b..8ae44f92cffa 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -41,10 +41,9 @@
 //! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding)

 use crate::errors::{ParquetError, Result};
-use arrow_array::{
-    make_array, Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, StructArray,
-};
-use arrow_data::ArrayData;
+use arrow_array::cast::AsArray;
+use arrow_array::{Array, ArrayRef, OffsetSizeTrait, StructArray};
+use arrow_buffer::NullBuffer;
 use arrow_schema::{DataType, Field};
 use std::ops::Range;
@@ -183,29 +182,37 @@ impl LevelInfoBuilder {
                 self.write_leaf(array, range)
             }
             DataType::Struct(_) => {
-                let array = array.as_any().downcast_ref::<StructArray>().unwrap();
+                let array = array.as_struct();
                 self.write_struct(array, range)
             }
             DataType::List(_) => {
-                let array = array
-                    .as_any()
-                    .downcast_ref::<GenericListArray<i32>>()
-                    .unwrap();
-                self.write_list(array.value_offsets(), array.data(), range)
+                let array = array.as_list::<i32>();
+                self.write_list(
+                    array.value_offsets(),
+                    array.nulls(),
+                    array.values(),
+                    range,
+                )
             }
             DataType::LargeList(_) => {
-                let array = array
-                    .as_any()
-                    .downcast_ref::<GenericListArray<i64>>()
-                    .unwrap();
-
-                self.write_list(array.value_offsets(), array.data(), range)
+                let array = array.as_list::<i64>();
+                self.write_list(
+                    array.value_offsets(),
+                    array.nulls(),
+                    array.values(),
+                    range,
+                )
             }
             DataType::Map(_, _) => {
-                let array = array.as_any().downcast_ref::<MapArray>().unwrap();
+                let array = array.as_map();
                 // A Map is just as ListArray with a StructArray child, we therefore
                 // treat it as such to avoid code duplication
-                self.write_list(array.value_offsets(), array.data(), range)
+                self.write_list(
+                    array.value_offsets(),
+                    array.nulls(),
+                    array.values(),
+                    range,
+                )
             }
             _ => unreachable!(),
         }
@@ -217,7 +224,8 @@ impl LevelInfoBuilder {
     fn write_list<O: OffsetSizeTrait>(
         &mut self,
        offsets: &[O],
-        list_data: &ArrayData,
+        nulls: Option<&NullBuffer>,
+        values: &ArrayRef,
         range: Range<usize>,
     ) {
         let (child, ctx) = match self {
@@ -226,11 +234,10 @@ impl LevelInfoBuilder {
         };

         let offsets = &offsets[range.start..range.end + 1];
-        let child_array = make_array(list_data.child_data()[0].clone());

         let write_non_null_slice =
             |child: &mut LevelInfoBuilder, start_idx: usize, end_idx: usize| {
-                child.write(&child_array, start_idx..end_idx);
+                child.write(values, start_idx..end_idx);
                 child.visit_leaves(|leaf| {
                     let rep_levels = leaf.rep_levels.as_mut().unwrap();
                     let mut rev = rep_levels.iter_mut().rev();
@@ -270,7 +277,7 @@ impl LevelInfoBuilder {
             })
         };

-        match list_data.nulls() {
+        match nulls {
             Some(nulls) => {
                 let null_offset = range.start;
                 // TODO: Faster bitmask iteration (#1757)
@@ -1243,7 +1250,7 @@ mod tests {
         let array = Arc::new(list_builder.finish());

-        let values_len = array.data().child_data()[0].len();
+        let values_len = array.values().len();
         assert_eq!(values_len, 5);

         let schema = Arc::new(Schema::new(vec![list_field]));
@@ -1278,7 +1285,7 @@ mod tests {
         ]);

         // This test assumes that nulls don't take up space
-        assert_eq!(inner.data().child_data()[0].len(), 7);
+        assert_eq!(inner.values().len(), 7);

         let field = Field::new("list", inner.data_type().clone(), true);
         let array = Arc::new(inner) as ArrayRef;
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 86f7764ec4cf..4cf54dc8897e 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -22,7 +22,7 @@ use std::io::Write;
 use std::sync::Arc;

 use arrow_array::cast::AsArray;
-use arrow_array::types::Decimal128Type;
+use arrow_array::types::{Decimal128Type, Int32Type, Int64Type, UInt32Type, UInt64Type};
 use arrow_array::{types, Array, ArrayRef, RecordBatch};
 use arrow_schema::{DataType as ArrowDataType, IntervalUnit, SchemaRef};
@@ -33,11 +33,12 @@ use super::schema::{

 use crate::arrow::arrow_writer::byte_array::ByteArrayWriter;
 use crate::column::writer::{ColumnWriter, ColumnWriterImpl};
+use crate::data_type::{ByteArray, DataType, FixedLenByteArray};
 use crate::errors::{ParquetError, Result};
 use crate::file::metadata::{KeyValue, RowGroupMetaDataPtr};
 use crate::file::properties::WriterProperties;
+use crate::file::writer::SerializedFileWriter;
 use crate::file::writer::SerializedRowGroupWriter;
-use crate::{data_type::*, file::writer::SerializedFileWriter};
 use levels::{calculate_array_levels, LevelInfo};

 mod byte_array;
@@ -292,16 +293,21 @@ fn write_leaves(
             }
             col_writer.close()
         }
-        ArrowDataType::List(_) | ArrowDataType::LargeList(_) => {
+        ArrowDataType::List(_) => {
             let arrays: Vec<_> = arrays.iter().map(|array|{
-                // write the child list
-                let data = array.data();
-                arrow_array::make_array(data.child_data()[0].clone())
+                array.as_list::<i32>().values().clone()
             }).collect();

             write_leaves(row_group_writer, &arrays, levels)?;
             Ok(())
         }
+        ArrowDataType::LargeList(_) => {
+            let arrays: Vec<_> = arrays.iter().map(|array|{
+                array.as_list::<i64>().values().clone()
+            }).collect();
+
+            write_leaves(row_group_writer, &arrays, levels)?;
+            Ok(())
+        }
         ArrowDataType::Struct(fields) => {
             // Groups child arrays by field
             let mut field_arrays = vec![Vec::with_capacity(arrays.len()); fields.len()];
@@ -384,19 +390,15 @@ fn write_leaf(
                     let array = arrow_cast::cast(column, &ArrowDataType::Date32)?;
                     let array = arrow_cast::cast(&array, &ArrowDataType::Int32)?;
-                    let array = array
-                        .as_any()
-                        .downcast_ref::<Int32Array>()
-                        .expect("Unable to get int32 array");
+                    let array = array.as_primitive::<Int32Type>();
                     write_primitive(typed, array.values(), levels)?
                 }
                 ArrowDataType::UInt32 => {
-                    let data = column.data();
-                    let offset = data.offset();
+                    let values = column.as_primitive::<UInt32Type>().values();
                     // follow C++ implementation and use overflow/reinterpret cast from u32 to i32 which will map
                     // `(i32::MAX as u32)..u32::MAX` to `i32::MIN..0`
-                    let array: &[i32] = data.buffers()[0].typed_data();
-                    write_primitive(typed, &array[offset..offset + data.len()], levels)?
+                    let array = values.inner().typed_data::<i32>();
+                    write_primitive(typed, array, levels)?
                 }
                 ArrowDataType::Decimal128(_, _) => {
                     // use the int32 to represent the decimal with low precision
@@ -407,19 +409,13 @@ fn write_leaf(
                 }
                 _ => {
                     let array = arrow_cast::cast(column, &ArrowDataType::Int32)?;
-                    let array = array
-                        .as_any()
-                        .downcast_ref::<Int32Array>()
-                        .expect("Unable to get i32 array");
+                    let array = array.as_primitive::<Int32Type>();
                     write_primitive(typed, array.values(), levels)?
                 }
             }
         }
         ColumnWriter::BoolColumnWriter(ref mut typed) => {
-            let array = column
-                .as_any()
-                .downcast_ref::<BooleanArray>()
-                .expect("Unable to get boolean array");
+            let array = column.as_boolean();
             typed.write_batch(
                 get_bool_array_slice(array, indices).as_slice(),
                 levels.def_levels(),
@@ -429,19 +425,15 @@ fn write_leaf(
         ColumnWriter::Int64ColumnWriter(ref mut typed) => {
             match column.data_type() {
                 ArrowDataType::Int64 => {
-                    let array = column
-                        .as_any()
-                        .downcast_ref::<Int64Array>()
-                        .expect("Unable to get i64 array");
+                    let array = column.as_primitive::<Int64Type>();
                     write_primitive(typed, array.values(), levels)?
                 }
                 ArrowDataType::UInt64 => {
+                    let values = column.as_primitive::<UInt64Type>().values();
                     // follow C++ implementation and use overflow/reinterpret cast from u64 to i64 which will map
                     // `(i64::MAX as u64)..u64::MAX` to `i64::MIN..0`
-                    let data = column.data();
-                    let offset = data.offset();
-                    let array: &[i64] = data.buffers()[0].typed_data();
-                    write_primitive(typed, &array[offset..offset + data.len()], levels)?
+                    let array = values.inner().typed_data::<i64>();
+                    write_primitive(typed, array, levels)?
                 }
                 ArrowDataType::Decimal128(_, _) => {
                     // use the int64 to represent the decimal with low precision
@@ -452,10 +444,7 @@ fn write_leaf(
                 }
                 _ => {
                     let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
-                    let array = array
-                        .as_any()
-                        .downcast_ref::<Int64Array>()
-                        .expect("Unable to get i64 array");
+                    let array = array.as_primitive::<Int64Type>();
                     write_primitive(typed, array.values(), levels)?
                 }
             }
@@ -642,6 +631,7 @@ mod tests {
     use arrow_schema::Fields;

     use crate::basic::Encoding;
+    use crate::data_type::AsBytes;
     use crate::file::metadata::ParquetMetaData;
     use crate::file::page_index::index_reader::read_pages_locations;
     use crate::file::properties::{ReaderProperties, WriterVersion};
@@ -723,8 +713,8 @@ mod tests {
         assert_eq!(expected_batch.num_columns(), actual_batch.num_columns());
         assert_eq!(expected_batch.num_rows(), actual_batch.num_rows());
         for i in 0..expected_batch.num_columns() {
-            let expected_data = expected_batch.column(i).data().clone();
-            let actual_data = actual_batch.column(i).data().clone();
+            let expected_data = expected_batch.column(i).to_data();
+            let actual_data = actual_batch.column(i).to_data();
             assert_eq!(expected_data, actual_data);
         }
@@ -779,7 +769,7 @@ mod tests {
         // build a record batch
         let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();

-        assert_eq!(batch.column(0).data().null_count(), 1);
+        assert_eq!(batch.column(0).null_count(), 1);

         // This test fails if the max row group size is less than the batch's length
         // see https://github.com/apache/arrow-rs/issues/518
@@ -821,7 +811,7 @@ mod tests {
         // This test fails if the max row group size is less than the batch's length
         // see https://github.com/apache/arrow-rs/issues/518
-        assert_eq!(batch.column(0).data().null_count(), 0);
+        assert_eq!(batch.column(0).null_count(), 0);

         roundtrip(batch, None);
     }
@@ -928,7 +918,7 @@ mod tests {
         let g_list_data = ArrayData::builder(struct_field_g.data_type().clone())
             .len(5)
             .add_buffer(g_value_offsets.clone())
-            .add_child_data(g_value.data().clone())
+            .add_child_data(g_value.to_data())
             .build()
             .unwrap();
         let g = ListArray::from(g_list_data);
@@ -936,7 +926,7 @@ mod tests {
         let h_list_data = ArrayData::builder(struct_field_h.data_type().clone())
             .len(5)
             .add_buffer(g_value_offsets)
-            .add_child_data(g_value.data().clone())
+            .add_child_data(g_value.to_data())
             .null_bit_buffer(Some(Buffer::from(vec![0b00011011])))
             .build()
             .unwrap();
@@ -1251,9 +1241,9 @@ mod tests {
         assert_eq!(expected_batch.num_columns(), actual_batch.num_columns());
         assert_eq!(expected_batch.num_rows(), actual_batch.num_rows());
         for i in 0..expected_batch.num_columns() {
-            let expected_data = expected_batch.column(i).data();
-            let actual_data = actual_batch.column(i).data();
-            validate(expected_data, actual_data);
+            let expected_data = expected_batch.column(i).to_data();
+            let actual_data = actual_batch.column(i).to_data();
+            validate(&expected_data, &actual_data);
         }

         file
diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs
index 23ebea57b5b2..529c28872642 100644
--- a/parquet/src/arrow/buffer/dictionary_buffer.rs
+++ b/parquet/src/arrow/buffer/dictionary_buffer.rs
@@ -107,7 +107,8 @@ impl
             Self::Values { values } => Ok(values),
             Self::Dict { keys, values } => {
                 let mut spilled = OffsetBuffer::default();
-                let dict_buffers = values.data().buffers();
+                let data = values.to_data();
+                let dict_buffers = data.buffers();
                 let dict_offsets = dict_buffers[0].typed_data::();
                 let dict_values = dict_buffers[1].as_slice();
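
The change applied throughout this patch is mechanical: a borrow-then-clone of the underlying `ArrayData` (`array.data().clone()`) is replaced by `Array::to_data()`, and call sites that own the array use `Array::into_data()` to hand over the `ArrayData` without an extra copy. The following sketch is not part of the patch; it only illustrates the pattern, assuming an arrow-rs release in which `Array::to_data` and `Array::into_data` are available, and the `example` function and its values are hypothetical.

    use std::sync::Arc;
    use arrow_array::{make_array, Array, ArrayRef, Int32Array};
    use arrow_data::ArrayData;

    fn example() {
        let array = Int32Array::from(vec![Some(1), None, Some(3)]);

        // Old pattern removed by this diff (borrow the ArrayData, then clone it):
        //     let data: ArrayData = array.data().clone();

        // New pattern when the array is still needed afterwards: copy out the ArrayData.
        let data: ArrayData = array.to_data();
        let rebuilt: ArrayRef = make_array(data);
        assert_eq!(rebuilt.len(), 3);

        // New pattern when the array can be consumed: move the ArrayData out without cloning.
        let owned: ArrayData = array.into_data();
        let roundtrip = Int32Array::from(owned);
        assert_eq!(roundtrip.null_count(), 1);
        let _ = Arc::new(roundtrip) as ArrayRef;
    }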