From f250369b93a120e0585b5b30e1a0d8c052ea9a51 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Tue, 9 Jul 2024 09:06:42 -0400 Subject: [PATCH 1/2] update byte view array to not implicit copy --- parquet/src/arrow/array_reader/byte_view_array.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index dc4ce3f9c1b..8d9e1b284f0 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -71,7 +71,6 @@ struct ByteViewArrayReader { } impl ByteViewArrayReader { - #[allow(unused)] fn new( pages: Box, data_type: ArrowType, @@ -316,7 +315,8 @@ impl ByteViewArrayDecoderPlain { } pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result { - let block_id = output.append_block(self.buf.clone().into()); + let buf = arrow_buffer::Buffer::from_bytes(self.buf.clone().into()); + let block_id = output.append_block(buf); let to_read = len.min(self.max_remaining_values); @@ -546,7 +546,8 @@ impl ByteViewArrayDecoderDeltaLength { let src_lengths = &self.lengths[self.length_offset..self.length_offset + to_read]; - let block_id = output.append_block(self.data.clone().into()); + let bytes = arrow_buffer::Buffer::from_bytes(self.data.clone().into()); + let block_id = output.append_block(bytes); let mut current_offset = self.data_offset; let initial_offset = current_offset; From 53d2fd68868a023bc4c57be5730b2db755b38814 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Tue, 9 Jul 2024 19:28:55 -0400 Subject: [PATCH 2/2] Add small comments --- parquet/src/arrow/array_reader/byte_view_array.rs | 4 ++++ parquet/src/arrow/buffer/view_buffer.rs | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 8d9e1b284f0..d1a0313dc1e 100644 --- 
a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -315,6 +315,8 @@ impl ByteViewArrayDecoderPlain { } pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result { + // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero copy + // Then we convert `arrow_buffer::Bytes` into `arrow_buffer::Buffer`, which is also zero copy let buf = arrow_buffer::Buffer::from_bytes(self.buf.clone().into()); let block_id = output.append_block(buf); @@ -546,6 +548,8 @@ impl ByteViewArrayDecoderDeltaLength { let src_lengths = &self.lengths[self.length_offset..self.length_offset + to_read]; + // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero copy + // Then we convert `arrow_buffer::Bytes` into `arrow_buffer::Buffer`, which is also zero copy let bytes = arrow_buffer::Buffer::from_bytes(self.data.clone().into()); let block_id = output.append_block(bytes); diff --git a/parquet/src/arrow/buffer/view_buffer.rs b/parquet/src/arrow/buffer/view_buffer.rs index ae83ac31777..2256f4877d6 100644 --- a/parquet/src/arrow/buffer/view_buffer.rs +++ b/parquet/src/arrow/buffer/view_buffer.rs @@ -68,7 +68,6 @@ impl ViewBuffer { } /// Converts this into an [`ArrayRef`] with the provided `data_type` and `null_buffer` - #[allow(unused)] pub fn into_array(self, null_buffer: Option, data_type: &ArrowType) -> ArrayRef { let len = self.views.len(); let views = Buffer::from_vec(self.views);