Skip to content

Commit

Permalink
Merge branch 'parquet-string-view-2' into parquet-string-view-3
Browse files Browse the repository at this point in the history
  • Loading branch information
XiangpengHao committed Jul 2, 2024
2 parents b256484 + 9c5972f commit cef542d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 1 deletion.
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::sync::Arc;

use arrow_schema::{DataType, Fields, SchemaBuilder};

use crate::arrow::array_reader::byte_view_array::make_byte_view_array_reader;
use crate::arrow::array_reader::byte_array::make_byte_view_array_reader;
use crate::arrow::array_reader::empty_array::make_empty_array_reader;
use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader;
use crate::arrow::array_reader::{
Expand Down
30 changes: 30 additions & 0 deletions parquet/src/arrow/array_reader/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,36 @@ pub fn make_byte_array_reader(
}
}

/// Builds an [`ArrayReader`] that reads a byte array column into an Arrow view type.
///
/// When `arrow_type` is `Some`, that type is used as-is. Otherwise the type is
/// derived from the Parquet column description: `Utf8` and `Utf8View` map to
/// `Utf8View`, and every other derived type maps to `BinaryView`.
///
/// # Errors
///
/// Returns an error if the Parquet-to-Arrow field conversion fails, or if the
/// resolved type is neither `BinaryView` nor `Utf8View`.
pub fn make_byte_view_array_reader(
    pages: Box<dyn PageIterator>,
    column_desc: ColumnDescPtr,
    arrow_type: Option<ArrowType>,
) -> Result<Box<dyn ArrayReader>> {
    // Prefer the caller-supplied Arrow type; fall back to one inferred from
    // the Parquet column description.
    let view_type = if let Some(requested) = arrow_type {
        requested
    } else {
        let inferred = parquet_to_arrow_field(column_desc.as_ref())?;
        match inferred.data_type() {
            ArrowType::Utf8 | ArrowType::Utf8View => ArrowType::Utf8View,
            _ => ArrowType::BinaryView,
        }
    };

    // Only the two view types are accepted by this constructor.
    if !matches!(view_type, ArrowType::BinaryView | ArrowType::Utf8View) {
        return Err(general_err!(
            "invalid data type for byte array reader read to view type - {}",
            view_type
        ));
    }

    let record_reader = GenericRecordReader::new(column_desc);
    let array_reader = ByteArrayReader::<i32>::new(pages, view_type, record_reader);
    Ok(Box::new(array_reader))
}

/// An [`ArrayReader`] for variable length byte arrays
struct ByteArrayReader<I: OffsetSizeTrait> {
data_type: ArrowType,
Expand Down
3 changes: 3 additions & 0 deletions parquet/src/arrow/array_reader/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use bytes::Bytes;
use std::any::Any;

/// Returns an [`ArrayReader`] that decodes the provided byte array column to view types.
#[allow(unused)]
pub fn make_byte_view_array_reader(
pages: Box<dyn PageIterator>,
column_desc: ColumnDescPtr,
Expand Down Expand Up @@ -60,6 +61,7 @@ pub fn make_byte_view_array_reader(
}

/// An [`ArrayReader`] for variable length byte arrays
#[allow(unused)]
struct ByteViewArrayReader {
data_type: ArrowType,
pages: Box<dyn PageIterator>,
Expand All @@ -69,6 +71,7 @@ struct ByteViewArrayReader {
}

impl ByteViewArrayReader {
#[allow(unused)]
fn new(
pages: Box<dyn PageIterator>,
data_type: ArrowType,
Expand Down

0 comments on commit cef542d

Please sign in to comment.