diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs index 945f62526a7e..958594c93232 100644 --- a/parquet/src/arrow/array_reader/builder.rs +++ b/parquet/src/arrow/array_reader/builder.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use arrow_schema::{DataType, Fields, SchemaBuilder}; -use crate::arrow::array_reader::byte_view_array::make_byte_view_array_reader; +use crate::arrow::array_reader::byte_array::make_byte_view_array_reader; use crate::arrow::array_reader::empty_array::make_empty_array_reader; use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader; use crate::arrow::array_reader::{ diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs index 5f86c01a10c2..d0aa6f7b1ebe 100644 --- a/parquet/src/arrow/array_reader/byte_array.rs +++ b/parquet/src/arrow/array_reader/byte_array.rs @@ -74,6 +74,36 @@ pub fn make_byte_array_reader( } } +/// Returns an [`ArrayReader`] that decodes the provided byte array column to view types. +pub fn make_byte_view_array_reader( + pages: Box, + column_desc: ColumnDescPtr, + arrow_type: Option, +) -> Result> { + // Check if Arrow type is specified, else create it from Parquet type + let data_type = match arrow_type { + Some(t) => t, + None => match parquet_to_arrow_field(column_desc.as_ref())?.data_type() { + ArrowType::Utf8 | ArrowType::Utf8View => ArrowType::Utf8View, + _ => ArrowType::BinaryView, + }, + }; + + match data_type { + ArrowType::BinaryView | ArrowType::Utf8View => { + let reader = GenericRecordReader::new(column_desc); + Ok(Box::new(ByteArrayReader::::new( + pages, data_type, reader, + ))) + } + + _ => Err(general_err!( + "invalid data type for byte array reader read to view type - {}", + data_type + )), + } +} + /// An [`ArrayReader`] for variable length byte arrays struct ByteArrayReader { data_type: ArrowType, diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 779157c15df9..0531b0044342 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -32,6 +32,7 @@ use bytes::Bytes; use std::any::Any; /// Returns an [`ArrayReader`] that decodes the provided byte array column to view types. +#[allow(unused)] pub fn make_byte_view_array_reader( pages: Box, column_desc: ColumnDescPtr, @@ -60,6 +61,7 @@ pub fn make_byte_view_array_reader( } /// An [`ArrayReader`] for variable length byte arrays +#[allow(unused)] struct ByteViewArrayReader { data_type: ArrowType, pages: Box, @@ -69,6 +71,7 @@ struct ByteViewArrayReader { } impl ByteViewArrayReader { + #[allow(unused)] fn new( pages: Box, data_type: ArrowType,