Skip to content

Commit

Permalink
Update parquet to depend on arrow subcrates (#3028)
Browse files (browse the repository at this point in the history)
* Update parquet to depend on arrow subcrates (#3044)

* Fix parquet_derive

* Fix parquet_derive

* Fix no-default-features test compilation

* Fix parquet-fromcsv

* Clippy
  • Loading branch information
tustvold authored Nov 10, 2022
1 parent 132152c commit 4dd7fea
Show file tree
Hide file tree
Showing 39 changed files with 199 additions and 199 deletions.
16 changes: 12 additions & 4 deletions parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ edition = "2021"
rust-version = "1.62"

[dependencies]
arrow-array = { version = "26.0.0", path = "../arrow-array", default-features = false, optional = true }
arrow-buffer = { version = "26.0.0", path = "../arrow-buffer", default-features = false, optional = true }
arrow-cast = { version = "26.0.0", path = "../arrow-cast", default-features = false, optional = true }
arrow-csv = { version = "26.0.0", path = "../arrow-csv", default-features = false, optional = true }
arrow-data = { version = "26.0.0", path = "../arrow-data", default-features = false, optional = true }
arrow-schema = { version = "26.0.0", path = "../arrow-schema", default-features = false, optional = true }
arrow-select = { version = "26.0.0", path = "../arrow-select", default-features = false, optional = true }
arrow-ipc = { version = "26.0.0", path = "../arrow-ipc", default-features = false, optional = true }

ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }
bytes = { version = "1.1", default-features = false, features = ["std"] }
thrift = { version = "0.16", default-features = false }
Expand All @@ -41,7 +50,6 @@ zstd = { version = "0.11.1", optional = true, default-features = false }
chrono = { version = "0.4", default-features = false, features = ["alloc"] }
num = { version = "0.4", default-features = false }
num-bigint = { version = "0.4", default-features = false }
-arrow = { path = "../arrow", version = "26.0.0", optional = true, default-features = false, features = ["ipc"] }
base64 = { version = "0.13", default-features = false, features = ["std"], optional = true }
clap = { version = "4", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
Expand Down Expand Up @@ -70,9 +78,9 @@ all-features = true
[features]
default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
# Enable arrow reader/writer APIs
-arrow = ["dep:arrow", "base64"]
+arrow = ["base64", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-schema", "arrow-select", "arrow-ipc"]
# Enable CLI tools
-cli = ["json", "base64", "clap", "arrow/csv"]
+cli = ["json", "base64", "clap", "arrow-csv"]
# Enable JSON APIs
json = ["serde_json", "base64"]
# Enable internal testing APIs
Expand Down Expand Up @@ -100,7 +108,7 @@ required-features = ["cli"]

[[bin]]
name = "parquet-fromcsv"
-required-features = ["cli"]
+required-features = ["arrow", "cli"]

[[bench]]
name = "arrow_writer"
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::sync::Arc;

use arrow::datatypes::DataType;
use arrow_schema::DataType;

use crate::arrow::array_reader::empty_array::make_empty_array_reader;
use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader;
Expand Down
8 changes: 4 additions & 4 deletions parquet/src/arrow/array_reader/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ use crate::encodings::decoding::{Decoder, DeltaBitPackDecoder};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::memory::ByteBufferPtr;
use arrow::array::{Array, ArrayRef, BinaryArray, Decimal128Array, OffsetSizeTrait};
use arrow::buffer::Buffer;
use arrow::datatypes::DataType as ArrowType;
use arrow_array::{Array, ArrayRef, BinaryArray, Decimal128Array, OffsetSizeTrait};
use arrow_buffer::Buffer;
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::ops::Range;
use std::sync::Arc;
Expand Down Expand Up @@ -587,7 +587,7 @@ mod tests {
use super::*;
use crate::arrow::array_reader::test_util::{byte_array_all_encodings, utf8_column};
use crate::arrow::record_reader::buffer::ValuesBuffer;
use arrow::array::{Array, StringArray};
use arrow_array::{Array, StringArray};

#[test]
fn test_byte_array_decoder() {
Expand Down
25 changes: 7 additions & 18 deletions parquet/src/arrow/array_reader/byte_array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ use std::marker::PhantomData;
use std::ops::Range;
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
use arrow::buffer::Buffer;
use arrow::datatypes::{ArrowNativeType, DataType as ArrowType};
use arrow_array::{Array, ArrayRef, OffsetSizeTrait};
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_schema::DataType as ArrowType;

use crate::arrow::array_reader::byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain};
use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
Expand Down Expand Up @@ -188,15 +188,11 @@ where
}

fn get_def_levels(&self) -> Option<&[i16]> {
-self.def_levels_buffer
-.as_ref()
-.map(|buf| buf.typed_data())
+self.def_levels_buffer.as_ref().map(|buf| buf.typed_data())
}

fn get_rep_levels(&self) -> Option<&[i16]> {
-self.rep_levels_buffer
-.as_ref()
-.map(|buf| buf.typed_data())
+self.rep_levels_buffer.as_ref().map(|buf| buf.typed_data())
}
}

Expand Down Expand Up @@ -395,7 +391,7 @@ where

#[cfg(test)]
mod tests {
use arrow::array::{Array, StringArray};
use arrow_array::{Array, StringArray};
use arrow::compute::cast;

use crate::arrow::array_reader::test_util::{
Expand Down Expand Up @@ -528,13 +524,7 @@ mod tests {

assert_eq!(
strings.iter().collect::<Vec<_>>(),
vec![
Some("0"),
Some("1"),
Some("1"),
Some("2"),
Some("2"),
]
vec![Some("0"), Some("1"), Some("1"), Some("2"), Some("2"),]
)
}

Expand Down Expand Up @@ -625,7 +615,6 @@ mod tests {
}
}


#[test]
fn test_too_large_dictionary() {
let data: Vec<_> = (0..128)
Expand Down
5 changes: 3 additions & 2 deletions parquet/src/arrow/array_reader/empty_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@

use crate::arrow::array_reader::ArrayReader;
use crate::errors::Result;
use arrow::array::{ArrayDataBuilder, ArrayRef, StructArray};
use arrow::datatypes::DataType as ArrowType;
use arrow_schema::DataType as ArrowType;
use arrow_array::{ArrayRef, StructArray};
use arrow_data::ArrayDataBuilder;
use std::any::Any;
use std::sync::Arc;

Expand Down
15 changes: 8 additions & 7 deletions parquet/src/arrow/array_reader/fixed_len_byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ use crate::column::reader::decoder::{ColumnValueDecoder, ValuesBufferSlice};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::memory::ByteBufferPtr;
use arrow::array::{
ArrayDataBuilder, ArrayRef, Decimal128Array, FixedSizeBinaryArray,
IntervalDayTimeArray, IntervalYearMonthArray,
use arrow_array::{
ArrayRef, Decimal128Array, FixedSizeBinaryArray, IntervalDayTimeArray,
IntervalYearMonthArray,
};
use arrow::buffer::Buffer;
use arrow::datatypes::{DataType as ArrowType, IntervalUnit};
use arrow_buffer::Buffer;
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType as ArrowType, IntervalUnit};
use std::any::Any;
use std::ops::Range;
use std::sync::Arc;
Expand Down Expand Up @@ -427,10 +428,10 @@ mod tests {
use super::*;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use crate::arrow::ArrowWriter;
use arrow::array::{Array, Decimal128Array, ListArray};
use arrow_array::{Array, Decimal128Array, ListArray};
use arrow::datatypes::Field;
use arrow::error::Result as ArrowResult;
use arrow::record_batch::RecordBatch;
use arrow_array::RecordBatch;
use bytes::Bytes;
use std::sync::Arc;

Expand Down
16 changes: 9 additions & 7 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
use crate::arrow::array_reader::ArrayReader;
use crate::errors::ParquetError;
use crate::errors::Result;
use arrow::array::{
new_empty_array, Array, ArrayData, ArrayRef, BooleanBufferBuilder, GenericListArray,
MutableArrayData, OffsetSizeTrait,
use arrow_array::{
builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef, GenericListArray,
OffsetSizeTrait,
};
use arrow::buffer::Buffer;
use arrow::datatypes::DataType as ArrowType;
use arrow::datatypes::ToByteSlice;
use arrow_buffer::Buffer;
use arrow_buffer::ToByteSlice;
use arrow_data::{transform::MutableArrayData, ArrayData};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::cmp::Ordering;
use std::marker::PhantomData;
Expand Down Expand Up @@ -257,8 +258,9 @@ mod tests {
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::schema::parser::parse_message_type;
use crate::schema::types::SchemaDescriptor;
use arrow::array::{Array, ArrayDataBuilder, PrimitiveArray};
use arrow::datatypes::{Field, Int32Type as ArrowInt32, Int32Type};
use arrow_array::{Array, PrimitiveArray};
use arrow_data::ArrayDataBuilder;
use std::sync::Arc;

fn list_type<OffsetSize: OffsetSizeTrait>(
Expand Down
16 changes: 8 additions & 8 deletions parquet/src/arrow/array_reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

use crate::arrow::array_reader::{ArrayReader, ListArrayReader, StructArrayReader};
use crate::errors::Result;
use arrow::array::{Array, ArrayRef, MapArray};
use arrow::datatypes::DataType as ArrowType;
use arrow_array::{Array, ArrayRef, MapArray};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -125,10 +125,10 @@ mod tests {
use super::*;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use crate::arrow::ArrowWriter;
use arrow::array;
use arrow::array::{MapBuilder, PrimitiveBuilder, StringBuilder};
use arrow::datatypes::{Field, Int32Type, Schema};
use arrow::record_batch::RecordBatch;
use arrow_array::builder::{MapBuilder, PrimitiveBuilder, StringBuilder};
use arrow_array::cast::*;
use arrow_array::RecordBatch;
use bytes::Bytes;

#[test]
Expand Down Expand Up @@ -203,9 +203,9 @@ mod tests {
let col = record_batch.column(0);
assert!(col.is_null(0));
assert!(col.is_null(1));
let map_entry = array::as_map_array(col).value(2);
let struct_col = array::as_struct_array(&map_entry);
let key_col = array::as_string_array(struct_col.column(0)); // Key column
let map_entry = as_map_array(col).value(2);
let struct_col = as_struct_array(&map_entry);
let key_col = as_string_array(struct_col.column(0)); // Key column
assert_eq!(key_col.value(0), "three");
assert_eq!(key_col.value(1), "four");
assert_eq!(key_col.value(2), "five");
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
//! Logic for reading into arrow arrays
use crate::errors::Result;
use arrow::array::ArrayRef;
use arrow::datatypes::DataType as ArrowType;
use arrow_array::ArrayRef;
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;

Expand Down
8 changes: 4 additions & 4 deletions parquet/src/arrow/array_reader/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ use crate::column::page::PageIterator;
use crate::data_type::DataType;
use crate::errors::Result;
use crate::schema::types::ColumnDescPtr;
use arrow::array::ArrayRef;
use arrow::buffer::Buffer;
use arrow::datatypes::DataType as ArrowType;
use arrow_array::ArrayRef;
use arrow_buffer::Buffer;
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -82,7 +82,7 @@ where

fn consume_batch(&mut self) -> Result<ArrayRef> {
// convert to arrays
let array = arrow::array::NullArray::new(self.record_reader.num_values());
let array = arrow_array::NullArray::new(self.record_reader.num_values());

// save definition and repetition buffers
self.def_levels_buffer = self.record_reader.consume_def_levels();
Expand Down
21 changes: 11 additions & 10 deletions parquet/src/arrow/array_reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ use crate::column::page::PageIterator;
use crate::data_type::{DataType, Int96};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use arrow::array::{
ArrayDataBuilder, ArrayRef, BooleanArray, BooleanBufferBuilder, Decimal128Array,
Float32Array, Float64Array, Int32Array, Int64Array, TimestampNanosecondArray,
TimestampNanosecondBufferBuilder, UInt32Array, UInt64Array,
use arrow_array::{
builder::{BooleanBufferBuilder, TimestampNanosecondBufferBuilder},
ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array,
Int64Array, TimestampNanosecondArray, UInt32Array, UInt64Array,
};
use arrow::buffer::Buffer;
use arrow::datatypes::{DataType as ArrowType, TimeUnit};
use arrow_buffer::Buffer;
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType as ArrowType, TimeUnit};
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -205,8 +206,8 @@ where
let array = match target_type {
ArrowType::Date64 => {
// this is cheap as it internally reinterprets the data
let a = arrow::compute::cast(&array, &ArrowType::Date32)?;
arrow::compute::cast(&a, target_type)?
let a = arrow_cast::cast(&array, &ArrowType::Date32)?;
arrow_cast::cast(&a, target_type)?
}
ArrowType::Decimal128(p, s) => {
let array = match array.data_type() {
Expand Down Expand Up @@ -236,7 +237,7 @@ where

Arc::new(array) as ArrayRef
}
_ => arrow::compute::cast(&array, target_type)?,
_ => arrow_cast::cast(&array, target_type)?,
};

// save definition and repetition buffers
Expand Down Expand Up @@ -270,8 +271,8 @@ mod tests {
use crate::schema::types::SchemaDescriptor;
use crate::util::test_common::rand_gen::make_pages;
use crate::util::InMemoryPageIterator;
use arrow::array::{Array, PrimitiveArray};
use arrow::datatypes::ArrowPrimitiveType;
use arrow_array::{Array, PrimitiveArray};

use arrow::datatypes::DataType::Decimal128;
use rand::distributions::uniform::SampleUniform;
Expand Down
9 changes: 4 additions & 5 deletions parquet/src/arrow/array_reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@

use crate::arrow::array_reader::ArrayReader;
use crate::errors::{ParquetError, Result};
use arrow::array::{
ArrayData, ArrayDataBuilder, ArrayRef, BooleanBufferBuilder, StructArray,
};
use arrow::datatypes::DataType as ArrowType;
use arrow_array::{builder::BooleanBufferBuilder, ArrayRef, StructArray};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -216,9 +215,9 @@ mod tests {
use super::*;
use crate::arrow::array_reader::test_util::InMemoryArrayReader;
use crate::arrow::array_reader::ListArrayReader;
use arrow::array::{Array, Int32Array, ListArray};
use arrow::buffer::Buffer;
use arrow::datatypes::Field;
use arrow_array::{Array, Int32Array, ListArray};

#[test]
fn test_struct_array_reader() {
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use arrow::array::{Array, ArrayRef};
use arrow::datatypes::DataType as ArrowType;
use arrow_array::{Array, ArrayRef};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;

Expand Down
Loading

0 comments on commit 4dd7fea

Please sign in to comment.