Skip to content

Commit

Permalink
add some more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
etseidl committed Dec 12, 2024
1 parent 1abe85f commit b3d4dd9
Showing 2 changed files with 79 additions and 0 deletions.
68 changes: 68 additions & 0 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
@@ -989,6 +989,21 @@ mod tests {
assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]);
}

#[test]
fn test_arrow_reader_single_column_by_name() {
    // Project a single column by its name ("blog_id") instead of by leaf
    // index, and confirm the projected schema matches the original field.
    let test_file = get_test_file("parquet/generated_simple_numerics/blogs.parquet");

    let reader_builder = ParquetRecordBatchReaderBuilder::try_new(test_file).unwrap();
    let full_schema = Arc::clone(reader_builder.schema());

    let projection = ProjectionMask::columns(reader_builder.parquet_schema(), ["blog_id"]);
    let projected = reader_builder.with_projection(projection).build().unwrap();

    // The projected schema must contain exactly one field: the second
    // field ("blog_id") of the full schema.
    let projected_schema = projected.schema();
    assert_eq!(1, projected_schema.fields().len());
    assert_eq!(full_schema.fields()[1], projected_schema.fields()[0]);
}

#[test]
fn test_null_column_reader_test() {
let mut file = tempfile::tempfile().unwrap();
@@ -2563,6 +2578,59 @@ mod tests {
}
}

#[test]
// Mirrors test_read_structs, but builds the projection mask from dotted
// column names rather than leaf indices.
fn test_read_structs_by_name() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let path = format!("{testdata}/nested_structs.rust.parquet");

    // First pass: decode the whole file to ensure every batch reads cleanly.
    let full_reader = ParquetRecordBatchReader::try_new(File::open(&path).unwrap(), 60).unwrap();
    for batch in full_reader {
        batch.unwrap();
    }

    // Second pass: select three struct leaves by their dotted paths.
    let builder = ParquetRecordBatchReaderBuilder::try_new(File::open(&path).unwrap()).unwrap();
    let projection = ProjectionMask::columns(
        builder.parquet_schema(),
        ["roll_num.count", "PC_CUR.mean", "PC_CUR.sum"],
    );
    let reader = builder
        .with_projection(projection)
        .with_batch_size(60)
        .build()
        .unwrap();

    // Expected result: two struct fields, each retaining only the
    // children named in the projection.
    let roll_num_children = Fields::from(vec![Field::new("count", ArrowDataType::UInt64, false)]);
    let pc_cur_children = Fields::from(vec![
        Field::new("mean", ArrowDataType::Int64, false),
        Field::new("sum", ArrowDataType::Int64, false),
    ]);
    let expected_schema = Schema::new(vec![
        Field::new("roll_num", ArrowDataType::Struct(roll_num_children), false),
        Field::new("PC_CUR", ArrowDataType::Struct(pc_cur_children), false),
    ]);

    assert_eq!(&expected_schema, reader.schema().as_ref());

    // Every batch produced under the projection must carry the same schema.
    for batch in reader {
        let batch = batch.unwrap();
        assert_eq!(batch.schema().as_ref(), &expected_schema);
    }
}

#[test]
fn test_read_maps() {
let testdata = arrow::util::test_util::parquet_test_data();
11 changes: 11 additions & 0 deletions parquet/src/arrow/schema/mod.rs
Original file line number Diff line number Diff line change
@@ -1297,6 +1297,17 @@ mod tests {
for i in 0..arrow_fields.len() {
assert_eq!(&arrow_fields[i], converted_fields[i].as_ref());
}

let mask =
ProjectionMask::columns(&parquet_schema, ["group2.leaf4", "group1.leaf1", "leaf5"]);
let converted_arrow_schema =
parquet_to_arrow_schema_by_columns(&parquet_schema, mask, None).unwrap();
let converted_fields = converted_arrow_schema.fields();

assert_eq!(arrow_fields.len(), converted_fields.len());
for i in 0..arrow_fields.len() {
assert_eq!(&arrow_fields[i], converted_fields[i].as_ref());
}
}

#[test]

0 comments on commit b3d4dd9

Please sign in to comment.