Skip to content

Commit

Permalink
ensure null-counts are written for all-null columns (#307) (#404)
Browse files Browse the repository at this point in the history
Fixes #306.

Co-authored-by: Marco Neumann <[email protected]>
  • Loading branch information
alamb and crepererum authored Jun 5, 2021
1 parent c928d57 commit 4d216f3
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
17 changes: 17 additions & 0 deletions parquet/src/arrow/arrow_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1560,4 +1560,21 @@ mod tests {
panic!("Statistics::Int64 missing")
}
}

#[test]
fn statistics_null_counts_only_nulls() {
    // Regression check: a column consisting solely of NULL values must
    // still carry a correct null-count in its column statistics.
    let all_null = Arc::new(UInt64Array::from(vec![None, None]));
    let written = one_column_roundtrip("null_counts", all_null, true);

    // Re-open what was written and walk the metadata down to the only
    // column of the only row group.
    let file_reader = SerializedFileReader::new(written).unwrap();
    let file_meta = file_reader.metadata();
    assert_eq!(file_meta.num_row_groups(), 1);

    let rg = file_meta.row_group(0);
    assert_eq!(rg.num_columns(), 1);

    // Both input values were NULL, so the recorded null count must be 2.
    let null_count = rg.column(0).statistics().unwrap().null_count();
    assert_eq!(null_count, 2);
}
}
4 changes: 3 additions & 1 deletion parquet/src/column/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -607,9 +607,11 @@ impl<T: DataType> ColumnWriterImpl<T> {
let max_def_level = self.descr.max_def_level();
let max_rep_level = self.descr.max_rep_level();

// always update column NULL count, no matter if page stats are used
self.num_column_nulls += self.num_page_nulls;

let page_statistics = if calculate_page_stat {
self.update_column_min_max();
self.num_column_nulls += self.num_page_nulls;
Some(self.make_page_statistics())
} else {
None
Expand Down

0 comments on commit 4d216f3

Please sign in to comment.