diff --git a/datafusion/core/src/physical_plan/file_format/parquet.rs b/datafusion/core/src/physical_plan/file_format/parquet.rs index 92be32f47649..8c5b5e4de8ea 100644 --- a/datafusion/core/src/physical_plan/file_format/parquet.rs +++ b/datafusion/core/src/physical_plan/file_format/parquet.rs @@ -1622,11 +1622,11 @@ mod tests { .infer_schema(&state, &store, &[meta.clone()]) .await?; - let group_empty = vec![vec![file_range(&meta, 0, 5)]]; - let group_contain = vec![vec![file_range(&meta, 5, i64::MAX)]]; + let group_empty = vec![vec![file_range(&meta, 0, 2)]]; + let group_contain = vec![vec![file_range(&meta, 2, i64::MAX)]]; let group_all = vec![vec![ - file_range(&meta, 0, 5), - file_range(&meta, 5, i64::MAX), + file_range(&meta, 0, 2), + file_range(&meta, 2, i64::MAX), ]]; assert_parquet_read(&state, group_empty, None, file_schema.clone()).await?; diff --git a/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs b/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs index 376ae35c66d9..86cf06620c20 100644 --- a/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs +++ b/datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs @@ -53,7 +53,13 @@ pub(crate) fn prune_row_groups( let mut filtered = Vec::with_capacity(groups.len()); for (idx, metadata) in groups.iter().enumerate() { if let Some(range) = &range { - let offset = metadata.column(0).file_offset(); + // Figure out where the first page is: the dictionary page if present, + // otherwise the first data page. + // Note: don't use `file_offset()`, which is the location of the metadata. + let col = metadata.column(0); + let offset = col + .dictionary_page_offset() + .unwrap_or_else(|| col.data_page_offset()); if offset < range.start || offset >= range.end { continue; }