From 26fbfb1c09d21b63d7b557f79de056a9a3ca6a67 Mon Sep 17 00:00:00 2001 From: Smith Cruise Date: Wed, 10 May 2023 14:29:53 +0800 Subject: [PATCH] [BugFix][Cherry-Pick][Branch-2.5] Fix using wrong parquet column chunk index in dict filter Signed-off-by: Smith Cruise (cherry picked from commit a43b726f862e3f7724c17ae403e80b2faf80ee5f) --- be/src/formats/parquet/group_reader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/be/src/formats/parquet/group_reader.cpp b/be/src/formats/parquet/group_reader.cpp index 879ee0cc52d8a..97be8d7ad0cb9 100644 --- a/be/src/formats/parquet/group_reader.cpp +++ b/be/src/formats/parquet/group_reader.cpp @@ -175,8 +175,10 @@ void GroupReader::_process_columns_and_conjunct_ctxs() { for (auto& column : _param.read_cols) { int chunk_index = column.col_idx_in_chunk; SlotId slot_id = column.slot_id; + const auto* parquet_field = _param.file_metadata->schema().get_stored_column_by_idx(column.col_idx_in_parquet); + DCHECK(parquet_field != nullptr); const tparquet::ColumnMetaData& column_metadata = - _row_group_metadata->columns[column.col_idx_in_parquet].meta_data; + _row_group_metadata->columns[parquet_field->physical_column_index].meta_data; if (_can_using_dict_filter(slots[chunk_index], conjunct_ctxs_by_slot, column_metadata)) { _use_as_dict_filter_column[read_col_idx] = true; _dict_filter_conjunct_ctxs[slot_id] = conjunct_ctxs_by_slot.at(slot_id);