Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[Fix](orc-reader) Fix incorrect result when partition fields are null in the ORC file. #23369

Merged
merged 1 commit into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,19 +630,24 @@ bool OrcReader::_init_search_argument(
for (int i = 0; i < root_type.getSubtypeCount(); ++i) {
type_map.emplace(_get_field_name_lower_case(&root_type, i), root_type.getSubtype(i));
}
for (auto it = colname_to_value_range->begin(); it != colname_to_value_range->end(); ++it) {
auto type_it = type_map.find(it->first);
if (type_it != type_map.end()) {
std::visit(
[&](auto& range) {
std::vector<OrcPredicate> value_predicates =
value_range_to_predicate(range, type_it->second);
for (auto& range_predicate : value_predicates) {
predicates.emplace_back(range_predicate);
}
},
it->second);
for (auto& col_name : _lazy_read_ctx.all_read_columns) {
auto iter = colname_to_value_range->find(col_name);
if (iter == colname_to_value_range->end()) {
continue;
}
auto type_it = type_map.find(col_name);
if (type_it == type_map.end()) {
continue;
}
std::visit(
[&](auto& range) {
std::vector<OrcPredicate> value_predicates =
value_range_to_predicate(range, type_it->second);
for (auto& range_predicate : value_predicates) {
predicates.emplace_back(range_predicate);
}
},
iter->second);
}
if (predicates.empty()) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,39 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48
-- !null_expr_dict_filter_parquet --
4844 4363

-- !par_fields_in_file_orc1 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_parquet1 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_orc2 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_parquet2 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_orc3 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_parquet3 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_orc4 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_parquet4 --
1 Alice 100.0 2023 8
2 Bob 150.0 2023 8

-- !par_fields_in_file_orc5 --

-- !par_fields_in_file_parquet5 --

Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ suite("test_external_catalog_hive", "p2,external,hive,external_remote,external_r
qt_null_expr_dict_filter_orc """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_orc WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """
qt_null_expr_dict_filter_parquet """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_parquet WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """

// test partition fields in file
qt_par_fields_in_file_orc1 """ select * from multi_catalog.par_fields_in_file_orc where year = 2023 and month = 8 order by id; """
qt_par_fields_in_file_parquet1 """ select * from multi_catalog.par_fields_in_file_parquet where year = 2023 and month = 8 order by id; """
qt_par_fields_in_file_orc2 """ select * from multi_catalog.par_fields_in_file_orc where year = 2023 order by id; """
qt_par_fields_in_file_parquet2 """ select * from multi_catalog.par_fields_in_file_parquet where year = 2023 order by id; """
qt_par_fields_in_file_orc3 """ select * from multi_catalog.par_fields_in_file_orc where month = 8 order by id; """
qt_par_fields_in_file_parquet3 """ select * from multi_catalog.par_fields_in_file_parquet where month = 8 order by id; """
qt_par_fields_in_file_orc4 """ select * from multi_catalog.par_fields_in_file_orc where month = 8 and year >= 2022 order by id; """
qt_par_fields_in_file_parquet4 """ select * from multi_catalog.par_fields_in_file_parquet where month = 8 and year >= 2022 order by id; """
qt_par_fields_in_file_orc5 """ select * from multi_catalog.par_fields_in_file_orc where month = 8 and year = 2022 order by id; """
qt_par_fields_in_file_parquet5 """ select * from multi_catalog.par_fields_in_file_parquet where month = 8 and year = 2022 order by id; """

// test remember last used database after switch / rename catalog
sql """switch ${catalog_name};"""
Expand Down