From b8d8194562580996a85480ace71b5b779f77465b Mon Sep 17 00:00:00 2001 From: airborne12 Date: Wed, 11 Sep 2024 14:13:14 +0800 Subject: [PATCH] [Enhancement](inverted index) improve expr evaluate_inverted_index performace and remove useless code (#40600) ## Proposed changes Pre-allocate vectors based on an estimated or known size in vexpr evaluated_inverted_index --- .../rowset/segment_v2/segment_iterator.cpp | 30 ------------- .../olap/rowset/segment_v2/segment_iterator.h | 16 ------- be/src/vec/exprs/vexpr.cpp | 45 ++++++++++++------- 3 files changed, 28 insertions(+), 63 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index f46347c3d272a2..d946787df5b8b5 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -944,36 +944,6 @@ bool SegmentIterator::_need_read_data(ColumnId cid) { return true; } -bool SegmentIterator::_is_target_expr_match_predicate(const vectorized::VExprSPtr& expr, - const MatchPredicate* match_pred, - const Schema* schema) { - if (!expr || expr->node_type() != TExprNodeType::MATCH_PRED) { - return false; - } - - const auto& children = expr->children(); - if (children.size() != 2 || !children[0]->is_slot_ref() || !children[1]->is_constant()) { - return false; - } - - auto slot_ref = dynamic_cast(children[0].get()); - if (!slot_ref) { - LOG(WARNING) << children[0]->debug_string() << " should be SlotRef"; - return false; - } - std::shared_ptr const_col_wrapper; - // children 1 is VLiteral, we do not need expr context. - auto res = children[1]->get_const_col(nullptr /* context */, &const_col_wrapper); - if (!res.ok() || !const_col_wrapper) { - return false; - } - - const auto const_column = - check_and_get_column(const_col_wrapper->column_ptr); - return const_column && match_pred->column_id() == schema->column_id(slot_ref->column_id()) && - StringRef(match_pred->get_value()) == const_column->get_data_at(0); -} - Status SegmentIterator::_apply_inverted_index() { SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer); if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 7b42a3938d9f9c..5f1c5b68e2fca4 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -363,22 +363,6 @@ class SegmentIterator : public RowwiseIterator { return 0; } - bool _is_match_predicate_and_not_remaining( - ColumnPredicate* pred, const std::vector& remaining_predicates) { - return pred->type() == PredicateType::MATCH && - std::find(remaining_predicates.begin(), remaining_predicates.end(), pred) == - remaining_predicates.end(); - } - - void _delete_expr_from_conjunct_roots(const vectorized::VExprSPtr& expr, - vectorized::VExprSPtrs& conjunct_roots) { - conjunct_roots.erase(std::remove(conjunct_roots.begin(), conjunct_roots.end(), expr), - conjunct_roots.end()); - } - - bool _is_target_expr_match_predicate(const vectorized::VExprSPtr& expr, - const MatchPredicate* match_pred, const Schema* schema); - Status _convert_to_expected_type(const std::vector& col_ids); bool _no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column, diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 4617d316649e84..f026ff0617aea3 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -604,15 +604,26 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string& Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function, uint32_t segment_num_rows) { + // Pre-allocate vectors based on an estimated or known size std::vector iterators; std::vector data_type_with_names; std::vector column_ids; vectorized::ColumnsWithTypeAndName arguments; VExprSPtrs children_exprs; - for (auto child : children()) { - // if child is cast expr, we need to ensure target data type is the same with storage data type. - // or they are all string type - // and if data type is array, we need to get the nested data type to ensure that. + + // Reserve space to avoid multiple reallocations + const size_t estimated_size = children().size(); + iterators.reserve(estimated_size); + data_type_with_names.reserve(estimated_size); + column_ids.reserve(estimated_size); + children_exprs.reserve(estimated_size); + + auto index_context = context->get_inverted_index_context(); + + // if child is cast expr, we need to ensure target data type is the same with storage data type. + // or they are all string type + // and if data type is array, we need to get the nested data type to ensure that. + for (const auto& child : children()) { if (child->node_type() == TExprNodeType::CAST_EXPR) { auto* cast_expr = assert_cast(child.get()); DCHECK_EQ(cast_expr->children().size(), 1); @@ -654,7 +665,11 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase } } - for (auto child : children_exprs) { + if (children_exprs.empty()) { + return Status::OK(); // Early exit if no children to process + } + + for (const auto& child : children_exprs) { if (child->is_slot_ref()) { auto* column_slot_ref = assert_cast(child.get()); auto column_id = column_slot_ref->column_id(); @@ -685,25 +700,21 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase column_literal->get_data_type(), column_literal->expr_name()); } } - auto result_bitmap = segment_v2::InvertedIndexResultBitmap(); - if (iterators.empty()) { - return Status::OK(); - } - // If arguments are empty, it means the left value in the expression is not a literal. - if (arguments.empty()) { - return Status::OK(); + + if (iterators.empty() || arguments.empty()) { + return Status::OK(); // Nothing to evaluate or no literals to compare against } + + auto result_bitmap = segment_v2::InvertedIndexResultBitmap(); auto res = function->evaluate_inverted_index(arguments, data_type_with_names, iterators, segment_num_rows, result_bitmap); if (!res.ok()) { return res; } if (!result_bitmap.is_empty()) { - context->get_inverted_index_context()->set_inverted_index_result_for_expr(this, - result_bitmap); - for (auto column_id : column_ids) { - context->get_inverted_index_context()->set_true_for_inverted_index_status(this, - column_id); + index_context->set_inverted_index_result_for_expr(this, result_bitmap); + for (int column_id : column_ids) { + index_context->set_true_for_inverted_index_status(this, column_id); } // set fast_execute when expr evaluated by inverted index correctly _can_fast_execute = true;