Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement](inverted index) improve expr evaluate_inverted_index performace and remove useless code #40600

Merged
merged 1 commit into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,36 +927,6 @@ bool SegmentIterator::_need_read_data(ColumnId cid) {
return true;
}

bool SegmentIterator::_is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
const MatchPredicate* match_pred,
const Schema* schema) {
if (!expr || expr->node_type() != TExprNodeType::MATCH_PRED) {
return false;
}

const auto& children = expr->children();
if (children.size() != 2 || !children[0]->is_slot_ref() || !children[1]->is_constant()) {
return false;
}

auto slot_ref = dynamic_cast<vectorized::VSlotRef*>(children[0].get());
if (!slot_ref) {
LOG(WARNING) << children[0]->debug_string() << " should be SlotRef";
return false;
}
std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
// children 1 is VLiteral, we do not need expr context.
auto res = children[1]->get_const_col(nullptr /* context */, &const_col_wrapper);
if (!res.ok() || !const_col_wrapper) {
return false;
}

const auto const_column =
check_and_get_column<vectorized::ColumnConst>(const_col_wrapper->column_ptr);
return const_column && match_pred->column_id() == schema->column_id(slot_ref->column_id()) &&
StringRef(match_pred->get_value()) == const_column->get_data_at(0);
}

Status SegmentIterator::_apply_inverted_index() {
std::vector<ColumnPredicate*> remaining_predicates;
std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set;
Expand Down
16 changes: 0 additions & 16 deletions be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,22 +362,6 @@ class SegmentIterator : public RowwiseIterator {
return 0;
}

bool _is_match_predicate_and_not_remaining(
ColumnPredicate* pred, const std::vector<ColumnPredicate*>& remaining_predicates) {
return pred->type() == PredicateType::MATCH &&
std::find(remaining_predicates.begin(), remaining_predicates.end(), pred) ==
remaining_predicates.end();
}

void _delete_expr_from_conjunct_roots(const vectorized::VExprSPtr& expr,
vectorized::VExprSPtrs& conjunct_roots) {
conjunct_roots.erase(std::remove(conjunct_roots.begin(), conjunct_roots.end(), expr),
conjunct_roots.end());
}

bool _is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
const MatchPredicate* match_pred, const Schema* schema);

Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);

bool _no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
Expand Down
45 changes: 28 additions & 17 deletions be/src/vec/exprs/vexpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,15 +613,26 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string&

Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
uint32_t segment_num_rows) {
// Pre-allocate vectors based on an estimated or known size
std::vector<segment_v2::InvertedIndexIterator*> iterators;
std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
std::vector<int> column_ids;
vectorized::ColumnsWithTypeAndName arguments;
VExprSPtrs children_exprs;
for (auto child : children()) {
// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.

// Reserve space to avoid multiple reallocations
const size_t estimated_size = children().size();
iterators.reserve(estimated_size);
data_type_with_names.reserve(estimated_size);
column_ids.reserve(estimated_size);
children_exprs.reserve(estimated_size);

auto index_context = context->get_inverted_index_context();

// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.
for (const auto& child : children()) {
if (child->node_type() == TExprNodeType::CAST_EXPR) {
auto* cast_expr = assert_cast<VCastExpr*>(child.get());
DCHECK_EQ(cast_expr->children().size(), 1);
Expand Down Expand Up @@ -663,7 +674,11 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
}
}

for (auto child : children_exprs) {
if (children_exprs.empty()) {
return Status::OK(); // Early exit if no children to process
}

for (const auto& child : children_exprs) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
auto column_id = column_slot_ref->column_id();
Expand Down Expand Up @@ -694,25 +709,21 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
column_literal->get_data_type(), column_literal->expr_name());
}
}
auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
if (iterators.empty()) {
return Status::OK();
}
// If arguments are empty, it means the left value in the expression is not a literal.
if (arguments.empty()) {
return Status::OK();

if (iterators.empty() || arguments.empty()) {
return Status::OK(); // Nothing to evaluate or no literals to compare against
}

auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
auto res = function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
segment_num_rows, result_bitmap);
if (!res.ok()) {
return res;
}
if (!result_bitmap.is_empty()) {
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
result_bitmap);
for (auto column_id : column_ids) {
context->get_inverted_index_context()->set_true_for_inverted_index_status(this,
column_id);
index_context->set_inverted_index_result_for_expr(this, result_bitmap);
for (int column_id : column_ids) {
index_context->set_true_for_inverted_index_status(this, column_id);
}
// set fast_execute when expr evaluated by inverted index correctly
_can_fast_execute = true;
Expand Down
Loading