Skip to content

Commit

Permalink
Merge branch 'branch-2.1' into pick_40258_to_origin_branch-2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
airborne12 authored Sep 29, 2024
2 parents 2165e5b + d1ee0f4 commit 531284b
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 104 deletions.
3 changes: 2 additions & 1 deletion be/src/agent/be_exec_version_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,13 @@ class BeExecVersionManager {
* g. do local merge of remote runtime filter
* h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS
*/
constexpr inline int BeExecVersionManager::max_be_exec_version = 5;
constexpr inline int BeExecVersionManager::max_be_exec_version = 6;
constexpr inline int BeExecVersionManager::min_be_exec_version = 0;

/// functional
constexpr inline int BITMAP_SERDE = 3;
constexpr inline int USE_NEW_SERDE = 4; // release on DORIS version 2.1
constexpr inline int OLD_WAL_SERDE = 3; // use to solve compatibility issues, see pr #32299
constexpr inline int VARIANT_SERDE = 6; // change variant serde to fix PR #38413

} // namespace doris
94 changes: 32 additions & 62 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,35 +502,41 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() {
}

RETURN_IF_ERROR(_apply_bitmap_index());
RETURN_IF_ERROR(_apply_inverted_index());
RETURN_IF_ERROR(_apply_index_expr());
size_t input_rows = _row_bitmap.cardinality();
for (auto it = _common_expr_ctxs_push_down.begin(); it != _common_expr_ctxs_push_down.end();) {
if ((*it)->all_expr_inverted_index_evaluated()) {
const auto* result =
(*it)->get_inverted_index_context()->get_inverted_index_result_for_expr(
(*it)->root().get());
if (result != nullptr) {
_row_bitmap &= *result->get_data_bitmap();
auto root = (*it)->root();
auto iter_find = std::find(_remaining_conjunct_roots.begin(),
_remaining_conjunct_roots.end(), root);
if (iter_find != _remaining_conjunct_roots.end()) {
_remaining_conjunct_roots.erase(iter_find);
{
if (_opts.runtime_state &&
_opts.runtime_state->query_options().enable_inverted_index_query) {
SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer);
size_t input_rows = _row_bitmap.cardinality();
RETURN_IF_ERROR(_apply_inverted_index());
RETURN_IF_ERROR(_apply_index_expr());
for (auto it = _common_expr_ctxs_push_down.begin();
it != _common_expr_ctxs_push_down.end();) {
if ((*it)->all_expr_inverted_index_evaluated()) {
const auto* result =
(*it)->get_inverted_index_context()->get_inverted_index_result_for_expr(
(*it)->root().get());
if (result != nullptr) {
_row_bitmap &= *result->get_data_bitmap();
auto root = (*it)->root();
auto iter_find = std::find(_remaining_conjunct_roots.begin(),
_remaining_conjunct_roots.end(), root);
if (iter_find != _remaining_conjunct_roots.end()) {
_remaining_conjunct_roots.erase(iter_find);
}
it = _common_expr_ctxs_push_down.erase(it);
}
} else {
++it;
}
it = _common_expr_ctxs_push_down.erase(it);
}
} else {
++it;
}
}

_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
for (auto cid : _schema->column_ids()) {
bool result_true = _check_all_conditions_passed_inverted_index_for_column(cid);
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
for (auto cid : _schema->column_ids()) {
bool result_true = _check_all_conditions_passed_inverted_index_for_column(cid);

if (result_true) {
_need_read_data_indices[cid] = false;
if (result_true) {
_need_read_data_indices[cid] = false;
}
}
}
}
if (!_row_bitmap.isEmpty() &&
Expand Down Expand Up @@ -933,42 +939,7 @@ bool SegmentIterator::_need_read_data(ColumnId cid) {
return true;
}

bool SegmentIterator::_is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
const MatchPredicate* match_pred,
const Schema* schema) {
if (!expr || expr->node_type() != TExprNodeType::MATCH_PRED) {
return false;
}

const auto& children = expr->children();
if (children.size() != 2 || !children[0]->is_slot_ref() || !children[1]->is_constant()) {
return false;
}

auto slot_ref = dynamic_cast<vectorized::VSlotRef*>(children[0].get());
if (!slot_ref) {
LOG(WARNING) << children[0]->debug_string() << " should be SlotRef";
return false;
}
std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
// children 1 is VLiteral, we do not need expr context.
auto res = children[1]->get_const_col(nullptr /* context */, &const_col_wrapper);
if (!res.ok() || !const_col_wrapper) {
return false;
}

const auto const_column =
check_and_get_column<vectorized::ColumnConst>(const_col_wrapper->column_ptr);
return const_column && match_pred->column_id() == schema->column_id(slot_ref->column_id()) &&
StringRef(match_pred->get_value()) == const_column->get_data_at(0);
}

Status SegmentIterator::_apply_inverted_index() {
SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer);
if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) {
return Status::OK();
}
size_t input_rows = _row_bitmap.cardinality();
std::vector<ColumnPredicate*> remaining_predicates;
std::set<const ColumnPredicate*> no_need_to_pass_column_predicate_set;

Expand All @@ -986,7 +957,6 @@ Status SegmentIterator::_apply_inverted_index() {
}

_col_predicates = std::move(remaining_predicates);
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
return Status::OK();
}

Expand Down
16 changes: 0 additions & 16 deletions be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,22 +363,6 @@ class SegmentIterator : public RowwiseIterator {
return 0;
}

bool _is_match_predicate_and_not_remaining(
ColumnPredicate* pred, const std::vector<ColumnPredicate*>& remaining_predicates) {
return pred->type() == PredicateType::MATCH &&
std::find(remaining_predicates.begin(), remaining_predicates.end(), pred) ==
remaining_predicates.end();
}

void _delete_expr_from_conjunct_roots(const vectorized::VExprSPtr& expr,
vectorized::VExprSPtrs& conjunct_roots) {
conjunct_roots.erase(std::remove(conjunct_roots.begin(), conjunct_roots.end(), expr),
conjunct_roots.end());
}

bool _is_target_expr_match_predicate(const vectorized::VExprSPtr& expr,
const MatchPredicate* match_pred, const Schema* schema);

Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);

bool _no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
Expand Down
1 change: 0 additions & 1 deletion be/src/pipeline/exec/scan_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ class ScanLocalState : public ScanLocalStateBase {
SlotDescriptor* slot, ColumnValueRange<T>& range,
vectorized::VScanNode::PushDownType* pdt);

template <PrimitiveType T>
Status _normalize_is_null_predicate(vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
vectorized::VScanNode::PushDownType* pdt);
Expand Down
18 changes: 17 additions & 1 deletion be/src/vec/data_types/data_type_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <utility>
#include <vector>

#include "agent/be_exec_version_manager.h"
#include "vec/columns/column_object.h"
#include "vec/common/assert_cast.h"
#include "vec/common/typeid_cast.h"
Expand Down Expand Up @@ -66,7 +67,6 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
if (is_nothing(type)) {
continue;
}

PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
type->to_pb_column_meta(&column_meta_pb);
Expand All @@ -78,6 +78,10 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
size += type->get_uncompressed_serialized_bytes(entry->data.get_finalized_column(),
be_exec_version);
}
// serialize num of rows, only take effect when subcolumns empty
if (be_exec_version >= VARIANT_SERDE) {
size += sizeof(uint32_t);
}

return size;
}
Expand Down Expand Up @@ -121,6 +125,11 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve
}
// serialize num of subcolumns
*reinterpret_cast<uint32_t*>(size_pos) = num_of_columns;
// serialize num of rows, only take effect when subcolumns empty
if (be_exec_version >= VARIANT_SERDE) {
*reinterpret_cast<uint32_t*>(buf) = column_object.rows();
buf += sizeof(uint32_t);
}

return buf;
}
Expand Down Expand Up @@ -155,6 +164,13 @@ const char* DataTypeObject::deserialize(const char* buf, IColumn* column,
}
column_object->add_sub_column(key, std::move(sub_column), type);
}
size_t num_rows = 0;
// serialize num of rows, only take effect when subcolumns empty
if (be_exec_version >= VARIANT_SERDE) {
num_rows = *reinterpret_cast<const uint32_t*>(buf);
column_object->set_num_rows(num_rows);
buf += sizeof(uint32_t);
}

column_object->finalize();
#ifndef NDEBUG
Expand Down
45 changes: 28 additions & 17 deletions be/src/vec/exprs/vexpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,15 +604,26 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string&

Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
uint32_t segment_num_rows) {
// Pre-allocate vectors based on an estimated or known size
std::vector<segment_v2::InvertedIndexIterator*> iterators;
std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
std::vector<int> column_ids;
vectorized::ColumnsWithTypeAndName arguments;
VExprSPtrs children_exprs;
for (auto child : children()) {
// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.

// Reserve space to avoid multiple reallocations
const size_t estimated_size = children().size();
iterators.reserve(estimated_size);
data_type_with_names.reserve(estimated_size);
column_ids.reserve(estimated_size);
children_exprs.reserve(estimated_size);

auto index_context = context->get_inverted_index_context();

// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.
for (const auto& child : children()) {
if (child->node_type() == TExprNodeType::CAST_EXPR) {
auto* cast_expr = assert_cast<VCastExpr*>(child.get());
DCHECK_EQ(cast_expr->children().size(), 1);
Expand Down Expand Up @@ -654,7 +665,11 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
}
}

for (auto child : children_exprs) {
if (children_exprs.empty()) {
return Status::OK(); // Early exit if no children to process
}

for (const auto& child : children_exprs) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
auto column_id = column_slot_ref->column_id();
Expand Down Expand Up @@ -685,25 +700,21 @@ Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBase
column_literal->get_data_type(), column_literal->expr_name());
}
}
auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
if (iterators.empty()) {
return Status::OK();
}
// If arguments are empty, it means the left value in the expression is not a literal.
if (arguments.empty()) {
return Status::OK();

if (iterators.empty() || arguments.empty()) {
return Status::OK(); // Nothing to evaluate or no literals to compare against
}

auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
auto res = function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
segment_num_rows, result_bitmap);
if (!res.ok()) {
return res;
}
if (!result_bitmap.is_empty()) {
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
result_bitmap);
for (auto column_id : column_ids) {
context->get_inverted_index_context()->set_true_for_inverted_index_status(this,
column_id);
index_context->set_inverted_index_result_for_expr(this, result_bitmap);
for (int column_id : column_ids) {
index_context->set_true_for_inverted_index_status(this, column_id);
}
// set fast_execute when expr evaluated by inverted index correctly
_can_fast_execute = true;
Expand Down
5 changes: 0 additions & 5 deletions be/src/vec/functions/array/function_array_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,6 @@ class FunctionArrayIndex : public IFunction {
roaring->cardinality(), result_bitmap);
}
})
if (iter->has_null()) {
segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
null_bitmap = null_bitmap_cache_handle.get_bitmap();
}
segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
bitmap_result = result;
bitmap_result.mask_out_null();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1859,7 +1859,7 @@ public class Config extends ConfigBase {
* Max data version of backends serialize block.
*/
@ConfField(mutable = false)
public static int max_be_exec_version = 5;
public static int max_be_exec_version = 6;

/**
* Min data version of backends serialize block.
Expand Down
67 changes: 67 additions & 0 deletions regression-test/data/variant_p0/rqg/rqg5.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !rqg5 --
0

-- !rqg5_2 --
50

-- !rqg5_3 --
50

-- !rqg5_4 --
50

-- !rqg5_5 --
50

-- !rqg5_6 --
50

-- !rqg5_7 --
0

-- !rqg5_8 --
0

-- !rqg5_9 --
50

-- !rqg5_10 --
50

-- !rqg5_11 --
50

-- !rqg5_12 --
50

-- !rqg5_13 --
50

-- !rqg5_14 --
50

-- !rqg5_15 --
0

-- !rqg5_16 --
100

-- !rqg5_17 --
100

-- !rqg5_18 --
100

-- !rqg5_19 --
100

-- !rqg5_20 --
100

-- !rqg5_21 --
100

-- !rqg5_22 --
0

Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ suite("test_index_compaction_null", "nonConcurrent") {
}

def run_sql = { ->
sql """ set enable_common_expr_pushdown=true """
// select all data
qt_select_0 "SELECT * FROM ${tableName} ORDER BY id"

Expand Down
Loading

0 comments on commit 531284b

Please sign in to comment.