Skip to content

Commit

Permalink
[fix](arrays_overlap) support arrays overlap with inverted index (#42090
Browse files Browse the repository at this point in the history
)

## Proposed changes
Backport of #41286 and #41495.
Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
amorynan authored Oct 18, 2024
1 parent dde0bf9 commit 5db44a1
Show file tree
Hide file tree
Showing 4 changed files with 1,094 additions and 25 deletions.
19 changes: 3 additions & 16 deletions be/src/vec/functions/array/function_array_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,22 +130,9 @@ class FunctionArrayIndex : public IFunction {
std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
param_type, &param_value, query_param));
if (is_string_type(param_type)) {
Status st = iter->read_from_inverted_index(
data_type_with_name.first, query_param->get_value(),
segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring);
if (st.code() == ErrorCode::INVERTED_INDEX_NO_TERMS) {
// if analyzed param with no term, we do not filter any rows
// return all rows with OK status
roaring->addRange(0, num_rows);
} else if (st != Status::OK()) {
return st;
}
} else {
RETURN_IF_ERROR(iter->read_from_inverted_index(
data_type_with_name.first, query_param->get_value(),
segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring));
}
RETURN_IF_ERROR(iter->read_from_inverted_index(
data_type_with_name.first, query_param->get_value(),
segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring));
// Debug point: used to check that array_contains really filters rows correctly via the inverted index.
DBUG_EXECUTE_IF("array_func.array_contains", {
auto result_bitmap = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
Expand Down
78 changes: 78 additions & 0 deletions be/src/vec/functions/array/function_arrays_overlap.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "vec/data_types/data_type_number.h"
#include "vec/functions/array/function_array_utils.h"
#include "vec/functions/function.h"
#include "vec/functions/function_helpers.h"

namespace doris {
class FunctionContext;
Expand Down Expand Up @@ -127,6 +128,83 @@ class FunctionArraysOverlap : public IFunction {
return make_nullable(std::make_shared<DataTypeUInt8>());
}

/**
 * Evaluate arrays_overlap(array_column, <const array>) through the inverted index.
 *
 * Each element of the constant array is looked up with an EQUAL_QUERY and the
 * per-element row bitmaps are OR-ed together, so a row matches when it matches
 * at least one element.
 *
 * @param arguments            the single non-indexed argument; must be a constant array
 * @param data_type_with_names name/type pair of the indexed column (exactly one)
 * @param iterators            inverted index iterator of the indexed column (exactly one)
 * @param num_rows             row count forwarded to read_from_inverted_index
 * @param bitmap_result        output; only assigned when the index was actually evaluated
 * @return Status::OK() with bitmap_result untouched when there is no iterator, the
 *         constant is NULL, or any element of the constant array is NULL;
 *         INVERTED_INDEX_EVALUATE_SKIPPED when the reader is FULLTEXT, the argument is
 *         not constant, or the argument is not an array; otherwise the status of the
 *         underlying index reads.
 */
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    if (iter->get_inverted_index_reader_type() ==
        segment_v2::InvertedIndexReaderType::FULLTEXT) {
        return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
                "Inverted index evaluate skipped, FULLTEXT reader can not support "
                "array_overlap");
    }

    // The non-indexed argument must be a constant array (possibly wrapped in nullable);
    // otherwise the index is skipped and the expression is evaluated normally.
    const ColumnPtr& arg_column = arguments[0].column;
    const bool arg_is_const = is_column_nullable(*arg_column)
                                      ? is_column_const(*remove_nullable(arg_column))
                                      : is_column_const(*arg_column);
    if (!arg_is_const) {
        return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
                "Inverted index evaluate skipped, array_overlap only support const value");
    }

    Field param_value;
    arg_column->get(0, param_value);

    const DataTypePtr array_type_holder = remove_nullable(arguments[0].type);
    DCHECK(is_array(array_type_holder));
    const auto* array_type = check_and_get_data_type<DataTypeArray>(array_type_holder.get());
    if (array_type == nullptr) {
        // DCHECK is compiled out in release builds; skip the index instead of
        // dereferencing a null pointer.
        return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
                "Inverted index evaluate skipped, array_overlap expects an array argument");
    }
    const auto nested_param_type =
            array_type->get_nested_type()->get_type_as_type_descriptor().type;

    // The current implementation for the inverted index of arrays cannot handle cases
    // where the array contains null values, meaning an item in the array is null.
    if (param_value.is_null()) {
        return Status::OK();
    }

    // Any NULL element inside the constant array makes the current arrays_overlap
    // execute logic return NULL, so the index is not used. Check this before doing
    // any index reads so that no lookup result is computed and then thrown away.
    const Array& query_val = param_value.get<Array>();
    for (const auto& element : query_val) {
        if (element.is_null()) {
            return Status::OK();
        }
    }

    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }

    const auto& indexed_field = data_type_with_names[0];
    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    // NOTE(review): the element is kept as a by-value copy, as in the original code,
    // because its address is passed to create_query_value; switch to a const reference
    // if that API takes a const pointer.
    for (auto nested_query_val : query_val) {
        std::shared_ptr<roaring::Roaring> single_res = std::make_shared<roaring::Roaring>();
        RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
                nested_param_type, &nested_query_val, query_param));
        RETURN_IF_ERROR(iter->read_from_inverted_index(
                indexed_field.first, query_param->get_value(),
                segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, single_res));
        // Union: a row overlaps when it matches at least one element.
        *roaring |= *single_res;
    }

    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    bitmap_result.mask_out_null();

    return Status::OK();
}

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto left_column =
Expand Down
Loading

0 comments on commit 5db44a1

Please sign in to comment.