Skip to content

Commit

Permalink
[fix](function) to_bitmap parameter parsing failure returns null inst…
Browse files Browse the repository at this point in the history
…ead of bitmap_empty (#21236)

* [fix](function) to_bitmap parameter parsing failure returns null instead of bitmap_empty

* add ut

* fix nereids

* fix regression-test
  • Loading branch information
zenoyang authored Aug 18, 2023
1 parent aa5e56c commit 1c3cc77
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 57 deletions.
18 changes: 18 additions & 0 deletions be/src/util/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -1846,6 +1846,24 @@ class BitmapValue {
return *this;
}

bool operator==(const BitmapValue& rhs) const {
if (_type != rhs._type) return false;

switch (_type) {
case BitmapValue::BitmapDataType::EMPTY:
return true;
case BitmapValue::BitmapDataType::SINGLE:
return _sv == rhs._sv;
case BitmapValue::BitmapDataType::BITMAP:
return _bitmap == rhs._bitmap;
case BitmapValue::BitmapDataType::SET:
return _set == rhs._set;
default:
CHECK(false) << _type;
}
return false;
}

// check if value x is present
bool contains(uint64_t x) const {
switch (_type) {
Expand Down
71 changes: 24 additions & 47 deletions be/src/vec/functions/function_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,55 +86,31 @@ struct BitmapEmpty {

struct ToBitmap {
static constexpr auto name = "to_bitmap";
using ReturnType = DataTypeBitMap;

template <typename ColumnType>
static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
execute<ColumnType, false>(col, nullptr, col_res);
}
template <typename ColumnType>
static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
MutableColumnPtr& col_res) {
execute<ColumnType, true>(col, &nullmap, col_res);
}
template <typename ColumnType, bool arg_is_nullable>
static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
const ColumnString::Chars& data = col->get_chars();
const ColumnString::Offsets& offsets = col->get_offsets();

auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();

for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
raw_str, str_size, &parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
}
}
}
} else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = col->size();
using ArgumentType = DataTypeString;

for (size_t i = 0; i < size; ++i) {
if constexpr (arg_is_nullable) {
if ((*nullmap)[i]) {
continue;
}
}
res_data[i].add(col->get_data()[i]);
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
std::vector<BitmapValue>& res_data, NullMap& null_map,
size_t input_rows_count) {
res_data.resize(input_rows_count);
if (offsets.size() == 0 && input_rows_count == 1) {
// For NULL constant
res_data.emplace_back();
null_map[0] = 1;
return Status::OK();
}
for (size_t i = 0; i < input_rows_count; ++i) {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(raw_str, str_size,
&parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
} else {
null_map[i] = 1;
}
}
return Status::OK();
}
};

Expand Down Expand Up @@ -311,6 +287,7 @@ class FunctionBitmapAlwaysNull : public IFunction {
auto& res = res_data_column->get_data();

ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
// todo(zeno) int can avoid cast into string or char
if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) {
const auto& str_column = static_cast<const ColumnString&>(*argument_column);
const ColumnString::Chars& data = str_column.get_chars();
Expand Down Expand Up @@ -1105,7 +1082,7 @@ class FunctionBitmapToArray : public IFunction {
};

using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
using FunctionToBitmap = FunctionBitmapAlwaysNull<ToBitmap>;
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;

using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
Expand Down
27 changes: 27 additions & 0 deletions be/test/vec/function/function_bitmap_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "testutil/any_type.h"
#include "util/bitmap_value.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
Expand Down Expand Up @@ -207,4 +208,30 @@ TEST(function_bitmap_test, function_bitmap_has_all) {
check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
}

TEST(function_bitmap_test, function_to_bitmap) {
std::string func_name = "to_bitmap";
InputTypeSet input_types = {TypeIndex::String};

BitmapValue bitmap1(1);
DataSet data_set = {{{std::string("1")}, &bitmap1},
{{std::string("-1")}, Null()},
{{std::string("1.11")}, Null()},
{{Null()}, Null()}};

check_function<DataTypeBitMap, true>(func_name, input_types, data_set);
}

TEST(function_bitmap_test, function_bitmap_from_string) {
std::string func_name = "bitmap_from_string";
InputTypeSet input_types = {TypeIndex::String};

BitmapValue bitmap1(1);
DataSet data_set = {{{std::string("1")}, &bitmap1},
{{std::string("-1")}, Null()},
{{std::string("1.11")}, Null()},
{{Null()}, Null()}};

check_function<DataTypeBitMap, true>(func_name, input_types, data_set);
}

} // namespace doris::vectorized
22 changes: 22 additions & 0 deletions be/test/vec/function/function_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/simple_function_factory.h"
Expand Down Expand Up @@ -290,6 +291,17 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty
ColumnPtr column = block.get_columns()[result];
EXPECT_TRUE(column != nullptr);

ColumnPtr nested_col = nullptr;
ColumnPtr null_map_col = ColumnUInt8::create(row_size, 0);
if (doris::vectorized::is_column_nullable(*column)) {
const auto& null_column = check_and_get_column<ColumnNullable>(column);
nested_col = null_column->get_nested_column_ptr();
null_map_col = null_column->get_null_map_column_ptr();
} else {
nested_col = column;
}
const auto& null_map = check_and_get_column<ColumnUInt8>(null_map_col)->get_data();

for (int i = 0; i < row_size; ++i) {
auto check_column_data = [&]() {
if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) {
Expand All @@ -303,6 +315,16 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty
EXPECT_EQ(expect_data, JsonbToJson::jsonb_to_json_string(s.data, s.size))
<< " at row " << i;
}
} else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) {
const auto* expect_data =
any_cast<typename ReturnType::FieldType*>(data_set[i].second);
if (null_map[i]) {
EXPECT_TRUE(expect_data->empty()) << " at row " << i;
} else {
auto& bitmap_value =
check_and_get_column<ColumnBitmap>(nested_col)->get_data()[i];
EXPECT_EQ(*expect_data, bitmap_value) << " at row " << i;
}
} else {
Field field;
column->get(i, field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
Expand All @@ -36,7 +36,7 @@
* ScalarFunction 'to_bitmap'. This class is generated by GenerateFunction.
*/
public class ToBitmap extends ScalarFunction
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable {
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable {

public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BitmapType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT),
Expand Down
6 changes: 3 additions & 3 deletions gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1758,11 +1758,11 @@

#bitmap function
"Bitmap": [
[['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap'], 'BITMAP', ['STRING'], 'ALWAYS_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap'], 'BITMAP', ['BIGINT'], 'ALWAYS_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash64'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ true
\N

-- !sql_bitmap_has_all_Bitmap_Bitmap --
true
\N
true
true
true
Expand Down Expand Up @@ -348,7 +348,7 @@ true
true

-- !sql_bitmap_has_any_Bitmap_Bitmap --
false
\N
true
true
true
Expand Down Expand Up @@ -696,7 +696,7 @@ true
\N

-- !sql_bitmap_to_string_Bitmap --

\N
1
2
3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ true
1

-- !sql --

\N

-- !sql --
\N
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ true
1

-- !sql --

\N

-- !sql --
\N
Expand Down

0 comments on commit 1c3cc77

Please sign in to comment.