Skip to content

Commit

Permalink
[fix](ub) undefined behavior in FixedContainer (apache#39191)
Browse files Browse the repository at this point in the history
## Proposed changes

Undefined behavior occurs if there is a null value in the list.

```
/root/doris/be/src/vec/common/string_ref.h:271:54: runtime error: null pointer passed as argument 2, which is declared to never be null
/var/local/ldb-toolchain/bin/../usr/include/string.h:64:33: note: nonnull attribute specified here
#0 0x5616d072245d in doris::StringRef::eq(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:271:41
#1 0x5616d072245d in doris::StringRef::operator==(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:274:60
#2 0x5616d072245d in doris::FixedContainer::find(doris::StringRef const&) const /root/doris/be/src/exprs/hybrid_set.h:76:36
#3 0x5616d072245d in void doris::StringValueSet>::_find_batch(doris::vectorized::IColumn const&, unsigned long, doris::vectorized::PODArray, 16ul, 15ul> const*, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/exprs/hybrid_set.h:688:63
#4 0x5616d0747857 in doris::vectorized::FunctionIn::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/in.h:170:21
#5 0x5616c741fa3a in doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/function.h:462:26
#6 0x5616cbb5b650 in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp
#7 0x5616cbb4e14e in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:244:12
#8 0x5616cbb4e3c2 in doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:250:12
#9 0x5616c741cd68 in doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.h:190:19
#10 0x5616c74cf712 in doris::vectorized::VInPredicate::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vin_predicate.cpp:130:5
#11 0x5616c740d5c0 in doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*, std::vector>&) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:183:9
#12 0x5616c740ecf5 in doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:215:12
#13 0x5616c7462e24 in doris::vectorized::VCompoundPred::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vcompound_pred.h:127:38
apache#14 0x5616c74bccec in doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vexpr_context.cpp:54:5
apache#15 0x5616c74c1dcc in doris::vectorized::VExprContext::execute_conjuncts(std::vector, std::allocator>> const&, std::vector, 16ul, 15ul>, std::allocator, 16ul, 15ul>>> const*, bool, doris::vectorized::Block*, doris::vectorized::PODArray, 16ul, 15ul>, bool) /root/doris/be/src/vec/exprs/vexpr_context.cpp:169:9
apache#16 0x5616c74c5108 in doris::vectorized::VExprContext::execute_conjuncts_and_filter_block(std::vector, std::allocator>> const&, doris::vectorized::Block*, std::vector>&, int, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/vec/exprs/vexpr_context.cpp:322:5
apache#17 0x5616ad8a7f1a in doris::segment_v2::SegmentIterator::_execute_common_expr(unsigned short*, unsigned short&, doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2680:5
apache#18 0x5616ad89e86e in doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2582:25
apache#19 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()() const /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2315:9
apache#20 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2314:19
apache#21 0x5616ad6dd9cc in doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44:33
apache#22 0x5616ad269d67 in doris::BetaRowsetReader::next_block(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/beta_rowset_reader.cpp:380:29
apache#23 0x5616de6de110 in doris::vectorized::VCollectIterator::Level0Iterator::_refresh() /root/doris/be/src/vec/olap/vcollect_iterator.h
apache#24 0x5616de6c967f in doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:514:24
apache#25 0x5616de6ca8a6 in doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:493:14
apache#26 0x5616de6d7008 in doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:692:27
apache#27 0x5616de6bd200 in doris::vectorized::VCollectIterator::build_heap(std::vector, std::allocator>>&) /root/doris/be/src/vec/olap/vcollect_iterator.cpp:186:9
apache#28 0x5616de651b6c in doris::vectorized::BlockReader::_init_collect_iter(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:157:5
apache#29 0x5616de65526f in doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:229:19
apache#30 0x5616e175a0f9 in doris::vectorized::NewOlapScanner::open(doris::RuntimeState*) /root/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:237:32
apache#31 0x5616c736ad34 in doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, std::shared_ptr) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:236:5
apache#32 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:176:21
apache#33 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:175:31
apache#34 0x5616c736f05e in void std::_invoke_impl, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(std::_invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14
apache#35 0x5616c736f05e in std::enable_if, std::shared_ptr)::$1::operator()() const::'lambda'()&>, void>::type std::_invoke_r, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2
apache#36 0x5616c736f05e in std::_Function_handler, std::shared_ptr)::$_1::operator()() const::'lambda'()>::_M_invoke(std::_Any_data const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9
apache#37 0x5616aeed6a3b in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:543:24
apache#38 0x5616aeeae4f7 in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5
apache#39 0x7f7e663e3ac2 in start_thread nptl/pthread_create.c:442:8
apache#40 0x7f7e6647584f misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /root/doris/be/src/vec/common/string_ref.h:271:54 in
```
  • Loading branch information
mrhhsg authored Aug 10, 2024
1 parent a1ba0a1 commit 7697f02
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 1 deletion.
35 changes: 35 additions & 0 deletions be/src/exprs/hybrid_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@

#pragma once

#include <glog/logging.h>

#include <type_traits>

#include "common/exception.h"
#include "common/object_pool.h"
#include "common/status.h"
#include "exprs/runtime_filter.h"
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
Expand Down Expand Up @@ -60,8 +66,16 @@ class FixedContainer {
}
}

void check_size() {
if (N != _size) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"invalid size of FixedContainer<{}>: {}", N, _size);
}
}

// Use '|' instead of '||' has better performance by test.
ALWAYS_INLINE bool find(const T& value) const {
DCHECK_EQ(N, _size);
if constexpr (N == 0) {
return false;
}
Expand Down Expand Up @@ -144,6 +158,12 @@ class FixedContainer {
size_t _size {};
};

template <typename T>
struct IsFixedContainer : std::false_type {};

template <typename T, size_t N>
struct IsFixedContainer<FixedContainer<T, N>> : std::true_type {};

/**
* Dynamic Container uses phmap::flat_hash_set.
* @tparam T Element Type
Expand Down Expand Up @@ -354,6 +374,11 @@ class HybridSet : public HybridSetBase {
if constexpr (is_nullable) {
null_map_data = null_map->data();
}

if constexpr (IsFixedContainer<ContainerType>::value) {
_set.check_size();
}

auto* __restrict result_data = results.data();
for (size_t i = 0; i < rows; ++i) {
if constexpr (!is_nullable && !is_negative) {
Expand Down Expand Up @@ -507,6 +532,11 @@ class StringSet : public HybridSetBase {
if constexpr (is_nullable) {
null_map_data = null_map->data();
}

if constexpr (IsFixedContainer<ContainerType>::value) {
_set.check_size();
}

auto* __restrict result_data = results.data();
for (size_t i = 0; i < rows; ++i) {
const auto& string_data = col.get_data_at(i).to_string();
Expand Down Expand Up @@ -675,6 +705,11 @@ class StringValueSet : public HybridSetBase {
if constexpr (is_nullable) {
null_map_data = null_map->data();
}

if constexpr (IsFixedContainer<ContainerType>::value) {
_set.check_size();
}

auto* __restrict result_data = results.data();
for (size_t i = 0; i < rows; ++i) {
uint32_t len = offset[i] - offset[i - 1];
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/functions/in.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class FunctionIn : public IFunction {
context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR ||
context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) {
// the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args() - 1)));
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
} else {
state->hybrid_set.reset(
create_set(context->get_arg_type(0)->type, get_size_with_out_null(context)));
Expand Down
9 changes: 9 additions & 0 deletions regression-test/data/nereids_syntax_p0/inpredicate.out
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@
29 Supplier#000000029 VVSymB3fbwaN ARGENTINA4 ARGENTINA AMERICA 11-773-203-7342
9 Supplier#000000009 ,gJ6K2MKveYxQT IRAN 6 IRAN MIDDLE EAST 20-338-906-3675

-- !in_predicate_11 --
15 Supplier#000000015 DF35PepL5saAK INDIA 0 INDIA ASIA 18-687-542-7601

-- !in_predicate_12 --

-- !in_predicate_13 --

-- !in_predicate_14 --

16 changes: 16 additions & 0 deletions regression-test/suites/nereids_syntax_p0/inpredicate.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,21 @@ suite("inpredicate") {
order_qt_in_predicate_10 """
SELECT * FROM supplier WHERE s_suppkey not in (15);
"""

order_qt_in_predicate_11 """
SELECT * FROM supplier WHERE s_suppkey in (15, null);
"""

order_qt_in_predicate_12 """
SELECT * FROM supplier WHERE s_suppkey not in (15, null);
"""

order_qt_in_predicate_13 """
SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null);
"""

order_qt_in_predicate_14 """
SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA', null);
"""
}

0 comments on commit 7697f02

Please sign in to comment.