From b4e8599d205244a702ff6d5252520cc1cef7c57e Mon Sep 17 00:00:00 2001 From: jinhelin Date: Tue, 13 Aug 2024 13:57:03 +0800 Subject: [PATCH] Storages: Add whether has null in the result of MinMaxIndex (#9288) ref pingcap/tiflash#9103 1. Refine `RSResult`, make it can express whether null value is contained. 2. In `MinMaxIndex`, check if the pack has null value, and if so, add information containing the null value to the returned result. Co-authored-by: JaySon Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- .../Storages/DeltaMerge/DeltaMergeDefines.h | 8 + .../DeltaMerge/DeltaMergeStore_InternalBg.cpp | 8 +- .../Storages/DeltaMerge/File/ColumnStream.cpp | 4 +- .../DeltaMerge/File/DMFilePackFilter.cpp | 32 +- .../DeltaMerge/File/DMFilePackFilter.h | 6 +- .../Storages/DeltaMerge/File/DMFileReader.cpp | 14 +- dbms/src/Storages/DeltaMerge/Filter/Like.h | 4 +- .../Storages/DeltaMerge/Filter/Unsupported.h | 2 +- .../Storages/DeltaMerge/Index/MinMaxIndex.cpp | 40 +- .../Storages/DeltaMerge/Index/MinMaxIndex.h | 2 + .../Storages/DeltaMerge/Index/RSResult.cpp | 56 ++ dbms/src/Storages/DeltaMerge/Index/RSResult.h | 130 ++-- .../Storages/DeltaMerge/Index/RoughCheck.h | 28 +- dbms/src/Storages/DeltaMerge/ScanContext.cpp | 1 + dbms/src/Storages/DeltaMerge/ScanContext.h | 6 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 2 +- .../Storages/DeltaMerge/StableValueSpace.cpp | 4 +- .../tests/gtest_dm_minmax_index.cpp | 644 ++++++++++++++---- .../DeltaMerge/tests/gtest_rs_result.cpp | 120 ++++ 19 files changed, 844 insertions(+), 267 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/Index/RSResult.cpp create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_rs_result.cpp diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h b/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h index 0f1ebd33cd8..bc93fc5d5db 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeDefines.h @@ 
-169,6 +169,14 @@ static_assert( static constexpr bool DM_RUN_CHECK = true; +struct Attr +{ + String col_name; + ColId col_id; + DataTypePtr type; +}; +using Attrs = std::vector; + } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp index d570c9dda9e..c225187bab3 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore_InternalBg.cpp @@ -634,8 +634,8 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange( "GC - shouldCompactStableWithTooMuchDataOutOfSegmentRange checked false " "because segment DTFile is shared with a neighbor segment, " "first_pack_inc={} last_pack_inc={} prev_seg_files=[{}] next_seg_files=[{}] my_files=[{}] segment={}", - magic_enum::enum_name(at_least_result.first_pack_intersection), - magic_enum::enum_name(at_least_result.last_pack_intersection), + at_least_result.first_pack_intersection, + at_least_result.last_pack_intersection, fmt::join(prev_segment_file_ids, ","), fmt::join(next_segment_file_ids, ","), [&] { @@ -687,8 +687,8 @@ bool shouldCompactStableWithTooMuchDataOutOfSegmentRange( "check_result={} first_pack_inc={} last_pack_inc={} rows_at_least={} bytes_at_least={} file_rows={} " "file_bytes={} segment={} ", check_result, - magic_enum::enum_name(at_least_result.first_pack_intersection), - magic_enum::enum_name(at_least_result.last_pack_intersection), + at_least_result.first_pack_intersection, + at_least_result.last_pack_intersection, at_least_result.rows, at_least_result.bytes, file_rows, diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index 6ee1ebdbae1..f7c4c31f40a 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -160,7 +160,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe const auto & pack_res = 
reader.pack_filter.getPackResConst(); for (size_t i = 0; i < n_packs; /*empty*/) { - if (!isUse(pack_res[i])) + if (!pack_res[i].isUse()) { ++i; continue; @@ -168,7 +168,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe size_t cur_offset_in_file = getOffsetInFile(i); size_t end = i + 1; // First, find the end of current available range. - while (end < n_packs && isUse(pack_res[end])) + while (end < n_packs && pack_res[end].isUse()) ++end; // Second, if the end of range is inside the block, we will need to read it too. diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 847c79ae4ea..e2e0f73c631 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -82,8 +82,8 @@ void DMFilePackFilter::init(ReadTag read_tag) pack_res.begin(), [](RSResult a, RSResult b) { return a && b; }); } - auto [none_count, some_count, all_count] = countPackRes(); - auto after_filter = some_count + all_count; + auto [none_count, some_count, all_count, all_null_count] = countPackRes(); + auto after_filter = some_count + all_count + all_null_count; ProfileEvents::increment(ProfileEvents::DMFileFilterAftRoughSet, after_filter); // In table scanning, DMFilePackFilter of a DMFile may be created several times: // 1. When building MVCC bitmap (ReadTag::MVCC). 
@@ -96,6 +96,7 @@ void DMFilePackFilter::init(ReadTag read_tag) scan_context->rs_pack_filter_none += none_count; scan_context->rs_pack_filter_some += some_count; scan_context->rs_pack_filter_all += all_count; + scan_context->rs_pack_filter_all_null += all_null_count; } Float64 filter_rate = 0.0; @@ -107,7 +108,8 @@ void DMFilePackFilter::init(ReadTag read_tag) LOG_DEBUG( log, "RSFilter exclude rate: {:.2f}, after_pk: {}, after_read_packs: {}, after_filter: {}, handle_ranges: {}" - ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}, read_tag: {}", + ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}, all_null_count: {}, " + "read_tag: {}", ((after_read_packs == 0) ? std::numeric_limits::quiet_NaN() : filter_rate), after_pk, after_read_packs, @@ -118,37 +120,33 @@ void DMFilePackFilter::init(ReadTag read_tag) none_count, some_count, all_count, + all_null_count, magic_enum::enum_name(read_tag)); } -std::tuple DMFilePackFilter::countPackRes() const +std::tuple DMFilePackFilter::countPackRes() const { UInt64 none_count = 0; UInt64 some_count = 0; UInt64 all_count = 0; + UInt64 all_null_count = 0; for (auto res : pack_res) { - switch (res) - { - case RSResult::None: + if (res == RSResult::None || res == RSResult::NoneNull) ++none_count; - break; - case RSResult::Some: + else if (res == RSResult::Some || res == RSResult::SomeNull) ++some_count; - break; - case RSResult::All: + else if (res == RSResult::All) ++all_count; - break; - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "{} is invalid", static_cast(res)); - } + else if (res == RSResult::AllNull) + ++all_null_count; } - return {none_count, some_count, all_count}; + return {none_count, some_count, all_count, all_null_count}; } UInt64 DMFilePackFilter::countUsePack() const { - return std::count_if(pack_res.cbegin(), pack_res.cend(), [](RSResult res) { return isUse(res); }); + return std::count_if(pack_res.cbegin(), pack_res.cend(), [](RSResult res) { 
return res.isUse(); }); } void DMFilePackFilter::loadIndex( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index c531f4fb9cb..4b5af084769 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -106,7 +106,7 @@ class DMFilePackFilter const auto & pack_stats = dmfile->getPackStats(); for (size_t i = 0; i < pack_stats.size(); ++i) { - if (isUse(pack_res[i])) + if (pack_res[i].isUse()) { rows += pack_stats[i].rows; bytes += pack_stats[i].bytes; @@ -157,8 +157,8 @@ class DMFilePackFilter void tryLoadIndex(ColId col_id); - // None, Some, All - std::tuple countPackRes() const; + // None+NoneNull, Some+SomeNull, All, AllNull + std::tuple countPackRes() const; private: DMFilePtr dmfile; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 027f790d227..658dcaf0745 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -110,7 +110,7 @@ bool DMFileReader::getSkippedRows(size_t & skip_rows) skip_rows = 0; const auto & pack_res = pack_filter.getPackResConst(); const auto & pack_stats = dmfile->getPackStats(); - for (; next_pack_id < pack_res.size() && !isUse(pack_res[next_pack_id]); ++next_pack_id) + for (; next_pack_id < pack_res.size() && !pack_res[next_pack_id].isUse(); ++next_pack_id) { skip_rows += pack_stats[next_pack_id].rows; addSkippedRows(pack_stats[next_pack_id].rows); @@ -151,7 +151,7 @@ size_t DMFileReader::getReadRows() const size_t read_pack_limit = read_one_pack_every_time ? 
1 : std::numeric_limits::max(); const auto & pack_stats = dmfile->getPackStats(); size_t read_rows = 0; - for (; next_pack_id < pack_res.size() && isUse(pack_res[next_pack_id]) && read_rows < rows_threshold_per_read; + for (; next_pack_id < pack_res.size() && pack_res[next_pack_id].isUse() && read_rows < rows_threshold_per_read; ++next_pack_id) { if (next_pack_id - start_pack_id >= read_pack_limit) @@ -225,7 +225,7 @@ Block DMFileReader::readWithFilter(const IColumn::Filter & filter) // The algorithm runs as follows: // When i = next_pack_id + 2, call read() to read {next_pack_id, next_pack_id + 1}th packs // When i = next_pack_id + 5, call read() to read {next_pack_id + 3, next_pack_id + 4, next_pack_id + 5}th packs - if (isUse(pack_res[pack_id]) && (pack_id + 1 == pack_res.size() || !isUse(pack_res[pack_id + 1]))) + if (pack_res[pack_id].isUse() && (pack_id + 1 == pack_res.size() || !pack_res[pack_id + 1].isUse())) { Block block = read(); size_t rows = block.rows(); @@ -256,7 +256,7 @@ Block DMFileReader::readWithFilter(const IColumn::Filter & filter) } offset += rows; } - else if (!isUse(pack_res[pack_id])) + else if (!pack_res[pack_id].isUse()) { offset += pack_stats[pack_id].rows; } @@ -327,14 +327,14 @@ Block DMFileReader::read() for (size_t i = start_pack_id; i < next_pack_id; ++i) { // If all handle in a pack are in the given range, and del column do clean read, we do not need to read handle column. - if (handle_res[i] == All + if (handle_res[i] == RSResult::All && std::find(del_column_clean_read_packs.cbegin(), del_column_clean_read_packs.cend(), i) != del_column_clean_read_packs.cend()) { handle_column_clean_read_packs.push_back(i); } // If all handle in a pack are in the given range, but disable del clean read, we do not need to read handle column. 
- else if (!enable_del_clean_read && handle_res[i] == All) + else if (!enable_del_clean_read && handle_res[i] == RSResult::All) { handle_column_clean_read_packs.push_back(i); } @@ -350,7 +350,7 @@ Block DMFileReader::read() { // If all handle in a pack are in the given range, no not_clean rows, and max version <= max_read_version, // we do not need to read handle column. - if (handle_res[i] == All && pack_stats[i].not_clean == 0 + if (handle_res[i] == RSResult::All && pack_stats[i].not_clean == 0 && pack_filter.getMaxVersion(i) <= max_read_version) { handle_column_clean_read_packs.push_back(i); diff --git a/dbms/src/Storages/DeltaMerge/Filter/Like.h b/dbms/src/Storages/DeltaMerge/Filter/Like.h index 2ed94096dd3..2fd433545b6 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/Like.h +++ b/dbms/src/Storages/DeltaMerge/Filter/Like.h @@ -30,8 +30,8 @@ class Like : public ColCmpVal RSResults roughCheck(size_t /*start_pack*/, size_t pack_count, const RSCheckParam & /*param*/) override { - return RSResults(pack_count, Some); + return RSResults(pack_count, RSResult::Some); } }; -} // namespace DB::DM \ No newline at end of file +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h b/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h index ee3d4f1b414..6eb68e5703b 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h +++ b/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h @@ -36,7 +36,7 @@ class Unsupported : public RSOperator RSResults roughCheck(size_t /*start_pack*/, size_t pack_count, const RSCheckParam & /*param*/) override { - return RSResults(pack_count, Some); + return RSResults(pack_count, RSResult::Some); } }; diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 25bc366b5ce..2f40ed1d7bc 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -95,6 +95,8 @@ inline std::pair minmax( // If the 
minimum value is null, this minmax index is generated before v6.4.0. // For compatibility, the filter result of the corresponding pack should be Some, // and the upper layer will read the pack data to perform the filter calculation. +// +// TODO: avoid hitting this compatibility check when all the fields of a pack are null or deleted. ALWAYS_INLINE bool minIsNull(const DB::ColumnUInt8 & null_map, size_t i) { return null_map.getElement(i * 2); @@ -234,7 +236,7 @@ RSResults MinMaxIndex::checkNullableInImpl( const std::vector & values, const DataTypePtr & type) { - RSResults results(pack_count, RSResult::Some); + RSResults results(pack_count, RSResult::SomeNull); const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { @@ -242,7 +244,8 @@ RSResults MinMaxIndex::checkNullableInImpl( continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; - results[i - start_pack] = RoughCheck::CheckIn::check(values, type, min, max); + auto value_result = RoughCheck::CheckIn::check(values, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -256,7 +259,7 @@ RSResults MinMaxIndex::checkNullableIn( const auto & column_nullable = static_cast(*minmaxes); const auto & null_map = column_nullable.getNullMapColumn(); - RSResults results(pack_count, RSResult::Some); + RSResults results(pack_count, RSResult::SomeNull); const auto * raw_type = type.get(); #define DISPATCH(TYPE) \ @@ -292,7 +295,8 @@ RSResults MinMaxIndex::checkNullableIn( pos = i * 2 + 1; prev_offset = offsets[pos - 1]; auto max = String(chars[prev_offset], offsets[pos] - prev_offset - 1); - results[i - start_pack] = RoughCheck::CheckIn::check(values, type, min, max); + auto value_result = RoughCheck::CheckIn::check(values, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -335,7 +339,8 @@ RSResults 
MinMaxIndex::checkInImpl( continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; - results[i - start_pack] = RoughCheck::CheckIn::check(values, type, min, max); + auto value_result = RoughCheck::CheckIn::check(values, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -383,7 +388,8 @@ RSResults MinMaxIndex::checkIn( pos = i * 2 + 1; prev_offset = offsets[pos - 1]; auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); - results[i - start_pack] = RoughCheck::CheckIn::check(values, type, min, max); + auto value_result = RoughCheck::CheckIn::check(values, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -406,7 +412,8 @@ RSResults MinMaxIndex::checkCmpImpl(size_t start_pack, size_t pack_count, const continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; - results[i - start_pack] = Op::template check(value, type, min, max); + auto value_result = Op::template check(value, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -452,7 +459,8 @@ RSResults MinMaxIndex::checkCmp(size_t start_pack, size_t pack_count, const Fiel pos = i * 2 + 1; prev_offset = offsets[pos - 1]; auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); - results[i - start_pack] = Op::template check(value, type, min, max); + auto value_result = Op::template check(value, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -489,7 +497,7 @@ RSResults MinMaxIndex::checkNullableCmpImpl( const Field & value, const DataTypePtr & type) { - RSResults results(pack_count, RSResult::Some); + RSResults results(pack_count, RSResult::SomeNull); const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { @@ 
-497,7 +505,8 @@ RSResults MinMaxIndex::checkNullableCmpImpl( continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; - results[i - start_pack] = Op::template check(value, type, min, max); + auto value_result = Op::template check(value, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -512,7 +521,7 @@ RSResults MinMaxIndex::checkNullableCmp( const auto & column_nullable = static_cast(*minmaxes); const auto & null_map = column_nullable.getNullMapColumn(); - RSResults results(pack_count, RSResult::Some); + RSResults results(pack_count, RSResult::SomeNull); const auto * raw_type = type.get(); #define DISPATCH(TYPE) \ @@ -548,7 +557,8 @@ RSResults MinMaxIndex::checkNullableCmp( pos = i * 2 + 1; prev_offset = offsets[pos - 1]; auto max = String(chars[prev_offset], offsets[pos] - prev_offset - 1); - results[i - start_pack] = Op::template check(value, type, min, max); + auto value_result = Op::template check(value, type, min, max); + results[i - start_pack] = addNullIfHasNull(value_result, i); } return results; } @@ -591,4 +601,10 @@ RSResults MinMaxIndex::checkIsNull(size_t start_pack, size_t pack_count) return results; } +RSResult MinMaxIndex::addNullIfHasNull(RSResult value_result, size_t i) const +{ + if (has_null_marks[i]) + value_result.setHasNull(); + return value_result; +} } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h index 9f617c2969b..65b52c27d41 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h @@ -113,6 +113,8 @@ class MinMaxIndex const std::vector & values, const DataTypePtr & type); + RSResult addNullIfHasNull(RSResult value_result, size_t i) const; + PaddedPODArray has_null_marks; PaddedPODArray has_value_marks; MutableColumnPtr minmaxes; diff --git a/dbms/src/Storages/DeltaMerge/Index/RSResult.cpp 
b/dbms/src/Storages/DeltaMerge/Index/RSResult.cpp new file mode 100644 index 00000000000..a31eb2a59dc --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/Index/RSResult.cpp @@ -0,0 +1,56 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace DB::DM +{ +const RSResult RSResult::Some(RSResult::ValueResult::Some, false); +const RSResult RSResult::None(RSResult::ValueResult::None, false); +const RSResult RSResult::All(RSResult::ValueResult::All, false); +const RSResult RSResult::SomeNull(RSResult::ValueResult::Some, true); +const RSResult RSResult::NoneNull(RSResult::ValueResult::None, true); +const RSResult RSResult::AllNull(RSResult::ValueResult::All, true); + +RSResult::ValueResult RSResult::logicalNot(ValueResult v) noexcept +{ + switch (v) + { + case ValueResult::Some: + return ValueResult::Some; + case ValueResult::None: + return ValueResult::All; + case ValueResult::All: + return ValueResult::None; + } +} + +RSResult::ValueResult RSResult::logicalAnd(ValueResult v0, ValueResult v1) noexcept +{ + if (v0 == ValueResult::None || v1 == ValueResult::None) + return ValueResult::None; + if (v0 == ValueResult::All && v1 == ValueResult::All) + return ValueResult::All; + return ValueResult::Some; +} + +RSResult::ValueResult RSResult::logicalOr(ValueResult v0, ValueResult v1) noexcept +{ + if (v0 == ValueResult::All || v1 == ValueResult::All) + return ValueResult::All; + else if (v0 == ValueResult::Some || v1 == 
ValueResult::Some) + return ValueResult::Some; + return ValueResult::None; +} +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Index/RSResult.h b/dbms/src/Storages/DeltaMerge/Index/RSResult.h index 21c1cad6911..e52ce7bbfb6 100644 --- a/dbms/src/Storages/DeltaMerge/Index/RSResult.h +++ b/dbms/src/Storages/DeltaMerge/Index/RSResult.h @@ -14,71 +14,91 @@ #pragma once -#include +#include +#include -namespace DB +#include + +namespace DB::DM { -namespace DM +class RSResult; +} +namespace fmt { -struct Attr +template <> +struct formatter; +} + +namespace DB::DM { - String col_name; - ColId col_id; - DataTypePtr type; -}; -using Attrs = std::vector; -enum class RSResult : UInt8 +class RSResult { - Unknown = 0, // Not checked yet - Some = 1, // Suspected (but may be empty or full) - None = 2, // Empty, no need to read - All = 3, // Full, need to read -}; -using RSResults = std::vector; +private: + enum class ValueResult : UInt8 + { + Some = 1, // Some values meet requirements and there are no null values; need to read and perform filtering + None = 2, // No value meets requirements and there are no null values; no need to read + All = 3, // All values meet requirements and there are no null values; need to read but no need to perform filtering + }; -static constexpr RSResult Unknown = RSResult::Unknown; -static constexpr RSResult Some = RSResult::Some; -static constexpr RSResult None = RSResult::None; -static constexpr RSResult All = RSResult::All; + static ValueResult logicalNot(ValueResult v) noexcept; + static ValueResult logicalAnd(ValueResult v0, ValueResult v1) noexcept; + static ValueResult logicalOr(ValueResult v0, ValueResult v1) noexcept; -inline RSResult operator!(RSResult v) -{ - if (unlikely(v == Unknown)) - throw Exception("Unexpected Unknown"); - if (v == All) - return None; - else if (v == None) - return All; - return v; -} + // The default constructor is deleted and the other constructor is private, so that invalid objects cannot be created. + // Use the static member variables below. 
+ RSResult() = delete; + RSResult(ValueResult v_, bool has_null_) + : v(v_) + , has_null(has_null_) + {} -inline RSResult operator||(RSResult v0, RSResult v1) -{ - if (unlikely(v0 == Unknown || v1 == Unknown)) - throw Exception("Unexpected Unknown"); - if (v0 == All || v1 == All) - return All; - if (v0 == Some || v1 == Some) - return Some; - return None; -} + friend struct fmt::formatter; -inline RSResult operator&&(RSResult v0, RSResult v1) -{ - if (unlikely(v0 == Unknown || v1 == Unknown)) - throw Exception("Unexpected Unknown"); - if (v0 == None || v1 == None) - return None; - if (v0 == All && v1 == All) - return All; - return Some; -} + ValueResult v; + bool has_null; -ALWAYS_INLINE inline bool isUse(RSResult res) noexcept +public: + bool isUse() const noexcept { return v != ValueResult::None; } + + bool allMatch() const noexcept { return *this == RSResult::All; } + + void setHasNull() noexcept { has_null = true; } + + RSResult operator!() const noexcept { return RSResult(logicalNot(v), has_null); } + + RSResult operator&&(RSResult r) const noexcept { return RSResult(logicalAnd(v, r.v), has_null || r.has_null); } + + RSResult operator||(RSResult r) const noexcept + { + // Because the result of `1 || 1/0/NULL` is always 1. 
+ if (allMatch() || r.allMatch()) + return RSResult(ValueResult::All, false); + return RSResult(logicalOr(v, r.v), has_null || r.has_null); + } + + bool operator==(RSResult r) const noexcept { return v == r.v && has_null == r.has_null; } + + static const RSResult Some; + static const RSResult None; + static const RSResult All; + static const RSResult SomeNull; + static const RSResult NoneNull; + static const RSResult AllNull; +}; + +using RSResults = std::vector; +} // namespace DB::DM + +template <> +struct fmt::formatter { - return res != RSResult::None; -} -} // namespace DM + static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } -} // namespace DB \ No newline at end of file + template + auto format(const DB::DM::RSResult r, FormatContext & ctx) const + { + return fmt::format_to(ctx.out(), "{}{}", magic_enum::enum_name(r.v), r.has_null ? "Null" : ""); + } +}; diff --git a/dbms/src/Storages/DeltaMerge/Index/RoughCheck.h b/dbms/src/Storages/DeltaMerge/Index/RoughCheck.h index abb5b884104..2818f9dfa2a 100644 --- a/dbms/src/Storages/DeltaMerge/Index/RoughCheck.h +++ b/dbms/src/Storages/DeltaMerge/Index/RoughCheck.h @@ -45,7 +45,7 @@ struct CheckEqual static RSResult check(const Field & v, const DataTypePtr & type, const T & min, const T & max) { if (!IS_LEGAL(v, min)) - return Some; + return RSResult::Some; // if (min == max && v == min) // return All; @@ -55,11 +55,11 @@ struct CheckEqual // return None; if (min == max && EQUAL(v, min)) - return All; + return RSResult::All; else if (GREATER_EQ(v, min) && LESS_EQ(v, max)) - return Some; + return RSResult::Some; else - return None; + return RSResult::None; } }; @@ -68,10 +68,10 @@ struct CheckIn template static RSResult check(const std::vector & values, const DataTypePtr & type, const T & min, const T & max) { - RSResult result = None; + RSResult result = RSResult::None; for (const auto & v : values) { - if (result == All) + if (result == RSResult::All) break; // skip null value if 
(v.isNull()) @@ -88,7 +88,7 @@ struct CheckGreater static RSResult check(const Field & v, const DataTypePtr & type, const T & min, const T & max) { if (!IS_LEGAL(v, min)) - return Some; + return RSResult::Some; // if (v >= max) // return None; @@ -97,11 +97,11 @@ struct CheckGreater // return Some; if (GREATER_EQ(v, max)) - return None; + return RSResult::None; else if (LESS(v, min)) - return All; + return RSResult::All; else - return Some; + return RSResult::Some; } }; @@ -111,7 +111,7 @@ struct CheckGreaterEqual static RSResult check(const Field & v, const DataTypePtr & type, T min, T max) { if (!IS_LEGAL(v, min)) - return Some; + return RSResult::Some; // if (v > max) // return None; @@ -120,11 +120,11 @@ struct CheckGreaterEqual // return Some; if (GREATER(v, max)) - return None; + return RSResult::None; else if (LESS_EQ(v, min)) - return All; + return RSResult::All; else - return Some; + return RSResult::Some; } }; diff --git a/dbms/src/Storages/DeltaMerge/ScanContext.cpp b/dbms/src/Storages/DeltaMerge/ScanContext.cpp index 5a5b126a40e..27c35f07bd5 100644 --- a/dbms/src/Storages/DeltaMerge/ScanContext.cpp +++ b/dbms/src/Storages/DeltaMerge/ScanContext.cpp @@ -110,6 +110,7 @@ String ScanContext::toJson() const json->set("rs_pack_filter_none", rs_pack_filter_none.load()); json->set("rs_pack_filter_some", rs_pack_filter_some.load()); json->set("rs_pack_filter_all", rs_pack_filter_all.load()); + json->set("rs_pack_filter_all_null", rs_pack_filter_all_null.load()); json->set("num_remote_region", total_remote_region_num.load()); json->set("num_local_region", total_local_region_num.load()); diff --git a/dbms/src/Storages/DeltaMerge/ScanContext.h b/dbms/src/Storages/DeltaMerge/ScanContext.h index fdf26eb5200..1ee21209881 100644 --- a/dbms/src/Storages/DeltaMerge/ScanContext.h +++ b/dbms/src/Storages/DeltaMerge/ScanContext.h @@ -46,6 +46,7 @@ class ScanContext std::atomic rs_pack_filter_none{0}; std::atomic rs_pack_filter_some{0}; std::atomic rs_pack_filter_all{0}; + 
std::atomic rs_pack_filter_all_null{0}; std::atomic total_remote_region_num{0}; std::atomic total_local_region_num{0}; @@ -101,7 +102,7 @@ class ScanContext dmfile_lm_filter_scanned_rows = tiflash_scan_context_pb.dmfile_lm_filter_scanned_rows(); dmfile_lm_filter_skipped_rows = tiflash_scan_context_pb.dmfile_lm_filter_skipped_rows(); total_rs_pack_filter_check_time_ns = tiflash_scan_context_pb.total_dmfile_rs_check_ms() * 1000000; - // TODO: rs_pack_filter_none, rs_pack_filter_some, rs_pack_filter_all + // TODO: rs_pack_filter_none, rs_pack_filter_some, rs_pack_filter_all, rs_pack_filter_all_null total_dmfile_read_time_ns = tiflash_scan_context_pb.total_dmfile_read_ms() * 1000000; create_snapshot_time_ns = tiflash_scan_context_pb.total_build_snapshot_ms() * 1000000; total_remote_region_num = tiflash_scan_context_pb.remote_regions(); @@ -190,6 +191,7 @@ class ScanContext rs_pack_filter_none += other.rs_pack_filter_none; rs_pack_filter_some += other.rs_pack_filter_some; rs_pack_filter_all += other.rs_pack_filter_all; + rs_pack_filter_all_null += other.rs_pack_filter_all_null; total_dmfile_read_time_ns += other.total_dmfile_read_time_ns; total_local_region_num += other.total_local_region_num; @@ -235,7 +237,7 @@ class ScanContext dmfile_lm_filter_scanned_rows += other.dmfile_lm_filter_scanned_rows(); dmfile_lm_filter_skipped_rows += other.dmfile_lm_filter_skipped_rows(); total_rs_pack_filter_check_time_ns += other.total_dmfile_rs_check_ms() * 1000000; - // TODO: rs_pack_filter_none, rs_pack_filter_some, rs_pack_filter_all + // TODO: rs_pack_filter_none, rs_pack_filter_some, rs_pack_filter_all, rs_pack_filter_all_null total_dmfile_read_time_ns += other.total_dmfile_read_ms() * 1000000; create_snapshot_time_ns += other.total_build_snapshot_ms() * 1000000; total_local_region_num += other.local_regions(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 38bc9dd80f1..8c8108b8e52 100644 --- 
a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -2956,7 +2956,7 @@ std::pair, std::vector> parseDMFilePackInfo( { const auto & pack_stat = pack_stats[pack_id]; preceded_rows += pack_stat.rows; - if (!isUse(pack_res[pack_id])) + if (!pack_res[pack_id].isUse()) { continue; } diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 4fd6f3c9039..b7243bc8345 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -457,7 +457,7 @@ void StableValueSpace::calculateStableProperty( } for (size_t pack_id = 0; pack_id < pack_res.size(); ++pack_id) { - if (!isUse(pack_res[pack_id])) + if (!pack_res[pack_id].isUse()) continue; property.num_versions += pack_stats[pack_id].rows; property.num_puts += pack_stats[pack_id].rows - pack_stats[pack_id].not_clean; @@ -596,7 +596,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & const auto & pack_res = filter.getPackResConst(); for (size_t i = 0; i < pack_stats.size(); ++i) { - if (isUse(pack_res[i])) + if (pack_res[i].isUse()) { ++match_packs; total_match_rows += pack_stats[i].rows; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 324064a3c2a..da05837c390 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -33,23 +33,14 @@ #include #include -#include #include namespace DB::DM::tests { -static const ColId DEFAULT_COL_ID = 0; -static const String DEFAULT_COL_NAME = "2020-09-26"; - class MinMaxIndexTest : public ::testing::Test { -public: - MinMaxIndexTest() = default; - protected: - static void SetUpTestCase() {} - void SetUp() override { context = DMTestEnv::getContext(); @@ -66,6 +57,11 @@ class MinMaxIndexTest : public ::testing::Test ContextPtr 
context; }; +namespace +{ +static constexpr ColId DEFAULT_COL_ID = 0; +static const String DEFAULT_COL_NAME = "2020-09-26"; + Attr attr(String type) { return Attr{DEFAULT_COL_NAME, DEFAULT_COL_ID, DataTypeFactory::instance().get(type)}; @@ -77,7 +73,7 @@ Attr pkAttr() return Attr{col.name, col.id, col.type}; } - +// Check if the data in `block_tuples` match `filter`. bool checkMatch( const String & test_case, Context & context, @@ -151,19 +147,6 @@ bool checkMatch( return rows != 0; } -bool checkMatch( - const String & test_case, - Context & context, - const String & type, - const String & value, - const RSOperatorPtr & filter) -{ - // The first three values are pk, version and del_mark. - // For del_mark, 1 means deleted. - CSVTuples tuples = {{"0", "0", "0", value}}; - return checkMatch(test_case, context, type, tuples, filter); -} - bool checkDelMatch( const String & test_case, Context & context, @@ -241,24 +224,24 @@ Decimal64 getDecimal64(String s) return expected_default_value; } -#define Int64_Match_DATA (100) -#define Int64_Greater_DATA (10000) -#define Int64_Smaller_DATA (-1) +static constexpr Int64 Int64_Match_DATA = 100; +static constexpr Int64 Int64_Greater_DATA = 10000; +static constexpr Int64 Int64_Smaller_DATA = -1; -#define Date_Match_DATA ("2020-09-27") -#define Date_Greater_DATA ("2022-09-27") -#define Date_Smaller_DATA ("1997-09-27") +static const String Date_Match_DATA = "2020-09-27"; +static const String Date_Greater_DATA = "2022-09-27"; +static const String Date_Smaller_DATA = "1997-09-27"; -#define DateTime_Match_DATA ("2020-01-01 05:00:01") -#define DateTime_Greater_DATA ("2022-01-01 05:00:01") -#define DateTime_Smaller_DATA ("1997-01-01 05:00:01") +static const String DateTime_Match_DATA = "2020-01-01 05:00:01"; +static const String DateTime_Greater_DATA = "2022-01-01 05:00:01"; +static const String DateTime_Smaller_DATA = "1997-01-01 05:00:01"; -#define MyDateTime_Match_DATE ("2020-09-27") -#define MyDateTime_Greater_DATE ("2022-09-27") 
-#define MyDateTime_Smaller_DATE ("1997-09-27") +static const String MyDateTime_Match_DATE = "2020-09-27"; +static const String MyDateTime_Greater_DATE = "2022-09-27"; +static const String MyDateTime_Smaller_DATE = "1997-09-27"; -#define Decimal_Match_DATA ("100.25566") -#define Decimal_UnMatch_DATA ("100.25500") +static const String Decimal_Match_DATA = "100.25566"; +static const String Decimal_UnMatch_DATA = "100.25500"; std::pair generateTypeValue(MinMaxTestDatatype data_type, bool has_null) { @@ -278,55 +261,51 @@ std::pair generateTypeValue(MinMaxTestDatatype data_type, boo } case Test_Date: { - return {"Date", {{"0", "0", "0", DB::toString(Date_Match_DATA)}}}; + return {"Date", {{"0", "0", "0", Date_Match_DATA}}}; } case Test_Nullable_Date: { if (has_null) { - return {"Nullable(Date)", {{"0", "0", "0", DB::toString(Date_Match_DATA)}, {"1", "1", "0", "\\N"}}}; + return {"Nullable(Date)", {{"0", "0", "0", Date_Match_DATA}, {"1", "1", "0", "\\N"}}}; } - return {"Nullable(Date)", {{"0", "0", "0", DB::toString(Date_Match_DATA)}}}; + return {"Nullable(Date)", {{"0", "0", "0", Date_Match_DATA}}}; } case Test_DateTime: { - return {"DateTime", {{"0", "0", "0", DB::toString(DateTime_Match_DATA)}}}; + return {"DateTime", {{"0", "0", "0", DateTime_Match_DATA}}}; } case Test_Nullable_DateTime: { if (has_null) { - return {"Nullable(DateTime)", {{"0", "0", "0", DB::toString(DateTime_Match_DATA)}, {"1", "1", "0", "\\N"}}}; + return {"Nullable(DateTime)", {{"0", "0", "0", DateTime_Match_DATA}, {"1", "1", "0", "\\N"}}}; } - return {"Nullable(DateTime)", {{"0", "0", "0", DB::toString(DateTime_Match_DATA)}}}; + return {"Nullable(DateTime)", {{"0", "0", "0", DateTime_Match_DATA}}}; } case Test_MyDateTime: { - return {"MyDateTime", {{"0", "0", "0", DB::toString(MyDateTime_Match_DATE)}}}; + return {"MyDateTime", {{"0", "0", "0", MyDateTime_Match_DATE}}}; } case Test_Nullable_MyDateTime: { if (has_null) { - return { - "Nullable(MyDateTime)", - {{"0", "0", "0", 
DB::toString(MyDateTime_Match_DATE)}, {"1", "1", "0", "\\N"}}}; + return {"Nullable(MyDateTime)", {{"0", "0", "0", MyDateTime_Match_DATE}, {"1", "1", "0", "\\N"}}}; } - return {"Nullable(MyDateTime)", {{"0", "0", "0", DB::toString(MyDateTime_Match_DATE)}}}; + return {"Nullable(MyDateTime)", {{"0", "0", "0", MyDateTime_Match_DATE}}}; } case Test_Decimal64: { - return {"Decimal(20, 5)", {{"0", "0", "0", DB::toString(Decimal_Match_DATA)}}}; + return {"Decimal(20, 5)", {{"0", "0", "0", Decimal_Match_DATA}}}; } case Test_Nullable_Decimal64: { if (has_null) { - return { - "Nullable(Decimal(20, 5))", - {{"0", "0", "0", DB::toString(Decimal_Match_DATA)}, {"1", "1", "0", "\\N"}}}; + return {"Nullable(Decimal(20, 5))", {{"0", "0", "0", Decimal_Match_DATA}, {"1", "1", "0", "\\N"}}}; } - return {"Nullable(Decimal(20, 5))", {{"0", "0", "0", DB::toString(Decimal_Match_DATA)}}}; + return {"Nullable(Decimal(20, 5))", {{"0", "0", "0", Decimal_Match_DATA}}}; } default: throw Exception("Unknown data type"); @@ -341,66 +320,66 @@ RSOperatorPtr generateEqualOperator(MinMaxTestDatatype data_type, bool is_match) { if (is_match) { - return createEqual(attr("Int64"), Field(static_cast Int64_Match_DATA)); + return createEqual(attr("Int64"), Field(Int64_Match_DATA)); } else { - return createEqual(attr("Int64"), Field(static_cast Int64_Smaller_DATA)); + return createEqual(attr("Int64"), Field(Int64_Smaller_DATA)); } } case Test_Nullable_Int64: { if (is_match) { - return createEqual(attr("Nullable(Int64)"), Field(static_cast Int64_Match_DATA)); + return createEqual(attr("Nullable(Int64)"), Field(Int64_Match_DATA)); } else { - return createEqual(attr("Nullable(Int64)"), Field(static_cast Int64_Smaller_DATA)); + return createEqual(attr("Nullable(Int64)"), Field(Int64_Smaller_DATA)); } } case Test_Date: { if (is_match) { - return createEqual(attr("Date"), Field(static_cast Date_Match_DATA)); + return createEqual(attr("Date"), Field(Date_Match_DATA)); } else { - return createEqual(attr("Date"), 
Field(static_cast Date_Smaller_DATA)); + return createEqual(attr("Date"), Field(Date_Smaller_DATA)); } } case Test_Nullable_Date: { if (is_match) { - return createEqual(attr("Nullable(Date)"), Field(static_cast Date_Match_DATA)); + return createEqual(attr("Nullable(Date)"), Field(Date_Match_DATA)); } else { - return createEqual(attr("Nullable(Date)"), Field(static_cast Date_Smaller_DATA)); + return createEqual(attr("Nullable(Date)"), Field(Date_Smaller_DATA)); } } case Test_DateTime: { if (is_match) { - return createEqual(attr("DateTime"), Field(static_cast DateTime_Match_DATA)); + return createEqual(attr("DateTime"), Field(DateTime_Match_DATA)); } else { - return createEqual(attr("DateTime"), Field(static_cast DateTime_Smaller_DATA)); + return createEqual(attr("DateTime"), Field(DateTime_Smaller_DATA)); } } case Test_Nullable_DateTime: { if (is_match) { - return createEqual(attr("Nullable(DateTime)"), Field(static_cast DateTime_Match_DATA)); + return createEqual(attr("Nullable(DateTime)"), Field(DateTime_Match_DATA)); } else { - return createEqual(attr("Nullable(DateTime)"), Field(static_cast DateTime_Smaller_DATA)); + return createEqual(attr("Nullable(DateTime)"), Field(DateTime_Smaller_DATA)); } } case Test_MyDateTime: @@ -468,66 +447,66 @@ RSOperatorPtr generateInOperator(MinMaxTestDatatype data_type, bool is_match) { if (is_match) { - return createIn(attr("Int64"), {Field(static_cast Int64_Match_DATA)}); + return createIn(attr("Int64"), {Field(Int64_Match_DATA)}); } else { - return createIn(attr("Int64"), {Field(static_cast Int64_Smaller_DATA)}); + return createIn(attr("Int64"), {Field(Int64_Smaller_DATA)}); } } case Test_Nullable_Int64: { if (is_match) { - return createIn(attr("Nullable(Int64)"), {Field(static_cast Int64_Match_DATA)}); + return createIn(attr("Nullable(Int64)"), {Field(Int64_Match_DATA)}); } else { - return createIn(attr("Nullable(Int64)"), {Field(static_cast Int64_Smaller_DATA)}); + return createIn(attr("Nullable(Int64)"), 
{Field(Int64_Smaller_DATA)}); } } case Test_Date: { if (is_match) { - return createIn(attr("Date"), {Field(static_cast Date_Match_DATA)}); + return createIn(attr("Date"), {Field(Date_Match_DATA)}); } else { - return createIn(attr("Date"), {Field(static_cast Date_Smaller_DATA)}); + return createIn(attr("Date"), {Field(Date_Smaller_DATA)}); } } case Test_Nullable_Date: { if (is_match) { - return createIn(attr("Nullable(Date)"), {Field(static_cast Date_Match_DATA)}); + return createIn(attr("Nullable(Date)"), {Field(Date_Match_DATA)}); } else { - return createIn(attr("Nullable(Date)"), {Field(static_cast Date_Smaller_DATA)}); + return createIn(attr("Nullable(Date)"), {Field(Date_Smaller_DATA)}); } } case Test_DateTime: { if (is_match) { - return createIn(attr("DateTime"), {Field(static_cast DateTime_Match_DATA)}); + return createIn(attr("DateTime"), {Field(DateTime_Match_DATA)}); } else { - return createIn(attr("DateTime"), {Field(static_cast DateTime_Smaller_DATA)}); + return createIn(attr("DateTime"), {Field(DateTime_Smaller_DATA)}); } } case Test_Nullable_DateTime: { if (is_match) { - return createIn(attr("Nullable(DateTime)"), {Field(static_cast DateTime_Match_DATA)}); + return createIn(attr("Nullable(DateTime)"), {Field(DateTime_Match_DATA)}); } else { - return createIn(attr("Nullable(DateTime)"), {Field(static_cast DateTime_Smaller_DATA)}); + return createIn(attr("Nullable(DateTime)"), {Field(DateTime_Smaller_DATA)}); } } case Test_MyDateTime: @@ -595,66 +574,66 @@ RSOperatorPtr generateGreaterOperator(MinMaxTestDatatype data_type, bool is_matc { if (is_match) { - return createGreater(attr("Int64"), Field(static_cast Int64_Smaller_DATA)); + return createGreater(attr("Int64"), Field(Int64_Smaller_DATA)); } else { - return createGreater(attr("Int64"), Field(static_cast Int64_Match_DATA)); + return createGreater(attr("Int64"), Field(Int64_Match_DATA)); } } case Test_Nullable_Int64: { if (is_match) { - return createGreater(attr("Nullable(Int64)"), Field(static_cast 
Int64_Smaller_DATA)); + return createGreater(attr("Nullable(Int64)"), Field(Int64_Smaller_DATA)); } else { - return createGreater(attr("Nullable(Int64)"), Field(static_cast Int64_Match_DATA)); + return createGreater(attr("Nullable(Int64)"), Field(Int64_Match_DATA)); } } case Test_Date: { if (is_match) { - return createGreater(attr("Date"), Field(static_cast Date_Smaller_DATA)); + return createGreater(attr("Date"), Field(Date_Smaller_DATA)); } else { - return createGreater(attr("Date"), Field(static_cast Date_Match_DATA)); + return createGreater(attr("Date"), Field(Date_Match_DATA)); } } case Test_Nullable_Date: { if (is_match) { - return createGreater(attr("Nullable(Date)"), Field(static_cast Date_Smaller_DATA)); + return createGreater(attr("Nullable(Date)"), Field(Date_Smaller_DATA)); } else { - return createGreater(attr("Nullable(Date)"), Field(static_cast Date_Match_DATA)); + return createGreater(attr("Nullable(Date)"), Field(Date_Match_DATA)); } } case Test_DateTime: { if (is_match) { - return createGreater(attr("DateTime"), Field(static_cast DateTime_Smaller_DATA)); + return createGreater(attr("DateTime"), Field(DateTime_Smaller_DATA)); } else { - return createGreater(attr("DateTime"), Field(static_cast DateTime_Match_DATA)); + return createGreater(attr("DateTime"), Field(DateTime_Match_DATA)); } } case Test_Nullable_DateTime: { if (is_match) { - return createGreater(attr("Nullable(DateTime)"), Field(static_cast DateTime_Smaller_DATA)); + return createGreater(attr("Nullable(DateTime)"), Field(DateTime_Smaller_DATA)); } else { - return createGreater(attr("Nullable(DateTime)"), Field(static_cast DateTime_Match_DATA)); + return createGreater(attr("Nullable(DateTime)"), Field(DateTime_Match_DATA)); } } case Test_MyDateTime: @@ -722,66 +701,66 @@ RSOperatorPtr generateGreaterEqualOperator(MinMaxTestDatatype data_type, bool is { if (is_match) { - return createGreaterEqual(attr("Int64"), Field(static_cast Int64_Smaller_DATA)); + return 
createGreaterEqual(attr("Int64"), Field(Int64_Smaller_DATA)); } else { - return createGreaterEqual(attr("Int64"), Field(static_cast Int64_Greater_DATA)); + return createGreaterEqual(attr("Int64"), Field(Int64_Greater_DATA)); } } case Test_Nullable_Int64: { if (is_match) { - return createGreaterEqual(attr("Nullable(Int64)"), Field(static_cast Int64_Smaller_DATA)); + return createGreaterEqual(attr("Nullable(Int64)"), Field(Int64_Smaller_DATA)); } else { - return createGreaterEqual(attr("Nullable(Int64)"), Field(static_cast Int64_Greater_DATA)); + return createGreaterEqual(attr("Nullable(Int64)"), Field(Int64_Greater_DATA)); } } case Test_Date: { if (is_match) { - return createGreaterEqual(attr("Date"), Field(static_cast Date_Smaller_DATA)); + return createGreaterEqual(attr("Date"), Field(Date_Smaller_DATA)); } else { - return createGreaterEqual(attr("Date"), Field(static_cast Date_Greater_DATA)); + return createGreaterEqual(attr("Date"), Field(Date_Greater_DATA)); } } case Test_Nullable_Date: { if (is_match) { - return createGreaterEqual(attr("Nullable(Date)"), Field(static_cast Date_Smaller_DATA)); + return createGreaterEqual(attr("Nullable(Date)"), Field(Date_Smaller_DATA)); } else { - return createGreaterEqual(attr("Nullable(Date)"), Field(static_cast Date_Greater_DATA)); + return createGreaterEqual(attr("Nullable(Date)"), Field(Date_Greater_DATA)); } } case Test_DateTime: { if (is_match) { - return createGreaterEqual(attr("DateTime"), Field(static_cast DateTime_Smaller_DATA)); + return createGreaterEqual(attr("DateTime"), Field(DateTime_Smaller_DATA)); } else { - return createGreaterEqual(attr("DateTime"), Field(static_cast DateTime_Greater_DATA)); + return createGreaterEqual(attr("DateTime"), Field(DateTime_Greater_DATA)); } } case Test_Nullable_DateTime: { if (is_match) { - return createGreaterEqual(attr("Nullable(DateTime)"), Field(static_cast DateTime_Smaller_DATA)); + return createGreaterEqual(attr("Nullable(DateTime)"), Field(DateTime_Smaller_DATA)); } else 
{ - return createGreaterEqual(attr("Nullable(DateTime)"), Field(static_cast DateTime_Greater_DATA)); + return createGreaterEqual(attr("Nullable(DateTime)"), Field(DateTime_Greater_DATA)); } } case Test_MyDateTime: @@ -849,66 +828,66 @@ RSOperatorPtr generateLessOperator(MinMaxTestDatatype data_type, bool is_match) { if (is_match) { - return createLess(attr("Int64"), Field(static_cast Int64_Greater_DATA)); + return createLess(attr("Int64"), Field(Int64_Greater_DATA)); } else { - return createLess(attr("Int64"), Field(static_cast Int64_Match_DATA)); + return createLess(attr("Int64"), Field(Int64_Match_DATA)); } } case Test_Nullable_Int64: { if (is_match) { - return createLess(attr("Nullable(Int64)"), Field(static_cast Int64_Greater_DATA)); + return createLess(attr("Nullable(Int64)"), Field(Int64_Greater_DATA)); } else { - return createLess(attr("Nullable(Int64)"), Field(static_cast Int64_Match_DATA)); + return createLess(attr("Nullable(Int64)"), Field(Int64_Match_DATA)); } } case Test_Date: { if (is_match) { - return createLess(attr("Date"), Field(static_cast Date_Greater_DATA)); + return createLess(attr("Date"), Field(Date_Greater_DATA)); } else { - return createLess(attr("Date"), Field(static_cast Date_Match_DATA)); + return createLess(attr("Date"), Field(Date_Match_DATA)); } } case Test_Nullable_Date: { if (is_match) { - return createLess(attr("Nullable(Date)"), Field(static_cast Date_Greater_DATA)); + return createLess(attr("Nullable(Date)"), Field(Date_Greater_DATA)); } else { - return createLess(attr("Nullable(Date)"), Field(static_cast Date_Match_DATA)); + return createLess(attr("Nullable(Date)"), Field(Date_Match_DATA)); } } case Test_DateTime: { if (is_match) { - return createLess(attr("DateTime"), Field(static_cast DateTime_Greater_DATA)); + return createLess(attr("DateTime"), Field(DateTime_Greater_DATA)); } else { - return createLess(attr("DateTime"), Field(static_cast DateTime_Match_DATA)); + return createLess(attr("DateTime"), 
Field(DateTime_Match_DATA)); } } case Test_Nullable_DateTime: { if (is_match) { - return createLess(attr("Nullable(DateTime)"), Field(static_cast DateTime_Greater_DATA)); + return createLess(attr("Nullable(DateTime)"), Field(DateTime_Greater_DATA)); } else { - return createLess(attr("Nullable(DateTime)"), Field(static_cast DateTime_Match_DATA)); + return createLess(attr("Nullable(DateTime)"), Field(DateTime_Match_DATA)); } } case Test_MyDateTime: @@ -976,66 +955,66 @@ RSOperatorPtr generateLessEqualOperator(MinMaxTestDatatype data_type, bool is_ma { if (is_match) { - return createLessEqual(attr("Int64"), Field(static_cast Int64_Greater_DATA)); + return createLessEqual(attr("Int64"), Field(Int64_Greater_DATA)); } else { - return createLessEqual(attr("Int64"), Field(static_cast Int64_Smaller_DATA)); + return createLessEqual(attr("Int64"), Field(Int64_Smaller_DATA)); } } case Test_Nullable_Int64: { if (is_match) { - return createLessEqual(attr("Nullable(Int64)"), Field(static_cast Int64_Greater_DATA)); + return createLessEqual(attr("Nullable(Int64)"), Field(Int64_Greater_DATA)); } else { - return createLessEqual(attr("Nullable(Int64)"), Field(static_cast Int64_Smaller_DATA)); + return createLessEqual(attr("Nullable(Int64)"), Field(Int64_Smaller_DATA)); } } case Test_Date: { if (is_match) { - return createLessEqual(attr("Date"), Field(static_cast Date_Greater_DATA)); + return createLessEqual(attr("Date"), Field(Date_Greater_DATA)); } else { - return createLessEqual(attr("Date"), Field(static_cast Date_Smaller_DATA)); + return createLessEqual(attr("Date"), Field(Date_Smaller_DATA)); } } case Test_Nullable_Date: { if (is_match) { - return createLessEqual(attr("Nullable(Date)"), Field(static_cast Date_Greater_DATA)); + return createLessEqual(attr("Nullable(Date)"), Field(Date_Greater_DATA)); } else { - return createLessEqual(attr("Nullable(Date)"), Field(static_cast Date_Smaller_DATA)); + return createLessEqual(attr("Nullable(Date)"), Field(Date_Smaller_DATA)); } } case 
Test_DateTime: { if (is_match) { - return createLessEqual(attr("DateTime"), Field(static_cast DateTime_Greater_DATA)); + return createLessEqual(attr("DateTime"), Field(DateTime_Greater_DATA)); } else { - return createLessEqual(attr("DateTime"), Field(static_cast DateTime_Smaller_DATA)); + return createLessEqual(attr("DateTime"), Field(DateTime_Smaller_DATA)); } } case Test_Nullable_DateTime: { if (is_match) { - return createLessEqual(attr("Nullable(DateTime)"), Field(static_cast DateTime_Greater_DATA)); + return createLessEqual(attr("Nullable(DateTime)"), Field(DateTime_Greater_DATA)); } else { - return createLessEqual(attr("Nullable(DateTime)"), Field(static_cast DateTime_Smaller_DATA)); + return createLessEqual(attr("Nullable(DateTime)"), Field(DateTime_Smaller_DATA)); } } case Test_MyDateTime: @@ -1180,6 +1159,7 @@ RSOperatorPtr generateRSOperator(MinMaxTestDatatype data_type, MinMaxTestOperato throw Exception("Unknown filter operator type"); } } +} // namespace TEST_F(MinMaxIndexTest, Equal) try @@ -2052,7 +2032,7 @@ try // make a euqal filter, check equal with 1 auto filter = createEqual(attr("Nullable(Int64)"), Field(static_cast(1))); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::Some); + ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::SomeNull); } CATCH @@ -2087,32 +2067,32 @@ try { // make a in filter, check in (NULL) auto filter = createIn(attr("Nullable(Int64)"), {Field()}); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::None); + ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::NoneNull); } { // make a in filter, check in (NULL, 1) auto filter = createIn(attr("Nullable(Int64)"), {Field(), Field(static_cast(1))}); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::Some); + ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::SomeNull); } { // make a in filter, check in (3) auto filter = createIn(attr("Nullable(Int64)"), {Field(static_cast(3))}); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::None); 
+ ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::NoneNull); } { // make a not in filter, check not in (NULL) auto filter = createNot(createIn(attr("Nullable(Int64)"), {Field()})); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::All); + ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::AllNull); } { // make a not in filter, check not in (NULL, 1) auto filter = createNot(createIn(attr("Nullable(Int64)"), {Field(), Field(static_cast(1))})); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::Some); + ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::SomeNull); } { // make a not in filter, check not in (3) auto filter = createNot(createIn(attr("Nullable(Int64)"), {Field(static_cast(3))})); - ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::All); + ASSERT_EQ(filter->roughCheck(0, 1, param)[0], RSResult::AllNull); } } CATCH @@ -2264,6 +2244,24 @@ try } CATCH +namespace +{ +// Only support Int64 for testing. +template +MinMaxIndexPtr createMinMaxIndex(const IDataType & col_type, const T & cases) +{ + auto minmax_index = std::make_shared(col_type); + for (const auto & c : cases) + { + RUNTIME_CHECK(c.column_data.size(), c.del_mark.size()); + auto col_data = createColumn>(c.column_data).column; + auto del_mark_col = createColumn(c.del_mark).column; + minmax_index->addPack(*col_data, static_cast *>(del_mark_col.get())); + } + return minmax_index; +} +} // namespace + TEST_F(MinMaxIndexTest, CheckIsNull) try { @@ -2286,26 +2284,382 @@ try }; auto col_type = makeNullable(std::make_shared()); - auto minmax_index = std::make_shared(*col_type); - for (const auto & c : cases) - { - ASSERT_EQ(c.column_data.size(), c.del_mark.size()); - auto col_data = createColumn>(c.column_data).column; - auto del_mark_col = createColumn(c.del_mark).column; - minmax_index->addPack(*col_data, static_cast *>(del_mark_col.get())); - } + auto minmax_index = createMinMaxIndex(*col_type, cases); auto actual_results = minmax_index->checkIsNull(0, cases.size()); 
for (size_t i = 0; i < cases.size(); ++i) { const auto & c = cases[i]; - ASSERT_EQ(actual_results[i], c.result) << fmt::format( - "i={} actual={} expected={}", - i, - magic_enum::enum_name(actual_results[i]), - magic_enum::enum_name(c.result)); + ASSERT_EQ(actual_results[i], c.result) + << fmt::format("i={} actual={} expected={}", i, actual_results[i], c.result); + } +} +CATCH + +namespace +{ +struct MinMaxCheckTestData +{ + std::vector> column_data; + std::vector del_mark; +}; + +const auto min_max_check_test_data = std::array{ + MinMaxCheckTestData{ + .column_data = {1, 2, 3, 4, std::nullopt}, + .del_mark = {0, 0, 0, 0, 0}, + }, + MinMaxCheckTestData{ + .column_data = {6, 7, 8, 9, 10}, + .del_mark = {0, 0, 0, 0, 0}, + }, + MinMaxCheckTestData{ + .column_data = {std::nullopt, std::nullopt}, + .del_mark = {0, 0}, + }, + MinMaxCheckTestData{ + .column_data = {1, 2, 3, 4, std::nullopt}, + .del_mark = {0, 0, 0, 0, 1}, + }, + MinMaxCheckTestData{ + .column_data = {6, 7, 8, 9, 10}, + .del_mark = {0, 0, 0, 1, 0}, + }, + MinMaxCheckTestData{ + .column_data = {std::nullopt, std::nullopt}, + .del_mark = {1, 0}, + }, + MinMaxCheckTestData{ + .column_data = {std::nullopt, std::nullopt}, + .del_mark = {1, 1}, + }, + MinMaxCheckTestData{ + .column_data = {1, 2, 3, 4}, + .del_mark = {1, 1, 1, 1}, + }, + MinMaxCheckTestData{ + .column_data = {1, 1}, + .del_mark = {0, 0}, + }, + MinMaxCheckTestData{ + .column_data = {1, 1, std::nullopt}, + .del_mark = {0, 0, 0}, + }, +}; +} // namespace + +TEST_F(MinMaxIndexTest, CheckIn) +try +{ + struct ValuesAndResults + { + std::vector values; // select ... 
in (values) + std::array results; // Result of each test data + }; + + std::vector params = { + { + .values = {1, 2, 3, 4, 5, 6}, + .results = { + RSResult::SomeNull, // checkIn can return All only when min value equals to max value + RSResult::Some, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::Some, // checkIn can return All only when min value equals to max value + RSResult::Some, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::All, // checkIn can return All only when min value equals to max value + RSResult::AllNull, // checkIn can return All only when min value equals to max value + }, + }, + { + .values = {100}, + .results = { + RSResult::NoneNull, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::None, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::None, + RSResult::NoneNull, + }, + }, + { + .values = {0}, + .results = { + RSResult::NoneNull, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::None, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + 
RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::None, + RSResult::NoneNull, + }, + }, + }; + + auto col_type = makeNullable(std::make_shared()); + auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data); + for (const auto & [values, expected_results] : params) + { + auto actual_results = minmax_index->checkIn( + 0, + min_max_check_test_data.size(), + std::vector(values.cbegin(), values.cend()), + col_type); + for (size_t j = 0; j < min_max_check_test_data.size(); ++j) + { + ASSERT_EQ(actual_results[j], expected_results[j]) << fmt::format( + "<{}> column_data={}, del_mark={}, values={}, actual={} expected={}", + j, + min_max_check_test_data[j].column_data, + min_max_check_test_data[j].del_mark, + values, + actual_results[j], + expected_results[j]); + } + } +} +CATCH + +TEST_F(MinMaxIndexTest, CheckCmp_Equal) +try +{ + struct ValuesAndResults + { + Int64 value; // select ... = value + std::array results; // Result of each test data + }; + + std::vector params = { + { + .value = 1, + .results = { + RSResult::SomeNull, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::Some, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::All, + RSResult::AllNull, + }, + }, + { + .value = 5, + .results = { + RSResult::NoneNull, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::None, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // 
All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::None, + RSResult::NoneNull, + }, + } + }; + + auto col_type = makeNullable(std::make_shared()); + auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data); + for (const auto & [value, expected_results] : params) + { + auto actual_results + = minmax_index->checkCmp(0, min_max_check_test_data.size(), value, col_type); + for (size_t j = 0; j < min_max_check_test_data.size(); ++j) + { + ASSERT_EQ(actual_results[j], expected_results[j]) << fmt::format( + "<{}> column_data={}, del_mark={}, values={}, actual={} expected={}", + j, + min_max_check_test_data[j].column_data, + min_max_check_test_data[j].del_mark, + value, + actual_results[j], + expected_results[j]); + } + } +} +CATCH + +TEST_F(MinMaxIndexTest, CheckCmp_Greater) +try +{ + struct ValuesAndResults + { + Int64 value; // select ... 
> value + std::array results; // Result of each test data + }; + + std::vector params = { + { + .value = 0, + .results = { + RSResult::AllNull, + RSResult::All, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::All, + RSResult::All, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::All, + RSResult::AllNull, + }, + }, + { + .value = 5, + .results = { + RSResult::NoneNull, + RSResult::All, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::None, + RSResult::All, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::None, + RSResult::NoneNull, + }, + }, + { + .value = 11, + .results = { + RSResult::NoneNull, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::None, + RSResult::None, + RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check + RSResult::None, + RSResult::NoneNull, + }, + }, + }; + + auto col_type = makeNullable(std::make_shared()); + auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data); + for (const 
auto & [value, expected_results] : params) + { + auto actual_results + = minmax_index->checkCmp(0, min_max_check_test_data.size(), value, col_type); + for (size_t j = 0; j < min_max_check_test_data.size(); ++j) + { + ASSERT_EQ(actual_results[j], expected_results[j]) << fmt::format( + "<{}> column_data={}, del_mark={}, values={}, actual={} expected={}", + j, + min_max_check_test_data[j].column_data, + min_max_check_test_data[j].del_mark, + value, + actual_results[j], + expected_results[j]); + } } } CATCH
+TEST_F(MinMaxIndexTest, CheckCmp_GreaterEqual) // Compares checkCmp's per-entry result for `>= value` on a nullable column against a hand-written table.
+try
+{
+    struct ValuesAndResults // One test case: a comparison constant plus the expected result for every test-data entry.
+    {
+        Int64 value; // select ... >= value
+        std::array results; // Result of each test data; read as expected_results[j] for min_max_check_test_data[j] in the loop below
+    };
+
+    std::vector params = { // NOTE(review): template arguments (std::array, std::vector, std::make_shared, checkCmp) look stripped in this copy of the patch - restore from upstream before applying.
+        {
+            .value = 1,
+            .results = {
+                RSResult::AllNull,
+                RSResult::All,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::All,
+                RSResult::All,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::All,
+                RSResult::AllNull,
+            },
+        },
+        {
+            .value = 2,
+            .results = {
+                RSResult::SomeNull,
+                RSResult::All,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::Some,
+                RSResult::All,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::None,
+                RSResult::NoneNull,
+            },
+        },
+        {
+            .value = 10,
+            .results = {
+                RSResult::NoneNull,
+                RSResult::Some,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::None,
+                RSResult::Some,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::None,
+                RSResult::NoneNull,
+            },
+        },
+        {
+            .value = 11,
+            .results = {
+                RSResult::NoneNull,
+                RSResult::None,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::None,
+                RSResult::None,
+                RSResult::SomeNull, // All the fields are null, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::SomeNull, // All the fields are deleted, the default value is null, meet the compatibility check
+                RSResult::None,
+                RSResult::NoneNull,
+            },
+        },
+    };
+
+    auto col_type = makeNullable(std::make_shared()); // Nullable column type; also passed to checkCmp below.
+    auto minmax_index = createMinMaxIndex(*col_type, min_max_check_test_data); // Index built once and reused for every comparison constant.
+    for (const auto & [value, expected_results] : params) // One checkCmp call per comparison constant.
+    {
+        auto actual_results // presumably (start, count) = (0, size) covers every entry - confirm against checkCmp's signature
+            = minmax_index->checkCmp(0, min_max_check_test_data.size(), value, col_type);
+        for (size_t j = 0; j < min_max_check_test_data.size(); ++j)
+        {
+            ASSERT_EQ(actual_results[j], expected_results[j]) << fmt::format( // Fatal assertion; the message identifies the failing entry and its inputs.
+                "<{}> column_data={}, del_mark={}, values={}, actual={} expected={}",
+                j,
+                min_max_check_test_data[j].column_data,
+                min_max_check_test_data[j].del_mark,
+                value,
+                actual_results[j],
+                expected_results[j]);
+        }
+    }
+}
+CATCH
} // namespace DB::DM::tests diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_rs_result.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_rs_result.cpp new file mode 100644 index 00000000000..1af179a19ef --- /dev/null +++
b/dbms/src/Storages/DeltaMerge/tests/gtest_rs_result.cpp @@ -0,0 +1,120 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include // NOTE(review): header name appears stripped in this copy of the patch - restore from upstream.
+#include // NOTE(review): header name appears stripped in this copy of the patch - restore from upstream.
+
+namespace DB::DM::tests
+{
+
+TEST(RSResultTest, Not) // operator!: All and None swap, Some is unchanged; a *Null operand yields the *Null form of the result.
+{
+    ASSERT_EQ(!RSResult::Some, RSResult::Some);
+    ASSERT_EQ(!RSResult::None, RSResult::All);
+    ASSERT_EQ(!RSResult::All, RSResult::None);
+    ASSERT_EQ(!RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(!RSResult::NoneNull, RSResult::AllNull);
+    ASSERT_EQ(!RSResult::AllNull, RSResult::NoneNull);
+}
+
+TEST(RSResultTest, And) // operator&&: exhaustive 6x6 table, one group of six assertions per left operand.
+{ // Expected pattern: None dominates, All is neutral, and the result is a *Null variant iff either operand is one.
+    ASSERT_EQ(RSResult::Some && RSResult::Some, RSResult::Some);
+    ASSERT_EQ(RSResult::Some && RSResult::None, RSResult::None);
+    ASSERT_EQ(RSResult::Some && RSResult::All, RSResult::Some);
+    ASSERT_EQ(RSResult::Some && RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::Some && RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::Some && RSResult::AllNull, RSResult::SomeNull);
+
+    ASSERT_EQ(RSResult::None && RSResult::Some, RSResult::None);
+    ASSERT_EQ(RSResult::None && RSResult::None, RSResult::None);
+    ASSERT_EQ(RSResult::None && RSResult::All, RSResult::None);
+    ASSERT_EQ(RSResult::None && RSResult::SomeNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::None && RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::None && RSResult::AllNull, RSResult::NoneNull);
+
+    ASSERT_EQ(RSResult::All && RSResult::Some, RSResult::Some);
+    ASSERT_EQ(RSResult::All && RSResult::None, RSResult::None);
+    ASSERT_EQ(RSResult::All && RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::All && RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::All && RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::All && RSResult::AllNull, RSResult::AllNull);
+
+    ASSERT_EQ(RSResult::SomeNull && RSResult::Some, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull && RSResult::None, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::SomeNull && RSResult::All, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull && RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull && RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::SomeNull && RSResult::AllNull, RSResult::SomeNull);
+
+    ASSERT_EQ(RSResult::NoneNull && RSResult::Some, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull && RSResult::None, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull && RSResult::All, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull && RSResult::SomeNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull && RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull && RSResult::AllNull, RSResult::NoneNull);
+
+    ASSERT_EQ(RSResult::AllNull && RSResult::Some, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::AllNull && RSResult::None, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::AllNull && RSResult::All, RSResult::AllNull);
+    ASSERT_EQ(RSResult::AllNull && RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::AllNull && RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::AllNull && RSResult::AllNull, RSResult::AllNull);
+}
+
+TEST(RSResultTest, Or) // operator||: exhaustive 6x6 table, one group of six assertions per left operand.
+{ // Expected pattern: a plain All operand always gives plain All; otherwise All dominates, None is neutral, and *Null propagates.
+    ASSERT_EQ(RSResult::Some || RSResult::Some, RSResult::Some);
+    ASSERT_EQ(RSResult::Some || RSResult::None, RSResult::Some);
+    ASSERT_EQ(RSResult::Some || RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::Some || RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::Some || RSResult::NoneNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::Some || RSResult::AllNull, RSResult::AllNull);
+
+    ASSERT_EQ(RSResult::None || RSResult::Some, RSResult::Some);
+    ASSERT_EQ(RSResult::None || RSResult::None, RSResult::None);
+    ASSERT_EQ(RSResult::None || RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::None || RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::None || RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::None || RSResult::AllNull, RSResult::AllNull);
+
+    ASSERT_EQ(RSResult::All || RSResult::Some, RSResult::All);
+    ASSERT_EQ(RSResult::All || RSResult::None, RSResult::All);
+    ASSERT_EQ(RSResult::All || RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::All || RSResult::SomeNull, RSResult::All);
+    ASSERT_EQ(RSResult::All || RSResult::NoneNull, RSResult::All);
+    ASSERT_EQ(RSResult::All || RSResult::AllNull, RSResult::All);
+
+    ASSERT_EQ(RSResult::SomeNull || RSResult::Some, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull || RSResult::None, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull || RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::SomeNull || RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull || RSResult::NoneNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::SomeNull || RSResult::AllNull, RSResult::AllNull);
+
+    ASSERT_EQ(RSResult::NoneNull || RSResult::Some, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::NoneNull || RSResult::None, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull || RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::NoneNull || RSResult::SomeNull, RSResult::SomeNull);
+    ASSERT_EQ(RSResult::NoneNull || RSResult::NoneNull, RSResult::NoneNull);
+    ASSERT_EQ(RSResult::NoneNull || RSResult::AllNull, RSResult::AllNull);
+
+    ASSERT_EQ(RSResult::AllNull || RSResult::Some, RSResult::AllNull);
+    ASSERT_EQ(RSResult::AllNull || RSResult::None, RSResult::AllNull);
+    ASSERT_EQ(RSResult::AllNull || RSResult::All, RSResult::All);
+    ASSERT_EQ(RSResult::AllNull || RSResult::SomeNull, RSResult::AllNull);
+    ASSERT_EQ(RSResult::AllNull || RSResult::NoneNull, RSResult::AllNull);
+    ASSERT_EQ(RSResult::AllNull || RSResult::AllNull, RSResult::AllNull);
+}
+} // namespace DB::DM::tests