diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index da071507a72..27283c0f24a 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -233,8 +233,7 @@ class ColumnConst final : public COWPtrHelper template T getValue() const { - auto && tmp = getField(); - return std::move(tmp.safeGet::Type>()); + return getField().safeGet::Type>(); } }; diff --git a/dbms/src/Functions/CollationOperatorOptimized.h b/dbms/src/Functions/CollationOperatorOptimized.h deleted file mode 100644 index 395ecc5b9eb..00000000000 --- a/dbms/src/Functions/CollationOperatorOptimized.h +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -template -ALWAYS_INLINE inline int signum(T val) -{ - return (0 < val) - (val < 0); -} - -// Check equality is much faster than other comparison. -// - check size first -// - return 0 if equal else 1 -__attribute__((flatten, always_inline, pure)) inline uint8_t RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) -{ - return StringRef(lhs) == StringRef(rhs) ? 0 : 1; -} - -// Compare str view by memcmp -__attribute__((flatten, always_inline, pure)) inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) -{ - return signum(v1.compare(v2)); -} - -constexpr char SPACE = ' '; - -// Remove tail space -__attribute__((flatten, always_inline, pure)) inline std::string_view RightTrim(const std::string_view & v) -{ - if (likely(v.empty() || v.back() != SPACE)) - return v; - size_t end = v.find_last_not_of(SPACE); - return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); -} - -__attribute__((flatten, always_inline, pure)) inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) -{ - return RawStrCompare(RightTrim(va), RightTrim(vb)); -} - -// If true, only need to check equal or not. -template -struct IsEqualRelated -{ - static constexpr const bool value = false; -}; - -// For `EqualsOp` and `NotEqualsOp`, value is true. -template -struct IsEqualRelated> -{ - static constexpr const bool value = true; -}; -template -struct IsEqualRelated> -{ - static constexpr const bool value = true; -}; - -// Loop columns and invoke callback for each pair. -template -__attribute__((flatten, always_inline)) inline void LoopTwoColumns( - const ColumnString::Chars_t & a_data, - const ColumnString::Offsets & a_offsets, - const ColumnString::Chars_t & b_data, - const ColumnString::Offsets & b_offsets, - size_t size, - F && func) -{ - for (size_t i = 0; i < size; ++i) - { - size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; - size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; - const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); - const auto * b_ptr = reinterpret_cast(&b_data[StringUtil::offsetAt(b_offsets, i)]); - - func({a_ptr, a_size}, {b_ptr, b_size}, i); - } -} - -// Loop one column and invoke callback for each pair. -template -__attribute__((flatten, always_inline)) inline void LoopOneColumn( - const ColumnString::Chars_t & a_data, - const ColumnString::Offsets & a_offsets, - size_t size, - F && func) -{ - for (size_t i = 0; i < size; ++i) - { - size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; - const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); - - func({a_ptr, a_size}, i); - } -} - -// Handle str-column compare str-column. -// - Optimize UTF8_BIN and UTF8MB4_BIN -// - Check if columns do NOT contain tail space -// - If Op is `EqualsOp` or `NotEqualsOp`, optimize comparison by faster way -template -ALWAYS_INLINE inline bool StringVectorStringVector( - const ColumnString::Chars_t & a_data, - const ColumnString::Offsets & a_offsets, - const ColumnString::Chars_t & b_data, - const ColumnString::Offsets & b_offsets, - const TiDB::TiDBCollatorPtr & collator, - Result & c) -{ - bool use_optimized_path = false; - - switch (collator->getCollatorId()) - { - case TiDB::ITiDBCollator::UTF8MB4_BIN: - case TiDB::ITiDBCollator::UTF8_BIN: - { - size_t size = a_offsets.size(); - - LoopTwoColumns(a_data, a_offsets, b_data, b_offsets, size, [&c](const std::string_view & va, const std::string_view & vb, size_t i) { - if constexpr (IsEqualRelated::value) - { - c[i] = Op::apply(RawStrEqualCompare(RightTrim(va), RightTrim(vb)), 0); - } - else - { - c[i] = Op::apply(RtrimStrCompare(va, vb), 0); - } - }); - - use_optimized_path = true; - - break; - } - default: - break; - } - return use_optimized_path; -} - -// Handle str-column compare const-str. -// - Optimize UTF8_BIN and UTF8MB4_BIN -// - Right trim const-str first -// - Check if column does NOT contain tail space -// - If Op is `EqualsOp` or `NotEqualsOp`, optimize comparison by faster way -template -ALWAYS_INLINE inline bool StringVectorConstant( - const ColumnString::Chars_t & a_data, - const ColumnString::Offsets & a_offsets, - const std::string_view & b, - const TiDB::TiDBCollatorPtr & collator, - Result & c) -{ - bool use_optimized_path = false; - - switch (collator->getCollatorId()) - { - case TiDB::ITiDBCollator::UTF8MB4_BIN: - case TiDB::ITiDBCollator::UTF8_BIN: - { - size_t size = a_offsets.size(); - - std::string_view tar_str_view = RightTrim(b); // right trim const-str first - - LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) { - if constexpr (IsEqualRelated::value) - { - c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0); - } - else - { - c[i] = Op::apply(RawStrCompare(RightTrim(view), tar_str_view), 0); - } - }); - - use_optimized_path = true; - break; - } - default: - break; - } - return use_optimized_path; -} - -} // namespace DB diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 8f7502fba85..1c63a286452 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -302,12 +301,6 @@ struct StringComparisonWithCollatorImpl const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & c) { - bool optimized_path = StringVectorStringVector(a_data, a_offsets, b_data, b_offsets, collator, c); - if (optimized_path) - { - return; - } - size_t size = a_offsets.size(); for (size_t i = 0; i < size; ++i) @@ -324,17 +317,10 @@ struct StringComparisonWithCollatorImpl static void NO_INLINE stringVectorConstant( const ColumnString::Chars_t & a_data, const ColumnString::Offsets & a_offsets, - const std::string_view & b, + const std::string & b, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & c) { - bool optimized_path = StringVectorConstant(a_data, a_offsets, b, collator, c); - - if (optimized_path) - { - return; - } - size_t size = a_offsets.size(); ColumnString::Offset b_size = b.size(); const char * b_data = reinterpret_cast(b.data()); @@ -346,7 +332,7 @@ struct StringComparisonWithCollatorImpl } static void constantStringVector( - const std::string_view & a, + const std::string & a, const ColumnString::Chars_t & b_data, const ColumnString::Offsets & b_offsets, const TiDB::TiDBCollatorPtr & collator, @@ -356,8 +342,8 @@ struct StringComparisonWithCollatorImpl } static void constantConstant( - const std::string_view & a, - const std::string_view & b, + const std::string & a, + const std::string & b, const TiDB::TiDBCollatorPtr & collator, ResultType & c) { @@ -720,25 +706,6 @@ class FunctionComparison : public IFunction } } - static inline std::string_view genConstStrRef(const ColumnConst * c0_const) - { - std::string_view c0_const_str_ref{}; - if (c0_const) - { - if (const auto * c0_const_string = checkAndGetColumn(&c0_const->getDataColumn()); c0_const_string) - { - c0_const_str_ref = std::string_view(c0_const_string->getDataAt(0)); - } - else if (const auto * c0_const_fixed_string = checkAndGetColumn(&c0_const->getDataColumn()); c0_const_fixed_string) - { - c0_const_str_ref = std::string_view(c0_const_fixed_string->getDataAt(0)); - } - else - throw Exception("Logical error: ColumnConst contains not String nor FixedString column", ErrorCodes::ILLEGAL_COLUMN); - } - return c0_const_str_ref; - } - template bool executeStringWithCollator( Block & block, @@ -753,13 +720,10 @@ class FunctionComparison : public IFunction using ResultType = typename ResultColumnType::value_type; using StringImpl = StringComparisonWithCollatorImpl, ResultType>; - std::string_view c0_const_str_ref = genConstStrRef(c0_const); - std::string_view c1_const_str_ref = genConstStrRef(c1_const); - if (c0_const && c1_const) { ResultType res = 0; - StringImpl::constantConstant(c0_const_str_ref, c1_const_str_ref, collator, res); + StringImpl::constantConstant(c0_const->getValue(), c1_const->getValue(), collator, res); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(c0_const->size(), toField(res)); return true; } @@ -781,12 +745,12 @@ class FunctionComparison : public IFunction StringImpl::stringVectorConstant( c0_string->getChars(), c0_string->getOffsets(), - c1_const_str_ref, + c1_const->getValue(), collator, c_res->getData()); else if (c0_const && c1_string) StringImpl::constantStringVector( - c0_const_str_ref, + c0_const->getValue(), c1_string->getChars(), c1_string->getOffsets(), collator, @@ -806,8 +770,8 @@ class FunctionComparison : public IFunction template bool executeString(Block & block, size_t result, const IColumn * c0, const IColumn * c1) const { - const auto * c0_string = checkAndGetColumn(c0); - const auto * c1_string = checkAndGetColumn(c1); + const ColumnString * c0_string = checkAndGetColumn(c0); + const ColumnString * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const = checkAndGetColumnConstStringOrFixedString(c0); const ColumnConst * c1_const = checkAndGetColumnConstStringOrFixedString(c1); diff --git a/dbms/src/Storages/Transaction/Collator.cpp b/dbms/src/Storages/Transaction/Collator.cpp index 1b0221a6829..a9b4d0784be 100644 --- a/dbms/src/Storages/Transaction/Collator.cpp +++ b/dbms/src/Storages/Transaction/Collator.cpp @@ -13,7 +13,6 @@ // limitations under the License. #include -#include #include #include @@ -30,10 +29,17 @@ TiDBCollators dummy_collators; std::vector dummy_sort_key_contaners; std::string dummy_sort_key_contaner; -ALWAYS_INLINE std::string_view rtrim(const char * s, size_t length) +std::string_view rtrim(const char * s, size_t length) { auto v = std::string_view(s, length); - return DB::RightTrim(v); + size_t end = v.find_last_not_of(' '); + return end == std::string_view::npos ? "" : v.substr(0, end + 1); +} + +template +int signum(T val) +{ + return (0 < val) - (val < 0); } using Rune = int32_t; @@ -177,26 +183,26 @@ class Pattern : public ITiDBCollator::IPattern }; template -class BinCollator final : public ITiDBCollator +class BinCollator : public ITiDBCollator { public: explicit BinCollator(int32_t id) : ITiDBCollator(id) {} - int compare(const char * s1, size_t length1, const char * s2, size_t length2) const override { if constexpr (padding) - return DB::RtrimStrCompare({s1, length1}, {s2, length2}); + return signum(rtrim(s1, length1).compare(rtrim(s2, length2))); else - return DB::RawStrCompare({s1, length1}, {s2, length2}); + return signum(std::string_view(s1, length1).compare(std::string_view(s2, length2))); } StringRef sortKey(const char * s, size_t length, std::string &) const override { if constexpr (padding) { - return StringRef(rtrim(s, length)); + auto v = rtrim(s, length); + return StringRef(v.data(), v.length()); } else { @@ -243,7 +249,7 @@ using WeightType = uint16_t; extern const std::array weight_lut; } // namespace GeneralCI -class GeneralCICollator final : public ITiDBCollator +class GeneralCICollator : public ITiDBCollator { public: explicit GeneralCICollator(int32_t id) @@ -264,7 +270,7 @@ class GeneralCICollator final : public ITiDBCollator auto sk2 = weight(c2); auto cmp = sk1 - sk2; if (cmp != 0) - return DB::signum(cmp); + return signum(cmp); } return (offset1 < v1.length()) - (offset2 < v2.length()); @@ -359,7 +365,7 @@ const std::array weight_lut_long = { } // namespace UnicodeCI -class UnicodeCICollator final : public ITiDBCollator +class UnicodeCICollator : public ITiDBCollator { public: explicit UnicodeCICollator(int32_t id) @@ -414,7 +420,7 @@ class UnicodeCICollator final : public ITiDBCollator } else { - return DB::signum(static_cast(s1_first & 0xFFFF) - static_cast(s2_first & 0xFFFF)); + return signum(static_cast(s1_first & 0xFFFF) - static_cast(s2_first & 0xFFFF)); } } } @@ -587,8 +593,6 @@ class UnicodeCICollator final : public ITiDBCollator friend class Pattern; }; -using UTF8MB4_BIN_TYPE = BinCollator; - TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) { switch (id) @@ -603,10 +607,10 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) static const auto latin1_collator = BinCollator(LATIN1_BIN); return &latin1_collator; case ITiDBCollator::UTF8MB4_BIN: - static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN); + static const auto utf8mb4_collator = BinCollator(UTF8MB4_BIN); return &utf8mb4_collator; case ITiDBCollator::UTF8_BIN: - static const auto utf8_collator = UTF8MB4_BIN_TYPE(UTF8_BIN); + static const auto utf8_collator = BinCollator(UTF8_BIN); return &utf8_collator; case ITiDBCollator::UTF8_GENERAL_CI: static const auto utf8_general_ci_collator = GeneralCICollator(UTF8_GENERAL_CI); diff --git a/tests/tidb-ci/new_collation_fullstack/expr.test b/tests/tidb-ci/new_collation_fullstack/expr.test index 1e2135c4f2d..15ada0f335c 100644 --- a/tests/tidb-ci/new_collation_fullstack/expr.test +++ b/tests/tidb-ci/new_collation_fullstack/expr.test @@ -35,13 +35,6 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_s | 2 | abc | +------+-------+ -mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 = 'abc '; -+------+-------+ -| id | value1| -+------+-------+ -| 1 | abc | -| 2 | abc | -+------+-------+ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value from test.t where value like 'aB%'; +------+-------+ @@ -69,13 +62,6 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_s | 3 | def | +------+-------+ -mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 = 'def '; -+------+-------+ -| id | value1| -+------+-------+ -| 3 | def | -+------+-------+ - mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 in ('Abc','def'); +------+-------+ | id | value1|