From 67aa3559830efd5cc32c75ec3b09988564d837b9 Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 9 Aug 2022 01:10:38 +0800 Subject: [PATCH] optimize fixed size --- .../Functions/CollationOperatorOptimized.h | 110 ++++++++++++++-- libs/libcommon/include/common/fixed_mem_eq.h | 124 ++++++++++++++++++ 2 files changed, 221 insertions(+), 13 deletions(-) create mode 100644 libs/libcommon/include/common/fixed_mem_eq.h diff --git a/dbms/src/Functions/CollationOperatorOptimized.h b/dbms/src/Functions/CollationOperatorOptimized.h index baa60764068..dd91180ec29 100644 --- a/dbms/src/Functions/CollationOperatorOptimized.h +++ b/dbms/src/Functions/CollationOperatorOptimized.h @@ -20,11 +20,11 @@ #include #include #include +#include #include #include - namespace DB { @@ -97,6 +97,23 @@ __attribute__((flatten, always_inline)) inline void LoopOneColumn( } } +template +__attribute__((flatten, always_inline)) inline void LoopOneColumnCmpEqFixedStr( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const char * src, + Result & c) +{ + LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&](std::string_view view, size_t i) { + if constexpr (trim) + view = RightTrim(view); + auto res = 1; + if (view.size() == n) + res = mem_utils::memcmp_eq_fixed_size(view.data(), src) ? 0 : 1; + c[i] = Op::apply(res, 0); + }); +} + // Handle str-column compare str-column. 
// - Optimize UTF8_BIN and UTF8MB4_BIN // - Check if columns do NOT contain tail space @@ -175,8 +192,6 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant( const TiDB::TiDBCollatorPtr & collator, Result & c) { - bool use_optimized_path = false; - switch (collator->getCollatorType()) { case TiDB::ITiDBCollator::CollatorType::UTF8MB4_BIN: @@ -184,11 +199,46 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant( case TiDB::ITiDBCollator::CollatorType::LATIN1_BIN: case TiDB::ITiDBCollator::CollatorType::ASCII_BIN: { - size_t size = a_offsets.size(); - std::string_view tar_str_view = RightTrim(b); // right trim const-str first - LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) { + if constexpr (IsEqualRelated::value) + { +#ifdef M + static_assert(false, "`M` is defined"); +#endif +#define M(k) \ + case k: \ + { \ + LoopOneColumnCmpEqFixedStr(a_data, a_offsets, tar_str_view.data(), c); \ + return true; \ + } + + switch (tar_str_view.size()) + { + M(0); + M(1); + M(2); + M(3); + M(4); + M(5); + M(6); + M(7); + M(8); + M(9); + M(10); + M(11); + M(12); + M(13); + M(14); + M(15); + M(16); + default: + break; + } +#undef M + } + + LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &tar_str_view](const std::string_view & view, size_t i) { if constexpr (IsEqualRelated::value) { c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0); @@ -199,13 +249,48 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant( } }); - use_optimized_path = true; - break; + return true; } case TiDB::ITiDBCollator::CollatorType::BINARY: { - size_t size = a_offsets.size(); - LoopOneColumn(a_data, a_offsets, size, [&c, &b](const std::string_view & view, size_t i) { + if constexpr (IsEqualRelated::value) + { +#ifdef M + static_assert(false, "`M` is defined"); +#endif +#define M(k) \ + case k: \ + { \ + LoopOneColumnCmpEqFixedStr(a_data, a_offsets, b.data(), c); \ + return true; \ + } + + switch (b.size()) + { + M(0); + 
M(1); + M(2); + M(3); + M(4); + M(5); + M(6); + M(7); + M(8); + M(9); + M(10); + M(11); + M(12); + M(13); + M(14); + M(15); + M(16); + default: + break; + } +#undef M + } + + LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &b](const std::string_view & view, size_t i) { if constexpr (IsEqualRelated::value) { c[i] = Op::apply(RawStrEqualCompare((view), b), 0); @@ -216,13 +301,12 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant( } }); - use_optimized_path = true; - break; + return true; } default: break; } - return use_optimized_path; + return false; } } // namespace DB diff --git a/libs/libcommon/include/common/fixed_mem_eq.h b/libs/libcommon/include/common/fixed_mem_eq.h new file mode 100644 index 00000000000..489a27572f4 --- /dev/null +++ b/libs/libcommon/include/common/fixed_mem_eq.h @@ -0,0 +1,124 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include <common/defines.h> // presumably the ALWAYS_INLINE provider; header name lost in extraction - confirm
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+namespace mem_utils
+{
+
+#define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))
+
+// Reads a `T` out of the bytes at `p`. memcpy keeps the read well-defined for any
+// alignment of `p` and avoids the aliasing violation of dereferencing a cast pointer.
+template <typename T>
+FLATTEN_INLINE_PURE inline T fixed_mem_eq_load(const char * p)
+{
+    T value;
+    std::memcpy(&value, p, sizeof(T));
+    return value;
+}
+
+// memcmp_eqN(a, b): true iff the first N bytes at `a` and `b` are identical.
+// Both pointers must be readable for N bytes.
+FLATTEN_INLINE_PURE
+inline bool memcmp_eq1(const char * a, const char * b)
+{
+    return a[0] == b[0];
+}
+
+FLATTEN_INLINE_PURE
+inline bool memcmp_eq2(const char * a, const char * b)
+{
+    return fixed_mem_eq_load<uint16_t>(a) == fixed_mem_eq_load<uint16_t>(b);
+}
+
+// Two overlapping 2-byte compares cover bytes [0, 2) and [1, 3).
+FLATTEN_INLINE_PURE
+inline bool memcmp_eq3(const char * a, const char * b)
+{
+    return memcmp_eq2(a, b) & memcmp_eq2(a + 1, b + 1);
+}
+
+FLATTEN_INLINE_PURE
+inline bool memcmp_eq4(const char * a, const char * b)
+{
+    return fixed_mem_eq_load<uint32_t>(a) == fixed_mem_eq_load<uint32_t>(b);
+}
+
+FLATTEN_INLINE_PURE
+inline bool memcmp_eq8(const char * a, const char * b)
+{
+    return fixed_mem_eq_load<uint64_t>(a) == fixed_mem_eq_load<uint64_t>(b);
+}
+
+// Returns true iff the first `k` bytes at `a` and `b` are identical. `k` is a
+// compile-time constant in [0, 32]; `if constexpr` keeps only the matching branch.
+// Both pointers must be readable for `k` bytes.
+// - 16 < k <= 32: the first 16 bytes, then the remaining `k - 16` bytes.
+// - sizes with no exact-width helper: two overlapping compares, one starting at
+//   byte 0 and one ending at byte `k`.
+// `&` rather than `&&`: both halves are always evaluated (no short-circuit).
+template <size_t k>
+ALWAYS_INLINE inline bool memcmp_eq_fixed_size(const char * a, const char * b)
+{
+    static_assert(k >= 0);
+    static_assert(k <= 32);
+
+    if constexpr (k > 16)
+        return memcmp_eq_fixed_size<16>(a, b) & memcmp_eq_fixed_size<k - 16>(a + 16, b + 16);
+    else if constexpr (k > 8)
+        return memcmp_eq8(a, b) & memcmp_eq8(a + k - 8, b + k - 8);
+    else if constexpr (k == 8)
+        return memcmp_eq8(a, b);
+    else if constexpr (k > 4)
+        return memcmp_eq4(a, b) & memcmp_eq4(a + k - 4, b + k - 4);
+    else if constexpr (k == 4)
+        return memcmp_eq4(a, b);
+    else if constexpr (k == 3)
+        return memcmp_eq3(a, b);
+    else if constexpr (k == 2)
+        return memcmp_eq2(a, b);
+    else if constexpr (k == 1)
+        return memcmp_eq1(a, b);
+    else
+        // k == 0: two empty ranges are equal.
+        return true;
+}
+
+/*
+- with cxx flag `-mavx2`
+    - memcmp_eq_fixed_size<32>:
+        vmovdqu ymm0, ymmword ptr [rdi]
+        vpcmpeqq ymm0, ymm0, ymmword ptr [rsi]
+        vmovmskpd eax, ymm0
+        cmp al, 15
+        sete al
+        vzeroupper
+        ret
+    - memcmp_eq_fixed_size<16>:
+        vmovdqu xmm0, xmmword ptr [rdi]
+        vpcmpeqq xmm0, xmm0, xmmword ptr [rsi]
+        vmovmskpd eax, xmm0
+        cmp al, 3
+        sete al
+        ret
+*/
+
+} // namespace mem_utils