Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize equality comparison for small str with fixed size #5569

Merged
merged 12 commits into from
Aug 16, 2022
114 changes: 99 additions & 15 deletions dbms/src/Functions/CollationOperatorOptimized.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
#include <Storages/Transaction/CollatorUtils.h>
#include <common/StringRef.h>
#include <common/defines.h>
#include <common/fixed_mem_eq.h>

#include <cstddef>
#include <string_view>


namespace DB
{

Expand All @@ -50,7 +50,7 @@ struct IsEqualRelated<DB::NotEqualsOp<A...>>
// Loop columns and invoke callback for each pair.
// Remove last zero byte.
template <typename F>
__attribute__((flatten, always_inline)) inline void LoopTwoColumns(
FLATTEN_INLINE inline void LoopTwoColumns(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
const ColumnString::Chars_t & b_data,
Expand Down Expand Up @@ -79,7 +79,7 @@ __attribute__((flatten, always_inline)) inline void LoopTwoColumns(
// Loop one column and invoke callback for each pair.
// Remove last zero byte.
template <typename F>
__attribute__((flatten, always_inline)) inline void LoopOneColumn(
FLATTEN_INLINE inline void LoopOneColumn(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
size_t size,
Expand All @@ -97,6 +97,23 @@ __attribute__((flatten, always_inline)) inline void LoopOneColumn(
}
}

// Compare every row of a string column against a constant string whose length
// `n` is known at compile time, and store the operator result for each row.
// - `n`: byte length of the constant; `src` must provide at least `n` readable bytes.
// - `Op`: comparison operator; it is applied as `Op::apply(res, 0)` where `res`
//   is 0 when the row equals the constant and 1 otherwise.
// - `trim`: when true, each row is right-trimmed (`RightTrim`) before comparing.
// - `c`: output container, written at the row index.
template <size_t n, typename Op, bool trim, typename Result>
FLATTEN_INLINE inline void LoopOneColumnCmpEqFixedStr(
    const ColumnString::Chars_t & a_data,
    const ColumnString::Offsets & a_offsets,
    const char * src,
    Result & c)
{
    const auto compare_row = [&](std::string_view row, size_t row_idx) {
        if constexpr (trim)
        {
            row = RightTrim(row);
        }

        // Rows of a different length can never be equal, so the fixed-size
        // comparison only runs when the lengths match.
        const bool same = (row.size() == n) && mem_utils::memcmp_eq_fixed_size<n>(row.data(), src);
        const int cmp_res = same ? 0 : 1;
        c[row_idx] = Op::apply(cmp_res, 0);
    };

    LoopOneColumn(a_data, a_offsets, a_offsets.size(), compare_row);
}

// Handle str-column compare str-column.
// - Optimize bin collator
// - Check if columns do NOT contain tail space
Expand Down Expand Up @@ -175,20 +192,53 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
const TiDB::TiDBCollatorPtr & collator,
Result & c)
{
bool use_optimized_path = false;

switch (collator->getCollatorType())
{
case TiDB::ITiDBCollator::CollatorType::UTF8MB4_BIN:
case TiDB::ITiDBCollator::CollatorType::UTF8_BIN:
case TiDB::ITiDBCollator::CollatorType::LATIN1_BIN:
case TiDB::ITiDBCollator::CollatorType::ASCII_BIN:
{
size_t size = a_offsets.size();

std::string_view tar_str_view = RightTrim(b); // right trim const-str first

LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
#ifdef M
static_assert(false, "`M` is defined");
#endif
#define M(k) \
case k: \
{ \
LoopOneColumnCmpEqFixedStr<k, Op, true>(a_data, a_offsets, tar_str_view.data(), c); \
return true; \
}

switch (tar_str_view.size())
{
M(0);
M(1);
M(2);
M(3);
M(4);
M(5);
M(6);
M(7);
M(8);
M(9);
M(10);
M(11);
M(12);
M(13);
M(14);
M(15);
M(16);
default:
break;
}
#undef M
}

LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &tar_str_view](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0);
Expand All @@ -199,13 +249,48 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
}
});

use_optimized_path = true;
break;
return true;
}
case TiDB::ITiDBCollator::CollatorType::BINARY:
{
size_t size = a_offsets.size();
LoopOneColumn(a_data, a_offsets, size, [&c, &b](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
#ifdef M
static_assert(false, "`M` is defined");
#endif
#define M(k) \
case k: \
{ \
LoopOneColumnCmpEqFixedStr<k, Op, false>(a_data, a_offsets, b.data(), c); \
return true; \
}

switch (b.size())
{
M(0);
M(1);
M(2);
M(3);
M(4);
M(5);
M(6);
M(7);
M(8);
M(9);
M(10);
M(11);
M(12);
M(13);
M(14);
M(15);
M(16);
default:
break;
}
#undef M
}

LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &b](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
c[i] = Op::apply(RawStrEqualCompare((view), b), 0);
Expand All @@ -216,13 +301,12 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
}
});

use_optimized_path = true;
break;
return true;
}
default:
break;
}
return use_optimized_path;
return false;
}

} // namespace DB
2 changes: 0 additions & 2 deletions dbms/src/Storages/Transaction/CollatorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

#include <memory>

#define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))

namespace DB
{

Expand Down
4 changes: 4 additions & 0 deletions libs/libcommon/include/common/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@
# define ALWAYS_INLINE __forceinline
# define NO_INLINE static __declspec(noinline)
# define MAY_ALIAS
# define FLATTEN_INLINE_PURE
# define FLATTEN_INLINE
#else
# define ALWAYS_INLINE __attribute__((__always_inline__))
# define NO_INLINE __attribute__((__noinline__))
# define MAY_ALIAS __attribute__((__may_alias__))
# define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))
# define FLATTEN_INLINE __attribute__((flatten, always_inline))
#endif

#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__)
Expand Down
125 changes: 125 additions & 0 deletions libs/libcommon/include/common/fixed_mem_eq.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/defines.h>

#include <cstddef>
#include <cstdint>
#include <cstring>

namespace mem_utils
{

// Helpers for testing whether two byte ranges of a compile-time-known length
// are equal. Every function takes two pointers and returns true when the first
// N bytes behind them are identical. The pointers may have any alignment.

namespace detail
{
// Read a `T` from an arbitrarily aligned address.
// `std::memcpy` is used instead of dereferencing a casted pointer: the casted
// form is undefined behaviour for misaligned addresses and violates strict
// aliasing, while a fixed-size memcpy is well defined.
template <typename T>
ALWAYS_INLINE inline T loadUnaligned(const char * p)
{
    T value;
    std::memcpy(&value, p, sizeof(T));
    return value;
}
} // namespace detail

// Zero bytes are always equal.
FLATTEN_INLINE_PURE
constexpr inline bool memcmp_eq0(const char *, const char *)
{
    return true;
}

// Compare 1 byte.
FLATTEN_INLINE_PURE
inline bool memcmp_eq1(const char * a, const char * b)
{
    return a[0] == b[0];
}

// Compare 2 bytes with a single 16-bit load per side.
FLATTEN_INLINE_PURE
inline bool memcmp_eq2(const char * a, const char * b)
{
    return detail::loadUnaligned<uint16_t>(a) == detail::loadUnaligned<uint16_t>(b);
}

// Compare 3 bytes as two overlapping 2-byte comparisons: bytes [0, 2) and [1, 3).
// Bitwise `&` is used on purpose so that both halves are evaluated without a branch.
FLATTEN_INLINE_PURE
inline bool memcmp_eq3(const char * a, const char * b)
{
    return memcmp_eq2(a, b) & memcmp_eq2(a + 1, b + 1);
}

// Compare 4 bytes with a single 32-bit load per side.
FLATTEN_INLINE_PURE
inline bool memcmp_eq4(const char * a, const char * b)
{
    return detail::loadUnaligned<uint32_t>(a) == detail::loadUnaligned<uint32_t>(b);
}

// Compare 8 bytes with a single 64-bit load per side.
FLATTEN_INLINE_PURE
inline bool memcmp_eq8(const char * a, const char * b)
{
    return detail::loadUnaligned<uint64_t>(a) == detail::loadUnaligned<uint64_t>(b);
}

// Compare exactly `k` bytes (0 <= k <= 32) at `a` and `b`.
// Sizes that are not 1, 2, 4 or 8 are covered by two loads that overlap in the
// middle: the first one starts at offset 0 and the second one ends at offset k.
// Partial results are combined with bitwise `&` to keep the code branch-free.
template <size_t k>
ALWAYS_INLINE inline bool memcmp_eq_fixed_size(const char * a, const char * b)
{
    static_assert(k <= 32, "memcmp_eq_fixed_size supports at most 32 bytes");

    if constexpr (k > 16)
    {
        // 17..32 bytes: the first 16 bytes plus the remaining 1..16 bytes.
        return memcmp_eq_fixed_size<16>(a, b) & memcmp_eq_fixed_size<k - 16>(a + 16, b + 16);
    }
    else if constexpr (k > 8)
    {
        // 9..16 bytes: the first 8 bytes and the last 8 bytes.
        return memcmp_eq8(a, b) & memcmp_eq8(a + k - 8, b + k - 8);
    }
    else if constexpr (k == 8)
    {
        return memcmp_eq8(a, b);
    }
    else if constexpr (k > 4)
    {
        // 5..7 bytes: the first 4 bytes and the last 4 bytes.
        return memcmp_eq4(a, b) & memcmp_eq4(a + k - 4, b + k - 4);
    }
    else if constexpr (k == 4)
    {
        return memcmp_eq4(a, b);
    }
    else if constexpr (k == 3)
    {
        return memcmp_eq3(a, b);
    }
    else if constexpr (k == 2)
    {
        return memcmp_eq2(a, b);
    }
    else if constexpr (k == 1)
    {
        return memcmp_eq1(a, b);
    }
    else
    {
        return memcmp_eq0(a, b);
    }
}

/*
Reference code generation noted for the pointer-cast variant of these helpers;
fixed-size memcpy loads are expected to compile to the same instructions, but
re-check the output if it matters.
- with cxx flag `-mavx2`
- memcmp_eq_fixed_size<32>:
    vmovdqu ymm0, ymmword ptr [rdi]
    vpcmpeqq ymm0, ymm0, ymmword ptr [rsi]
    vmovmskpd eax, ymm0
    cmp al, 15
    sete al
    vzeroupper
    ret
- memcmp_eq_fixed_size<16>:
    vmovdqu xmm0, xmmword ptr [rdi]
    vpcmpeqq xmm0, xmm0, xmmword ptr [rsi]
    vmovmskpd eax, xmm0
    cmp al, 3
    sete al
    ret
*/

} // namespace mem_utils