Skip to content

Commit

Permalink
Optimize equality comparison for small str with fixed size
Browse files Browse the repository at this point in the history
  • Loading branch information
solotzg committed Aug 10, 2022
1 parent 89962b8 commit f13830c
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 17 deletions.
114 changes: 99 additions & 15 deletions dbms/src/Functions/CollationOperatorOptimized.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
#include <Storages/Transaction/CollatorUtils.h>
#include <common/StringRef.h>
#include <common/defines.h>
#include <common/fixed_mem_eq.h>

#include <cstddef>
#include <string_view>


namespace DB
{

Expand All @@ -50,7 +50,7 @@ struct IsEqualRelated<DB::NotEqualsOp<A...>>
// Loop columns and invoke callback for each pair.
// Remove last zero byte.
template <typename F>
__attribute__((flatten, always_inline)) inline void LoopTwoColumns(
FLATTEN_INLINE inline void LoopTwoColumns(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
const ColumnString::Chars_t & b_data,
Expand Down Expand Up @@ -79,7 +79,7 @@ __attribute__((flatten, always_inline)) inline void LoopTwoColumns(
// Loop one column and invoke callback for each pair.
// Remove last zero byte.
template <typename F>
__attribute__((flatten, always_inline)) inline void LoopOneColumn(
FLATTEN_INLINE inline void LoopOneColumn(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
size_t size,
Expand All @@ -97,6 +97,23 @@ __attribute__((flatten, always_inline)) inline void LoopOneColumn(
}
}

// Compare every row of a string column against a constant string whose length `n`
// is known at compile time, and store the (not-)equal result for each row.
// - `n`:     byte length of the constant string pointed to by `src`.
// - `Op`:    comparison functor; `Op::apply(x, 0)` is invoked with x == 0 when the
//            row equals `src` and x == 1 otherwise.
// - `trim`:  when true, each row is passed through `RightTrim` before comparing.
// - `c`:     indexable result container; `c[i]` receives the result for row `i`.
// Rows are visited through `LoopOneColumn` over all `a_offsets.size()` entries.
template <size_t n, typename Op, bool trim, typename Result>
FLATTEN_INLINE inline void LoopOneColumnCmpEqFixedStr(
    const ColumnString::Chars_t & a_data,
    const ColumnString::Offsets & a_offsets,
    const char * src,
    Result & c)
{
    LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&](std::string_view row, size_t row_idx) {
        if constexpr (trim)
            row = RightTrim(row);

        // The fixed-size comparison reads exactly `n` bytes, so it is only
        // evaluated when the row length matches (short-circuit on the size check).
        const bool is_equal = row.size() == n && mem_utils::memcmp_eq_fixed_size<n>(row.data(), src);

        // Same convention as a three-way compare result: 0 means "equal".
        int cmp_result = is_equal ? 0 : 1;
        c[row_idx] = Op::apply(cmp_result, 0);
    });
}

// Handle str-column compare str-column.
// - Optimize UTF8_BIN and UTF8MB4_BIN
// - Check if columns do NOT contain tail space
Expand Down Expand Up @@ -175,20 +192,53 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
const TiDB::TiDBCollatorPtr & collator,
Result & c)
{
bool use_optimized_path = false;

switch (collator->getCollatorType())
{
case TiDB::ITiDBCollator::CollatorType::UTF8MB4_BIN:
case TiDB::ITiDBCollator::CollatorType::UTF8_BIN:
case TiDB::ITiDBCollator::CollatorType::LATIN1_BIN:
case TiDB::ITiDBCollator::CollatorType::ASCII_BIN:
{
size_t size = a_offsets.size();

std::string_view tar_str_view = RightTrim(b); // right trim const-str first

LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
#ifdef M
static_assert(false, "`M` is defined");
#endif
#define M(k) \
case k: \
{ \
LoopOneColumnCmpEqFixedStr<k, Op, true>(a_data, a_offsets, tar_str_view.data(), c); \
return true; \
}

switch (tar_str_view.size())
{
M(0);
M(1);
M(2);
M(3);
M(4);
M(5);
M(6);
M(7);
M(8);
M(9);
M(10);
M(11);
M(12);
M(13);
M(14);
M(15);
M(16);
default:
break;
}
#undef M
}

LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &tar_str_view](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0);
Expand All @@ -199,13 +249,48 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
}
});

use_optimized_path = true;
break;
return true;
}
case TiDB::ITiDBCollator::CollatorType::BINARY:
{
size_t size = a_offsets.size();
LoopOneColumn(a_data, a_offsets, size, [&c, &b](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
#ifdef M
static_assert(false, "`M` is defined");
#endif
#define M(k) \
case k: \
{ \
LoopOneColumnCmpEqFixedStr<k, Op, false>(a_data, a_offsets, b.data(), c); \
return true; \
}

switch (b.size())
{
M(0);
M(1);
M(2);
M(3);
M(4);
M(5);
M(6);
M(7);
M(8);
M(9);
M(10);
M(11);
M(12);
M(13);
M(14);
M(15);
M(16);
default:
break;
}
#undef M
}

LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &b](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
c[i] = Op::apply(RawStrEqualCompare((view), b), 0);
Expand All @@ -216,13 +301,12 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
}
});

use_optimized_path = true;
break;
return true;
}
default:
break;
}
return use_optimized_path;
return false;
}

} // namespace DB
2 changes: 0 additions & 2 deletions dbms/src/Storages/Transaction/CollatorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

#include <memory>

#define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))

namespace DB
{

Expand Down
4 changes: 4 additions & 0 deletions libs/libcommon/include/common/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@
# define ALWAYS_INLINE __forceinline
# define NO_INLINE static __declspec(noinline)
# define MAY_ALIAS
# define FLATTEN_INLINE_PURE
# define FLATTEN_INLINE
#else
# define ALWAYS_INLINE __attribute__((__always_inline__))
# define NO_INLINE __attribute__((__noinline__))
# define MAY_ALIAS __attribute__((__may_alias__))
# define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))
# define FLATTEN_INLINE __attribute__((flatten, always_inline))
#endif

#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__)
Expand Down
125 changes: 125 additions & 0 deletions libs/libcommon/include/common/fixed_mem_eq.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/defines.h>

#include <cstddef>
#include <cstdint>
#include <cstring>

namespace mem_utils
{

namespace detail
{

/// Read a value of type `T` from `p` without requiring `p` to be aligned for `T`.
///
/// Dereferencing `reinterpret_cast<const T *>(p)` on a `char` buffer is undefined
/// behaviour (misaligned access and a strict-aliasing violation). A fixed-size
/// `std::memcpy` is the well-defined way to express the same load; compilers
/// typically lower it to a single move instruction.
template <typename T>
ALWAYS_INLINE inline T load_unaligned(const char * p)
{
    T value;
    std::memcpy(&value, p, sizeof(T));
    return value;
}

} // namespace detail

/// Compare 0 bytes: always equal. Neither pointer is read.
FLATTEN_INLINE_PURE
constexpr inline bool memcmp_eq0(const char *, const char *)
{
    return true;
}

/// Return true iff the single byte at `a` equals the single byte at `b`.
FLATTEN_INLINE_PURE
inline bool memcmp_eq1(const char * a, const char * b)
{
    return a[0] == b[0];
}

/// Return true iff the 2 bytes at `a` equal the 2 bytes at `b`. No alignment is required.
FLATTEN_INLINE_PURE
inline bool memcmp_eq2(const char * a, const char * b)
{
    return detail::load_unaligned<uint16_t>(a) == detail::load_unaligned<uint16_t>(b);
}

/// Return true iff the 3 bytes at `a` equal the 3 bytes at `b`.
/// Implemented as two overlapping 2-byte compares (bytes [0,2) and [1,3)).
FLATTEN_INLINE_PURE
inline bool memcmp_eq3(const char * a, const char * b)
{
    // Bitwise `&` instead of `&&`: both halves are always evaluated, no branch.
    return memcmp_eq2(a, b) & memcmp_eq2(a + 1, b + 1);
}

/// Return true iff the 4 bytes at `a` equal the 4 bytes at `b`. No alignment is required.
FLATTEN_INLINE_PURE
inline bool memcmp_eq4(const char * a, const char * b)
{
    return detail::load_unaligned<uint32_t>(a) == detail::load_unaligned<uint32_t>(b);
}

/// Return true iff the 8 bytes at `a` equal the 8 bytes at `b`. No alignment is required.
FLATTEN_INLINE_PURE
inline bool memcmp_eq8(const char * a, const char * b)
{
    return detail::load_unaligned<uint64_t>(a) == detail::load_unaligned<uint64_t>(b);
}

/// Return true iff the first `k` bytes at `a` equal the first `k` bytes at `b`,
/// where `k` (0..32) is a compile-time constant.
///
/// Both pointers must reference at least `k` readable bytes; exactly `k` bytes are
/// inspected from each. Sizes that are not a power of two are handled with two
/// overlapping loads (e.g. k == 7 compares bytes [0,4) and [3,7)).
template <size_t k>
ALWAYS_INLINE inline bool memcmp_eq_fixed_size(const char * a, const char * b)
{
    static_assert(k <= 32, "memcmp_eq_fixed_size supports at most 32 bytes");

    if constexpr (k > 16)
    {
        // First 16 bytes, then the remaining (k - 16) bytes.
        return memcmp_eq_fixed_size<16>(a, b) & memcmp_eq_fixed_size<k - 16>(a + 16, b + 16);
    }
    else if constexpr (k > 8)
    {
        // Two (possibly overlapping) 8-byte windows: head and tail.
        return memcmp_eq8(a, b) & memcmp_eq8(a + k - 8, b + k - 8);
    }
    else if constexpr (k == 8)
    {
        return memcmp_eq8(a, b);
    }
    else if constexpr (k > 4)
    {
        // Two overlapping 4-byte windows: head and tail.
        return memcmp_eq4(a, b) & memcmp_eq4(a + k - 4, b + k - 4);
    }
    else if constexpr (k == 4)
    {
        return memcmp_eq4(a, b);
    }
    else if constexpr (k == 3)
    {
        return memcmp_eq3(a, b);
    }
    else if constexpr (k == 2)
    {
        return memcmp_eq2(a, b);
    }
    else if constexpr (k == 1)
    {
        return memcmp_eq1(a, b);
    }
    else
    {
        return memcmp_eq0(a, b);
    }
}

/*
Reference code generation noted when this helper was introduced
(worth re-checking after compiler or flag changes):
- with cxx flag `-mavx2`
- memcmp_eq_fixed_size<32>:
    vmovdqu ymm0, ymmword ptr [rdi]
    vpcmpeqq ymm0, ymm0, ymmword ptr [rsi]
    vmovmskpd eax, ymm0
    cmp al, 15
    sete al
    vzeroupper
    ret
- memcmp_eq_fixed_size<16>:
    vmovdqu xmm0, xmmword ptr [rdi]
    vpcmpeqq xmm0, xmm0, xmmword ptr [rsi]
    vmovmskpd eax, xmm0
    cmp al, 3
    sete al
    ret
*/

} // namespace mem_utils

0 comments on commit f13830c

Please sign in to comment.