Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize equality comparison for small str with fixed size #5569

Merged
merged 12 commits into from
Aug 16, 2022
114 changes: 99 additions & 15 deletions dbms/src/Functions/CollationOperatorOptimized.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
#include <Storages/Transaction/CollatorUtils.h>
#include <common/StringRef.h>
#include <common/defines.h>
#include <common/fixed_mem_eq.h>

#include <cstddef>
#include <string_view>


namespace DB
{

Expand All @@ -50,7 +50,7 @@ struct IsEqualRelated<DB::NotEqualsOp<A...>>
// Loop columns and invoke callback for each pair.
// Remove last zero byte.
template <typename F>
__attribute__((flatten, always_inline)) inline void LoopTwoColumns(
FLATTEN_INLINE inline void LoopTwoColumns(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
const ColumnString::Chars_t & b_data,
Expand Down Expand Up @@ -79,7 +79,7 @@ __attribute__((flatten, always_inline)) inline void LoopTwoColumns(
// Loop one column and invoke callback for each pair.
// Remove last zero byte.
template <typename F>
__attribute__((flatten, always_inline)) inline void LoopOneColumn(
FLATTEN_INLINE inline void LoopOneColumn(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
size_t size,
Expand All @@ -97,6 +97,23 @@ __attribute__((flatten, always_inline)) inline void LoopOneColumn(
}
}

template <size_t n, typename Op, bool trim, typename Result>
FLATTEN_INLINE inline void LoopOneColumnCmpEqFixedStr(
const ColumnString::Chars_t & a_data,
const ColumnString::Offsets & a_offsets,
const char * src,
Result & c)
{
LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&](std::string_view view, size_t i) {
if constexpr (trim)
view = RightTrim(view);
auto res = 1;
if (view.size() == n)
res = mem_utils::memcmp_eq_fixed_size<n>(view.data(), src) ? 0 : 1;
c[i] = Op::apply(res, 0);
});
}

// Handle str-column compare str-column.
// - Optimize bin collator
// - Check if columns do NOT contain tail space
Expand Down Expand Up @@ -175,20 +192,53 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
const TiDB::TiDBCollatorPtr & collator,
Result & c)
{
bool use_optimized_path = false;

switch (collator->getCollatorType())
{
case TiDB::ITiDBCollator::CollatorType::UTF8MB4_BIN:
case TiDB::ITiDBCollator::CollatorType::UTF8_BIN:
case TiDB::ITiDBCollator::CollatorType::LATIN1_BIN:
case TiDB::ITiDBCollator::CollatorType::ASCII_BIN:
{
size_t size = a_offsets.size();

std::string_view tar_str_view = RightTrim(b); // right trim const-str first

LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
#ifdef M
static_assert(false, "`M` is defined");
#endif
#define M(k) \
case k: \
{ \
LoopOneColumnCmpEqFixedStr<k, Op, true>(a_data, a_offsets, tar_str_view.data(), c); \
return true; \
}

switch (tar_str_view.size())
{
M(0);
M(1);
M(2);
M(3);
M(4);
M(5);
M(6);
M(7);
M(8);
M(9);
M(10);
M(11);
M(12);
M(13);
M(14);
M(15);
M(16);
default:
break;
}
#undef M
}

LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &tar_str_view](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0);
Expand All @@ -199,13 +249,48 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
}
});

use_optimized_path = true;
break;
return true;
}
case TiDB::ITiDBCollator::CollatorType::BINARY:
{
size_t size = a_offsets.size();
LoopOneColumn(a_data, a_offsets, size, [&c, &b](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
#ifdef M
static_assert(false, "`M` is defined");
#endif
#define M(k) \
case k: \
{ \
LoopOneColumnCmpEqFixedStr<k, Op, false>(a_data, a_offsets, b.data(), c); \
return true; \
}

switch (b.size())
{
M(0);
M(1);
M(2);
M(3);
M(4);
M(5);
M(6);
M(7);
M(8);
M(9);
M(10);
M(11);
M(12);
M(13);
M(14);
M(15);
M(16);
default:
break;
}
#undef M
}

LoopOneColumn(a_data, a_offsets, a_offsets.size(), [&c, &b](const std::string_view & view, size_t i) {
if constexpr (IsEqualRelated<Op>::value)
{
c[i] = Op::apply(RawStrEqualCompare((view), b), 0);
Expand All @@ -216,13 +301,12 @@ ALWAYS_INLINE inline bool CompareStringVectorConstant(
}
});

use_optimized_path = true;
break;
return true;
}
default:
break;
}
return use_optimized_path;
return false;
}

} // namespace DB
2 changes: 0 additions & 2 deletions dbms/src/Storages/Transaction/CollatorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

#include <memory>

#define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))

namespace DB
{

Expand Down
4 changes: 4 additions & 0 deletions libs/libcommon/include/common/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@
# define ALWAYS_INLINE __forceinline
# define NO_INLINE static __declspec(noinline)
# define MAY_ALIAS
# define FLATTEN_INLINE_PURE
# define FLATTEN_INLINE
#else
# define ALWAYS_INLINE __attribute__((__always_inline__))
# define NO_INLINE __attribute__((__noinline__))
# define MAY_ALIAS __attribute__((__may_alias__))
# define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure))
# define FLATTEN_INLINE __attribute__((flatten, always_inline))
#endif

#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__)
Expand Down
Loading