Skip to content

Commit

Permalink
Optimize simd::leadingMask 10 times faster
Browse files Browse the repository at this point in the history
Differential Revision: D51555527
  • Loading branch information
Yuhta authored and facebook-github-bot committed Nov 27, 2023
1 parent f354679 commit 9915581
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 8 deletions.
65 changes: 57 additions & 8 deletions velox/common/base/SimdUtil-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,68 @@ int32_t indicesOfSetBits(
return result - originalResult;
}

namespace detail {

// NOTE(review): this span is a diff rendered without +/- markers. It appears
// to interleave the removed function-local-memo implementation of
// leadingMask() (the lines mentioning `kMemo`, `memo` and `LIKELY`) with the
// added LeadingMask struct (the lines mentioning `memo_`). Only the struct is
// documented below; confirm against the actual header before relying on this.
//
// LeadingMask<T, A> is a lookup table of N + 1 boolean batches, where N is the
// lane count of xsimd::batch_bool<T, A>. Entry i has lanes [0, i) set to true
// and the remaining lanes false, so entry 0 is all-false and entry N is
// all-true.
template <typename T, typename A>
xsimd::batch_bool<T, A> leadingMask(int n, const A&) {
constexpr int N = xsimd::batch_bool<T, A>::size;
static const auto kMemo = ({
std::array<xsimd::batch_bool<T, A>, N> memo;
struct LeadingMask {
// Fills the table once, at construction time.
LeadingMask() {
// Value-initialized, so every lane starts out false.
bool tmp[N]{};
for (int i = 0; i < N; ++i) {
memo[i] = xsimd::batch_bool<T, A>::load_unaligned(tmp);
// Snapshot the mask *before* turning lane i on, so that entry i ends up
// with exactly i leading lanes set.
memo_[i] = xsimd::batch_bool<T, A>::load_unaligned(tmp);
tmp[i] = true;
}
memo;
});
return LIKELY(n >= N) ? xsimd::batch_bool<T, A>(true) : kMemo[n];
// After the loop every lane of tmp is true: store the all-true mask at
// index N so that callers can index with N directly.
memo_[N] = xsimd::batch_bool<T, A>::load_unaligned(tmp);
}

// Returns the mask with the first `i` lanes set. No bounds checking is done
// here; `i` must be in [0, N].
xsimd::batch_bool<T, A> operator[](size_t i) const {
return memo_[i];
}

private:
// Number of lanes in one batch.
static constexpr int N = xsimd::batch_bool<T, A>::size;
// N + 1 entries: one for each possible count of leading lanes, 0 through N.
std::array<xsimd::batch_bool<T, A>, N + 1> memo_;
};

// Leading-mask tables for 32-bit and 64-bit lanes on the default
// architecture. These are declarations only; the objects are defined in a
// .cpp file so that a single instance is shared by every includer of this
// header.
extern const LeadingMask<int32_t, xsimd::default_arch> leadingMask32;
extern const LeadingMask<int64_t, xsimd::default_arch> leadingMask64;

// Primary template for the table-backed lookup. It is only declared here:
// the explicit specializations visible in this file cover int32_t, float,
// int64_t and double with xsimd::default_arch. Note that the return type is
// always a default_arch batch, regardless of A.
// NOTE(review): no generic definition is visible in this view, so any other
// <T, A> combination would presumably fail to link -- confirm.
template <typename T, typename A>
xsimd::batch_bool<T, xsimd::default_arch> leadingMask(int i, const A&);

// 32-bit integer lanes: read the mask with `i` leading lanes set from the
// shared 32-bit table. `i` is not range-checked here.
template <>
inline xsimd::batch_bool<int32_t, xsimd::default_arch> leadingMask(
    int i,
    const xsimd::default_arch& /*arch*/) {
  const auto& masks = leadingMask32;
  return masks[i];
}

// float lanes: float and int32_t batches have the same lane width, so the
// 32-bit integer table is reused and its raw register is re-wrapped as a
// float mask. `i` is not range-checked here.
template <>
inline xsimd::batch_bool<float, xsimd::default_arch> leadingMask(
    int i,
    const xsimd::default_arch& /*arch*/) {
  using FloatMask = xsimd::batch_bool<float, xsimd::default_arch>;
  const auto intMask = leadingMask32[i];
  return FloatMask(intMask.data);
}

// 64-bit integer lanes: read the mask with `i` leading lanes set from the
// shared 64-bit table. `i` is not range-checked here.
template <>
inline xsimd::batch_bool<int64_t, xsimd::default_arch> leadingMask(
    int i,
    const xsimd::default_arch& /*arch*/) {
  const auto& masks = leadingMask64;
  return masks[i];
}

// double lanes: double and int64_t batches have the same lane width, so the
// 64-bit integer table is reused and its raw register is re-wrapped as a
// double mask. `i` is not range-checked here.
template <>
inline xsimd::batch_bool<double, xsimd::default_arch> leadingMask(
    int i,
    const xsimd::default_arch& /*arch*/) {
  using DoubleMask = xsimd::batch_bool<double, xsimd::default_arch>;
  const auto intMask = leadingMask64[i];
  return DoubleMask(intMask.data);
}

} // namespace detail

// Returns a mask whose first `n` lanes are true and whose remaining lanes are
// false. Values of `n` above the lane count are capped, so they yield an
// all-true mask. `n` is only bounded from above: a negative value is passed
// through unchanged to the table lookup, so callers must pass n >= 0.
template <typename T, typename A>
xsimd::batch_bool<T, A> leadingMask(int n, const A& arch) {
  constexpr int kLanes = xsimd::batch_bool<T, A>::size;
  const int index = (kLanes < n) ? kLanes : n;
  return detail::leadingMask<T, A>(index, arch);
}

namespace detail {
Expand Down
3 changes: 3 additions & 0 deletions velox/common/base/SimdUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ namespace detail {
// Two int32_t tables aligned to kPadding, defined here without an
// initializer.
// NOTE(review): presumably lookup tables populated elsewhere at startup; the
// code that fills them is not visible in this hunk -- confirm.
alignas(kPadding) int32_t byteSetBits[256][8];
alignas(kPadding) int32_t permute4x64Indices[16][8];

// The single definitions of the leading-mask tables for 32-bit and 64-bit
// lanes. Each table is filled in by the LeadingMask default constructor.
// NOTE(review): that constructor is not constexpr, so these objects are
// presumably initialized dynamically at program start; confirm that no static
// initializer in another translation unit reads them before that happens.
const LeadingMask<int32_t, xsimd::default_arch> leadingMask32;
const LeadingMask<int64_t, xsimd::default_arch> leadingMask64;

} // namespace detail

namespace {
Expand Down

0 comments on commit 9915581

Please sign in to comment.