Skip to content

Commit

Permalink
Preparing for xpu/perm32:
Browse files Browse the repository at this point in the history
- created builder.hpp for TPUBuild
- epu8id and similar are now Epu8.id()
- improved doc
  • Loading branch information
hivert committed Nov 5, 2023
1 parent be2f558 commit 95cf9b3
Show file tree
Hide file tree
Showing 11 changed files with 182 additions and 152 deletions.
4 changes: 2 additions & 2 deletions benchmark/bench_epu8.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//****************************************************************************//
// Copyright (C) 2018 Florent Hivert <[email protected]>, //
// Copyright (C) 2018-2023 Florent Hivert <[email protected]>, //
// //
// Distributed under the terms of the GNU General Public License (GPL) //
// //
Expand Down Expand Up @@ -32,7 +32,7 @@ namespace {
struct RoundsMask {
constexpr RoundsMask() : arr() {
for (unsigned i = 0; i < sorting_rounds.size(); ++i)
arr[i] = sorting_rounds[i] < epu8id;
arr[i] = sorting_rounds[i] < Epu8.id();
}
epu8 arr[sorting_rounds.size()];
};
Expand Down
4 changes: 2 additions & 2 deletions benchmark/bench_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//****************************************************************************//
// Copyright (C) 2016 Florent Hivert <[email protected]>, //
// Copyright (C) 2016-2023 Florent Hivert <[email protected]>, //
// //
// Distributed under the terms of the GNU General Public License (GPL) //
// //
Expand Down Expand Up @@ -35,7 +35,7 @@ std::vector<epu8> rand_epu8(size_t sz) {
inline epu8 rand_perm() {
static std::random_device rd;
static std::mt19937 g(rd());
epu8 res = HPCombi::epu8id;
epu8 res = HPCombi::Epu8.id();
auto &ar = HPCombi::as_array(res);
std::shuffle(ar.begin(), ar.end(), g);
return res;
Expand Down
4 changes: 2 additions & 2 deletions examples/pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ void make_subsets_of_size(int n, int k) {
template <int Size>
epu8 extract_pattern(epu8 perm, epu8 permset) {
epu8 cst = Epu8({}, Size);
epu8 res = permuted(perm, permset) | (epu8id >= cst);
res = sort_perm(res) & (epu8id < cst);
epu8 res = permuted(perm, permset) | (Epu8.id() >= cst);
res = sort_perm(res) & (Epu8.id() < cst);
return res;
}

Expand Down
18 changes: 9 additions & 9 deletions include/hpcombi/bmat8_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,14 @@ namespace detail {

inline void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept {
static const epu8 bound08 = simde_mm_slli_epi32(
static_cast<simde__m128i>(epu8id), 3); // shift for *8
static_cast<simde__m128i>(Epu8.id()), 3); // shift for *8
static const epu8 bound18 = bound08 + Epu8(0x80);
for (size_t slice8 = 0; slice8 < 16; slice8++) {
epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */
epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5);
set0 |= (bm5 == bound08) & shft;
set1 |= (bm5 == bound18) & shft;
block = simde_mm_shuffle_epi8(block, right_cycle);
block = simde_mm_shuffle_epi8(block, Epu8.right_cycle());
}
}
}
Expand All @@ -277,7 +277,7 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const noexcept {
res1 = epu8{};
for (size_t r = 0; r < 16; r++) {
detail::row_space_update_bitset(block0 | block1, res0, res1);
block1 = simde_mm_shuffle_epi8(block1, right_cycle);
block1 = simde_mm_shuffle_epi8(block1, Epu8.right_cycle());
}
}

Expand All @@ -292,7 +292,7 @@ inline uint64_t BMat8::row_space_size_bitset() const noexcept {

inline uint64_t BMat8::row_space_size_incl1() const noexcept {
epu8 in = simde_mm_set_epi64x(_data, _data);
epu8 block = epu8id;
epu8 block = Epu8.id();
uint64_t res = 0;
for (size_t r = 0; r < 16; r++) {
epu8 orincl{};
Expand All @@ -308,7 +308,7 @@ inline uint64_t BMat8::row_space_size_incl1() const noexcept {

inline uint64_t BMat8::row_space_size_incl() const noexcept {
epu8 in = simde_mm_set_epi64x(_data, _data);
epu8 block = epu8id;
epu8 block = Epu8.id();
uint64_t res = 0;
for (size_t r = 0; r < 16; r++) {
epu8 orincl = ((in | block) == block) & in;
Expand Down Expand Up @@ -466,11 +466,11 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const {
}

inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const noexcept {
epu8 x = permuted(simde_mm_set_epi64x(_data, 0), epu8rev);
epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), epu8rev);
epu8 x = permuted(simde_mm_set_epi64x(_data, 0), Epu8.rev());
epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), Epu8.rev());
// Vector ternary operator is not supported by clang.
// return (x != (epu8 {})) ? permutation_of(y, x) : epu8id;
return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8{});
// return (x != (epu8 {})) ? permutation_of(y, x) : Epu8.id();
return simde_mm_blendv_epi8(Epu8.id(), permutation_of(y, x), x != epu8{});
}

// Not noexcept because std::ostream::operator<< isn't
Expand Down
91 changes: 91 additions & 0 deletions include/hpcombi/builder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2023 Florent Hivert <[email protected]>, //
// //
// Distributed under the terms of the GNU General Public License (GPL) //
// //
// This code is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
// General Public License for more details. //
// //
// The full text of the GPL is available at: //
// //
// http://www.gnu.org/licenses/ //
////////////////////////////////////////////////////////////////////////////////

#ifndef HPCOMBI_BUILDER_HPP_INCLUDED
#define HPCOMBI_BUILDER_HPP_INCLUDED

/** Factory object for building SIMD vector constants.
 *
 *  Every builder is \c constexpr, so the produced vectors can be used to
 *  initialize compile-time constants.
 *
 *  @tparam TPU a Packed Unsigned integer vector type. It must be
 *          value-initializable, subscriptable with \c [] and
 *          brace-initializable from the list of its elements.
 */
template <class TPU> struct TPUBuild {
    /// Type of the elements of \c TPU
    using type_elem = typename std::remove_reference_t<decltype((TPU{})[0])>;
    /// Size in bytes of one element of \c TPU
    static constexpr size_t size_elem = sizeof(type_elem);
    /// Number of elements of \c TPU
    static constexpr size_t size = sizeof(TPU) / size_elem;

    /// \c std::array holding the same elements as a \c TPU
    using array = std::array<type_elem, size>;

    /** Build the \c TPU whose entry of index \c i is \c f(i), for each
     *  index \c i of the pack \c Is
     */
    template <class Fun, decltype(size)... Is>
    static constexpr TPU make_helper(Fun f, std::index_sequence<Is...>) {
        static_assert(std::is_invocable_v<Fun, type_elem>);
        return TPU{f(Is)...};
    }

    /** Build a \c TPU from a list of leading entries and a default value
     *
     *  @param il  the first entries; must hold at most \c size values
     *  @param def the value of every entry not given by \c il
     */
    inline constexpr TPU operator()(std::initializer_list<type_elem> il,
                                    type_elem def) const {
        HPCOMBI_ASSERT(il.size() <= size);
        // The entries are computed one by one rather than by reinterpreting
        // a std::array as a TPU: a reinterpret_cast is never allowed in a
        // constant expression, and the array does not have the alignment
        // of TPU.
        return make_helper(
            [&il, def](size_t i) -> type_elem {
                return i < il.size() ? il.begin()[i] : def;
            },
            std::make_index_sequence<size>{});
    }

    /** Build the \c TPU whose entry of index \c i is \c f(i) */
    template <class Fun> inline constexpr TPU operator()(Fun f) const {
        static_assert(std::is_invocable_v<Fun, type_elem>);
        return make_helper(f, std::make_index_sequence<size>{});
    }

    /** Build the \c TPU whose entries are all equal to \c c */
    inline constexpr TPU operator()(type_elem c) const {
        return make_helper([c](auto) { return c; },
                           std::make_index_sequence<size>{});
    }
    // explicit overloading for int constants
    inline constexpr TPU operator()(int c) const {
        return operator()(type_elem(c));
    }
    inline constexpr TPU operator()(size_t c) const {
        return operator()(type_elem(c));
    }

    /** Return the identity element of type \c TPU:
     *  the entry of index \c i is \c i
     */
    constexpr TPU id() const {
        return (*this)([](type_elem i) { return i; });
    }
    /** Return reversed element of type \c TPU:
     *  the entry of index \c i is <tt>size - 1 - i</tt>
     */
    constexpr TPU rev() const {
        return (*this)([](type_elem i) {
            return static_cast<type_elem>(size - 1 - i);
        });
    }
    /** Return the left cycle: the entry of index \c i is
     *  <tt>(i + size - 1) % size</tt>
     */
    constexpr TPU left_cycle() const {
        return (*this)([](type_elem i) {
            return static_cast<type_elem>((i + size - 1) % size);
        });
    }
    /** Return the right cycle: the entry of index \c i is
     *  <tt>(i + 1) % size</tt>
     */
    constexpr TPU right_cycle() const {
        return (*this)([](type_elem i) {
            return static_cast<type_elem>((i + 1) % size);
        });
    }
    /** Return the left shift duplicating the last entry: the entry of
     *  index \c i is <tt>i + 1</tt>, except the last one which is
     *  <tt>size - 1</tt>
     */
    constexpr TPU left_dup() const {
        // The bound is size - 1, not a literal 15, so that the result is
        // also right for vectors which do not have 16 entries.
        return (*this)([](type_elem i) {
            return static_cast<type_elem>(i == size - 1 ? size - 1 : i + 1);
        });
    }
    /** Return the right shift duplicating the first entry: the entry of
     *  index \c i is <tt>i - 1</tt>, except the first one which is \c 0
     */
    constexpr TPU right_dup() const {
        return (*this)([](type_elem i) {
            return static_cast<type_elem>(i == 0 ? 0 : i - 1);
        });
    }
    /** Return the popcount table: the entry of index \c i is the number of
     *  bits set among the 8 lowest bits of \c i
     */
    constexpr TPU popcount() const {
        return (*this)([](type_elem i) {
            return static_cast<type_elem>(
                ((i & 0x01) != 0 ? 1 : 0) + ((i & 0x02) != 0 ? 1 : 0) +
                ((i & 0x04) != 0 ? 1 : 0) + ((i & 0x08) != 0 ? 1 : 0) +
                ((i & 0x10) != 0 ? 1 : 0) + ((i & 0x20) != 0 ? 1 : 0) +
                ((i & 0x40) != 0 ? 1 : 0) + ((i & 0x80) != 0 ? 1 : 0));
        });
    }
};

#endif // HPCOMBI_BUILDER_HPP_INCLUDED
74 changes: 6 additions & 68 deletions include/hpcombi/epu8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <utility> // for make_index_sequence, ind...

#include "debug.hpp" // for HPCOMBI_ASSERT
#include "builder.hpp" // for TPUBuild
#include "vect_generic.hpp" // for VectGeneric

#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde...
Expand All @@ -49,73 +50,10 @@ static_assert(alignof(epu8) == 16,
/// Currently not really used except in experiments
using xpu8 = uint8_t __attribute__((vector_size(32)));

namespace detail { // Implementation detail code

/// Factory object for various SIMD constants in particular constexpr
template <class TPU> struct TPUBuild {
// Type for Packed Unsigned integer (TPU)
using type_elem = typename std::remove_reference_t<decltype((TPU{})[0])>;
static constexpr size_t size_elem = sizeof(type_elem);
static constexpr size_t size = sizeof(TPU) / size_elem;

using array = std::array<type_elem, size>;

template <class Fun, decltype(size)... Is>
static constexpr TPU make_helper(Fun f, std::index_sequence<Is...>) {
static_assert(std::is_invocable_v<Fun, type_elem>);
return TPU{f(Is)...};
}

inline TPU operator()(std::initializer_list<type_elem> il,
type_elem def) const {
HPCOMBI_ASSERT(il.size() <= size);
array res;
std::copy(il.begin(), il.end(), res.begin());
std::fill(res.begin() + il.size(), res.end(), def);
return reinterpret_cast<const TPU &>(res);
}

template <class Fun> inline constexpr TPU operator()(Fun f) const {
static_assert(std::is_invocable_v<Fun, type_elem>);
return make_helper(f, std::make_index_sequence<size>{});
}

inline constexpr TPU operator()(type_elem c) const {
return make_helper([c](auto) { return c; },
std::make_index_sequence<size>{});
}
// explicit overloading for int constants
inline constexpr TPU operator()(int c) const {
return operator()(type_elem(c));
}
inline constexpr TPU operator()(size_t c) const {
return operator()(type_elem(c));
}
};

} // namespace detail

// Single instance of the TPUBuild<epu8> factory object
static constexpr detail::TPUBuild<epu8> Epu8;

/// The identity #HPCombi::epu8
/// The image of i by the identity function
constexpr epu8 epu8id = Epu8([](uint8_t i) { return i; });
/// The reverted identity #HPCombi::epu8
constexpr epu8 epu8rev = Epu8([](uint8_t i) { return 15 - i; });
/// Left cycle #HPCombi::epu8 permutation
constexpr epu8 left_cycle = Epu8([](uint8_t i) { return (i + 15) % 16; });
/// Right cycle #HPCombi::epu8 permutation
constexpr epu8 right_cycle = Epu8([](uint8_t i) { return (i + 1) % 16; });
/// Left shift #HPCombi::epu8, duplicating the rightmost entry
constexpr epu8 left_dup = Epu8([](uint8_t i) { return i == 15 ? 15 : i + 1; });
/// Right shift #HPCombi::epu8, duplicating the leftmost entry
constexpr epu8 right_dup = Epu8([](uint8_t i) { return i == 0 ? 0 : i - 1; });
/// Popcount #HPCombi::epu8: the ith entry contains the number of bits set in i
constexpr epu8 popcount4 = Epu8([](uint8_t i) {
return ((i & 1) != 0 ? 1 : 0) + ((i & 2) != 0 ? 1 : 0) +
((i & 4) != 0 ? 1 : 0) + ((i & 8) != 0 ? 1 : 0);
});

/// Single instance of the TPUBuild<epu8> factory object
static constexpr TPUBuild<epu8> Epu8;


/** Cast a #HPCombi::epu8 to a c++ \c std::array
*
Expand Down Expand Up @@ -189,7 +127,7 @@ inline epu8 shifted_right(epu8 a) noexcept {
*/
inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); }
/** Reverting a #HPCombi::epu8 */
inline epu8 reverted(epu8 a) noexcept { return permuted(a, epu8rev); }
inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); }

/** Vector min between two #HPCombi::epu8 */
inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); }
Expand Down
Loading

0 comments on commit 95cf9b3

Please sign in to comment.