Skip to content

Commit

Permalink
Factorize sum style cleanup into remove_garbage
Browse files Browse the repository at this point in the history
  • Loading branch information
jfalcou committed Jun 30, 2022
1 parent 49b9aa2 commit c50e4f5
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 40 deletions.
23 changes: 3 additions & 20 deletions include/eve/detail/function/simd/arm/neon/sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,10 @@

#include <eve/detail/abi.hpp>
#include <eve/detail/category.hpp>
#include <eve/module/core/regular/slide_right.hpp>
#include <eve/detail/remove_garbage.hpp>

namespace eve::detail
{
// Returns v unchanged when it already has at least as many lanes as the
// expected cardinal of T for the arm_64_ ABI. Otherwise, v is reinterpreted as
// a wide of that expected cardinal, slid right by the number of missing lanes,
// and reinterpreted back to its original type to clean up potential garbage.
template<typename T, typename N>
EVE_FORCEINLINE wide<T,N> arm_cleanup(wide<T,N> v) noexcept
{
  using full_t = expected_cardinal_t<T,arm_64_>;

  if constexpr(N::value >= full_t::value)
  {
    // Nothing to clean: no lane sits outside of the logical cardinal.
    return v;
  }
  else
  {
    // Count of lanes present in the full-width view but not in wide<T,N>.
    constexpr auto missing = full_t::value - N::value;

    auto const widened = bit_cast(v, as<wide<T,full_t>>());
    return bit_cast(slide_right(widened, index<missing>), as(v));
  }
}

template<typename T, typename N>
EVE_FORCEINLINE wide<T,N> arm_sum_impl(wide<T,N> v) noexcept
{
Expand Down Expand Up @@ -57,7 +40,7 @@ namespace eve::detail
{
if constexpr( std::same_as<abi_t<T,N>, arm_64_> )
{
v = arm_cleanup(v);
v = slide_garbage(v);
if constexpr(sizeof(T) <= 4) v = arm_sum_impl(v);
if constexpr(sizeof(T) <= 2) v = arm_sum_impl(v);
if constexpr(sizeof(T) <= 1) v = arm_sum_impl(v);
Expand Down Expand Up @@ -85,7 +68,7 @@ namespace eve::detail
{
if constexpr(current_api >= asimd)
{
v = arm_cleanup(v);
v = slide_garbage(v);
constexpr auto c = categorize<wide<T, N>>();

if constexpr( c== category::float64x2 ) return vaddvq_f64(v);
Expand Down
23 changes: 4 additions & 19 deletions include/eve/detail/function/simd/x86/sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include <eve/detail/abi.hpp>
#include <eve/detail/category.hpp>
#include <eve/module/core/regular/slide_right.hpp>
#include <eve/detail/remove_garbage.hpp>

namespace eve::detail
{
Expand Down Expand Up @@ -39,11 +39,7 @@ namespace eve::detail
else if constexpr( c == category::float32x4 )
{
// Clean up garbage if needed
if constexpr(N::value == 2) v = bit_cast( slide_right ( bit_cast(v,as<wide<T,fixed<4>>>())
, index<2>
)
, as(v)
);
v = slide_garbage(v);

if constexpr( current_api >= sse3 )
{
Expand All @@ -63,11 +59,7 @@ namespace eve::detail
else if constexpr( c == category::int32x4 || c == category::uint32x4 )
{
// Clean up garbage if needed
if constexpr(N::value == 2) v = bit_cast( slide_right ( bit_cast(v,as<wide<T,fixed<4>>>())
, index<2>
)
, as(v)
);
v = slide_garbage(v);

constexpr auto shuf = _MM_SHUFFLE(1, 0, 3, 2);
__m128i sum64;
Expand All @@ -81,14 +73,7 @@ namespace eve::detail
else if constexpr( c == category::uint8x16 || c == category::int8x16 )
{
// Clean up garbage if needed
if constexpr(N::value < 16)
{
v = bit_cast( slide_right ( bit_cast(v,as<wide<T,fixed<16>>>())
, index<16-N::value>
)
, as(v)
);
}
v = slide_garbage(v);

// https://stackoverflow.com/questions/36998538/fastest-way-to-horizontally-sum-sse-unsigned-byte-vector
__m128i vsum = _mm_sad_epu8(v, _mm_setzero_si128());
Expand Down
19 changes: 19 additions & 0 deletions include/eve/detail/remove_garbage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <eve/traits.hpp>
#include <eve/detail/function/bit_cast.hpp>
#include <eve/module/core/regular/slide_right.hpp>
#include <utility>

namespace eve::detail
Expand All @@ -29,6 +30,24 @@ namespace eve::detail
v = bit_cast(bit_cast(v,as<as_wide_t<v_t,ec_t>>()) & exact_mask, as(v));
}

return v;
}

template<typename W> auto slide_garbage(W v) noexcept
{
using v_t = element_type_t<W>;
using c_t = cardinal_t<W>;
using ec_t = expected_cardinal_t<v_t, typename W::abi_type>;

if constexpr(c_t::value < ec_t::value)
{
v = bit_cast( slide_right ( bit_cast(v,as<wide<v_t,ec_t>>())
, index<ec_t::value - c_t::value>
)
, as(v)
);
}

return v;
}
}
1 change: 0 additions & 1 deletion include/eve/module/core/regular/impl/slide_left.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
//==================================================================================================
#pragma once

#include <eve/module/core/regular/slide_right.hpp>
#include <eve/concept/vectorized.hpp>
#include <eve/detail/abi.hpp>
#include <eve/detail/meta.hpp>
Expand Down

0 comments on commit c50e4f5

Please sign in to comment.