Skip to content

Commit

Permalink
neon/cgtz: Add implementations of remaining functions
Browse files: browse the repository at this point in the history
Fixes #800
  • Loading branch information
Glitch18 authored and nemequ committed May 28, 2021
1 parent 9930c43 commit 4d749b5
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 6 deletions.
54 changes: 48 additions & 6 deletions simde/arm/neon/cgtz.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,48 @@ HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Scalar compare-greater-than-zero for a signed 64-bit integer.
 *
 * Produces UINT64_MAX (every bit set) when `a` is strictly positive and 0
 * otherwise. When SIMDE_ARM_NEON_A64V8_NATIVE is defined the result comes
 * from the native vcgtzd_s64 intrinsic instead of the portable code. */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtzd_s64(int64_t a) {
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  /* Portable path: zero and negative inputs both give an empty mask. */
  if (a > 0) {
    return UINT64_MAX;
  }
  return 0;
#else
  return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_s64(a));
#endif
}
/* When native aliases are enabled, discard any existing vcgtzd_s64 macro and
 * map the NEON name onto simde_vcgtzd_s64. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzd_s64
#define vcgtzd_s64(a) simde_vcgtzd_s64(a)
#endif

/* Scalar compare-greater-than-zero for a 64-bit float.
 *
 * Produces UINT64_MAX (every bit set) when `a > 0.0` holds and 0 for every
 * input where that comparison is false. When SIMDE_ARM_NEON_A64V8_NATIVE is
 * defined the result comes from the native vcgtzd_f64 intrinsic instead. */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_vcgtzd_f64(simde_float64_t a) {
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  /* Portable path: anything not strictly above zero gives an empty mask. */
  if (a > SIMDE_FLOAT64_C(0.0)) {
    return UINT64_MAX;
  }
  return 0;
#else
  return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_f64(a));
#endif
}
/* When native aliases are enabled, discard any existing vcgtzd_f64 macro and
 * map the NEON name onto simde_vcgtzd_f64. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzd_f64
#define vcgtzd_f64(a) simde_vcgtzd_f64(a)
#endif

/* Scalar compare-greater-than-zero for a 32-bit float.
 *
 * Produces UINT32_MAX (every bit set) when `a > 0.0` holds and 0 for every
 * input where that comparison is false. When SIMDE_ARM_NEON_A64V8_NATIVE is
 * defined the result comes from the native vcgtzs_f32 intrinsic instead. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_vcgtzs_f32(simde_float32_t a) {
#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  /* Portable path: anything not strictly above zero gives an empty mask. */
  if (a > SIMDE_FLOAT32_C(0.0)) {
    return UINT32_MAX;
  }
  return 0;
#else
  return HEDLEY_STATIC_CAST(uint32_t, vcgtzs_f32(a));
#endif
}
/* When native aliases are enabled, discard any existing vcgtzs_f32 macro and
 * map the NEON name onto simde_vcgtzs_f32. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
#undef vcgtzs_f32
#define vcgtzs_f32(a) simde_vcgtzs_f32(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgtzq_f32(simde_float32x4_t a) {
Expand All @@ -54,7 +96,7 @@ simde_vcgtzq_f32(simde_float32x4_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
r_.values[i] = simde_vcgtzs_f32(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -82,7 +124,7 @@ simde_vcgtzq_f64(simde_float64x2_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
r_.values[i] = simde_vcgtzd_f64(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -194,7 +236,7 @@ simde_vcgtzq_s64(simde_int64x2_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT64_MAX : 0;
r_.values[i] = simde_vcgtzd_s64(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -222,7 +264,7 @@ simde_vcgtz_f32(simde_float32x2_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0;
r_.values[i] = simde_vcgtzs_f32(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -250,7 +292,7 @@ simde_vcgtz_f64(simde_float64x1_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0;
r_.values[i] = simde_vcgtzd_f64(a_.values[i]);
}
#endif

Expand Down Expand Up @@ -362,7 +404,7 @@ simde_vcgtz_s64(simde_int64x1_t a) {
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = (a_.values[i] > 0) ? UINT64_MAX : 0;
r_.values[i] = simde_vcgtzd_s64(a_.values[i]);
}
#endif

Expand Down
136 changes: 136 additions & 0 deletions test/arm/neon/cgtz.c
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,138 @@ test_simde_vcgtzq_s64 (SIMDE_MUNIT_TEST_ARGS) {
#endif
}

/* Test for simde_vcgtzd_s64.
 *
 * With the `#if 1` switch as written, every entry of a fixed table is fed to
 * simde_vcgtzd_s64 and the result is compared with the recorded mask; the
 * function then returns 0. Flipping the switch to 0 selects the generator
 * branch instead, which produces eight random inputs, passes each input and
 * its computed result to the simde_test_codegen_write_* helpers, and
 * returns 1. */
static int
test_simde_vcgtzd_s64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
/* a: value passed to simde_vcgtzd_s64; r: mask the call is expected to return. */
static const struct {
int64_t a;
uint64_t r;
} test_vec[] = {
{ -INT64_C( 7092078020180908211),
UINT64_C( 0) },
{ -INT64_C( 2081468342488495496),
UINT64_C( 0) },
{ INT64_C( 4158752479517361129),
UINT64_MAX },
{ -INT64_C( 4468962310629773370),
UINT64_C( 0) },
{ INT64_C( 1598791738623311349),
UINT64_MAX },
{ -INT64_C( 285058202827427690),
UINT64_C( 0) },
{ INT64_C( 1448273756347069320),
UINT64_MAX },
{ INT64_C( 7580350321362560764),
UINT64_MAX }
};

/* Walk the whole table; the element count is derived from the array itself. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
uint64_t r = simde_vcgtzd_s64(test_vec[i].a);
simde_assert_equal_u64(r, test_vec[i].r);
}

return 0;
#else
/* Generator branch: compiled only when the switch above is set to 0. */
fputc('\n', stdout);
for (int i = 0 ; i < 8 ; i++) {
int64_t a = simde_test_codegen_random_i64();
uint64_t r = simde_vcgtzd_s64(a);

simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST);
simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST);
}
/* NOTE(review): the non-zero return presumably marks the run as not passing
 * while vectors are being generated - confirm against the test harness. */
return 1;
#endif
}

/* Test for simde_vcgtzd_f64.
 *
 * With the `#if 1` switch as written, every entry of a fixed table is fed to
 * simde_vcgtzd_f64 and the result is compared with the recorded mask; the
 * function then returns 0. Flipping the switch to 0 selects the generator
 * branch instead, which produces eight random inputs, passes each input and
 * its computed result to the simde_test_codegen_write_* helpers, and
 * returns 1. */
static int
test_simde_vcgtzd_f64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
/* a: value passed to simde_vcgtzd_f64; r: mask the call is expected to return. */
static const struct {
simde_float64_t a;
uint64_t r;
} test_vec[] = {
{ SIMDE_FLOAT64_C( -807.76),
UINT64_C( 0) },
{ SIMDE_FLOAT64_C( 173.20),
UINT64_MAX },
{ SIMDE_FLOAT64_C( 642.73),
UINT64_MAX },
{ SIMDE_FLOAT64_C( 628.29),
UINT64_MAX },
{ SIMDE_FLOAT64_C( -508.17),
UINT64_C( 0) },
{ SIMDE_FLOAT64_C( 480.29),
UINT64_MAX },
{ SIMDE_FLOAT64_C( -490.40),
UINT64_C( 0) },
{ SIMDE_FLOAT64_C( 174.42),
UINT64_MAX }
};

/* Walk the whole table; the element count is derived from the array itself. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
uint64_t r = simde_vcgtzd_f64(test_vec[i].a);
simde_assert_equal_u64(r, test_vec[i].r);
}

return 0;
#else
/* Generator branch: compiled only when the switch above is set to 0. */
fputc('\n', stdout);
for (int i = 0 ; i < 8 ; i++) {
/* The -1000 / 1000 arguments are presumably the bounds of the random
 * range - confirm against simde_test_codegen_random_f64. */
simde_float64_t a = simde_test_codegen_random_f64(-1000, 1000);
uint64_t r = simde_vcgtzd_f64(a);

simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST);
simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST);
}
/* NOTE(review): the non-zero return presumably marks the run as not passing
 * while vectors are being generated - confirm against the test harness. */
return 1;
#endif
}

/* Test for simde_vcgtzs_f32.
 *
 * With the `#if 1` switch as written, every entry of a fixed table is fed to
 * simde_vcgtzs_f32 and the result is compared with the recorded mask; the
 * function then returns 0. Flipping the switch to 0 selects the generator
 * branch instead, which produces eight random inputs, passes each input and
 * its computed result to the simde_test_codegen_write_* helpers, and
 * returns 1. */
static int
test_simde_vcgtzs_f32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
/* a: value passed to simde_vcgtzs_f32; r: mask the call is expected to return. */
static const struct {
simde_float32_t a;
uint32_t r;
} test_vec[] = {
{ SIMDE_FLOAT32_C( 278.45),
UINT32_MAX },
{ SIMDE_FLOAT32_C( -325.45),
UINT32_C( 0) },
{ SIMDE_FLOAT32_C( -203.78),
UINT32_C( 0) },
{ SIMDE_FLOAT32_C( -992.26),
UINT32_C( 0) },
{ SIMDE_FLOAT32_C( 215.89),
UINT32_MAX },
{ SIMDE_FLOAT32_C( 908.52),
UINT32_MAX },
{ SIMDE_FLOAT32_C( -860.21),
UINT32_C( 0) },
{ SIMDE_FLOAT32_C( -813.65),
UINT32_C( 0) }
};

/* Walk the whole table; the element count is derived from the array itself. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
uint32_t r = simde_vcgtzs_f32(test_vec[i].a);
simde_assert_equal_u32(r, test_vec[i].r);
}

return 0;
#else
/* Generator branch: compiled only when the switch above is set to 0. */
fputc('\n', stdout);
for (int i = 0 ; i < 8 ; i++) {
/* The -1000 / 1000 arguments are presumably the bounds of the random
 * range - confirm against simde_test_codegen_random_f32. */
simde_float32_t a = simde_test_codegen_random_f32(-1000, 1000);
uint32_t r = simde_vcgtzs_f32(a);

simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST);
simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST);
}
/* NOTE(review): the non-zero return presumably marks the run as not passing
 * while vectors are being generated - confirm against the test harness. */
return 1;
#endif
}

SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f32)
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f64)
Expand All @@ -687,6 +819,10 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s8)
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s16)
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s32)
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s64)

SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzd_s64)
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzd_f64)
SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzs_f32)
SIMDE_TEST_FUNC_LIST_END

#include "test-neon-footer.h"

0 comments on commit 4d749b5

Please sign in to comment.