Skip to content

Commit

Permalink
[Fix] Fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
yyctw authored and Eric Yi-Yen Chung committed Oct 23, 2023
1 parent 1423839 commit cadfb94
Show file tree
Hide file tree
Showing 73 changed files with 9,383 additions and 3,743 deletions.
12 changes: 7 additions & 5 deletions simde/arm/neon/abd.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ SIMDE_BEGIN_DECLS_
/* Absolute difference of two half-precision scalars: yields |a - b|.
 *
 * When the native guard below holds, the call is forwarded to vabdh_f16().
 * Otherwise the difference is computed in portable C and negated when it is
 * below zero.
 *
 * NOTE(review): this span carries two `#if` lines for a single `#endif` and
 * two fallback bodies back to back. It looks like the removed and added
 * sides of a change shown without +/- markers -- confirm against the real
 * header before treating it as compilable code. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16_t
simde_vabdh_f16(simde_float16_t a, simde_float16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Same guard, additionally requiring SIMDE_ARM_NEON_FP16 before the native
 * intrinsic is used. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vabdh_f16(a, b);
#else
/* First fallback variant: subtract and compare directly on simde_float16_t. */
simde_float16_t r = a - b;
return r < 0 ? -r : r;
/* Second fallback variant: widen both operands to simde_float32_t, subtract
 * there, and convert the non-negative magnitude back to simde_float16_t. */
simde_float32_t a_ = simde_float16_to_float32(a);
simde_float32_t b_ = simde_float16_to_float32(b);
simde_float32_t r_ = a_ - b_;
return r_ < 0 ? simde_float16_from_float32(-r_) : simde_float16_from_float32(r_);
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand Down Expand Up @@ -86,7 +88,7 @@ simde_vabdd_f64(simde_float64_t a, simde_float64_t b) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vabd_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vabd_f16(a, b);
#else
return simde_vabs_f16(simde_vsub_f16(a, b));
Expand Down Expand Up @@ -253,7 +255,7 @@ simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vabdq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vabdq_f16(a, b);
#else
return simde_vabsq_f16(simde_vsubq_f16(a, b));
Expand Down
12 changes: 6 additions & 6 deletions simde/arm/neon/addhn_high.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ simde_vaddhn_high_s16(simde_int8x8_t r, simde_int16x8_t a, simde_int16x8_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddhn_high_s16(r, a, b);
#else
return simde_vcombine_s16(r, simde_vaddhn_s16(a, b));
return simde_vcombine_s8(r, simde_vaddhn_s16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -54,7 +54,7 @@ simde_vaddhn_high_s32(simde_int16x4_t r, simde_int32x4_t a, simde_int32x4_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddhn_high_s32(r, a, b);
#else
return simde_vcombine_s32(r, simde_vaddhn_s32(a, b));
return simde_vcombine_s16(r, simde_vaddhn_s32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -68,7 +68,7 @@ simde_vaddhn_high_s64(simde_int32x2_t r, simde_int64x2_t a, simde_int64x2_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddhn_high_s64(r, a, b);
#else
return simde_vcombine_s64(r, simde_vaddhn_s64(a, b));
return simde_vcombine_s32(r, simde_vaddhn_s64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -82,7 +82,7 @@ simde_vaddhn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a, simde_uint16x8_t b)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddhn_high_u16(r, a, b);
#else
return simde_vcombine_u16(r, simde_vaddhn_u16(a, b));
return simde_vcombine_u8(r, simde_vaddhn_u16(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -96,7 +96,7 @@ simde_vaddhn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a, simde_uint32x4_t b
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddhn_high_u32(r, a, b);
#else
return simde_vcombine_u32(r, simde_vaddhn_u32(a, b));
return simde_vcombine_u16(r, simde_vaddhn_u32(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -110,7 +110,7 @@ simde_vaddhn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a, simde_uint64x2_t b
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vaddhn_high_u64(r, a, b);
#else
return simde_vcombine_u64(r, simde_vaddhn_u64(a, b));
return simde_vcombine_u32(r, simde_vaddhn_u64(a, b));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/cgez.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ simde_vcgezs_f32(simde_float32_t a) {
/* Tests whether a half-precision scalar is greater than or equal to zero.
 *
 * Native path: returns the result of vcgezh_f16(a) cast to uint16_t.
 * Fallback path: returns UINT16_MAX when the comparison holds, 0 otherwise.
 *
 * NOTE(review): two `#if` lines share one `#endif` and two fallback returns
 * follow each other. This looks like both sides of a change rendered without
 * +/- markers -- confirm against the real header. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcgezh_f16(simde_float16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Same guard, additionally requiring SIMDE_ARM_NEON_FP16. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return HEDLEY_STATIC_CAST(uint16_t, vcgezh_f16(a));
#else
/* First fallback variant: compare the simde_float16_t value directly
 * against a half-precision zero constant. */
return (a >= SIMDE_FLOAT16_C(0.0)) ? UINT16_MAX : 0;
/* Second fallback variant: convert to simde_float32_t first and compare
 * against a single-precision zero constant. */
return (simde_float16_to_float32(a) >= SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -96,7 +96,7 @@ simde_vcgezh_f16(simde_float16_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgezq_f16(simde_float16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcgezq_f16(a);
#else
simde_float16x8_private a_ = simde_float16x8_to_private(a);
Expand Down Expand Up @@ -286,7 +286,7 @@ simde_vcgezq_s64(simde_int64x2_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgez_f16(simde_float16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcgez_f16(a);
#else
simde_float16x4_private a_ = simde_float16x4_to_private(a);
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/cgtz.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ simde_vcgtzd_f64(simde_float64_t a) {
/* Tests whether a half-precision scalar is strictly greater than zero.
 *
 * Native path: returns the result of vcgtzh_f16(a) cast to uint16_t.
 * Fallback path: returns UINT16_MAX when the comparison holds, 0 otherwise.
 *
 * NOTE(review): two `#if` lines share one `#endif` and two fallback returns
 * follow each other. This looks like both sides of a change rendered without
 * +/- markers -- confirm against the real header. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcgtzh_f16(simde_float16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Same guard, additionally requiring SIMDE_ARM_NEON_FP16. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return HEDLEY_STATIC_CAST(uint16_t, vcgtzh_f16(a));
#else
/* First fallback variant: compare the simde_float16_t value directly
 * against a half-precision zero constant. */
return (a > SIMDE_FLOAT16_C(0.0)) ? UINT16_MAX : 0;
/* Second fallback variant: convert to simde_float32_t first and compare
 * against a single-precision zero constant. */
return (simde_float16_to_float32(a) > SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -84,7 +84,7 @@ simde_vcgtzh_f16(simde_float16_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgtzq_f16(simde_float16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcgtzq_f16(a);
#else
simde_float16x8_private a_ = simde_float16x8_to_private(a);
Expand Down Expand Up @@ -288,7 +288,7 @@ simde_vcgtzq_s64(simde_int64x2_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcgtz_f16(simde_float16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcgtz_f16(a);
#else
simde_float16x4_private a_ = simde_float16x4_to_private(a);
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/cle.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@ simde_vcles_f32(simde_float32_t a, simde_float32_t b) {
/* Tests whether half-precision scalar `a` is less than or equal to `b`.
 *
 * Native path: returns the result of vcleh_f16(a, b) cast to uint16_t.
 * Fallback path: returns UINT16_MAX when a <= b, 0 otherwise.
 *
 * NOTE(review): two `#if` lines share one `#endif` and two fallback returns
 * follow each other. This looks like both sides of a change rendered without
 * +/- markers -- confirm against the real header. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcleh_f16(simde_float16_t a, simde_float16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Same guard, additionally requiring SIMDE_ARM_NEON_FP16. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return HEDLEY_STATIC_CAST(uint16_t, vcleh_f16(a, b));
#else
/* First fallback variant: compare the two simde_float16_t values directly. */
return (a <= b) ? UINT16_MAX : 0;
/* Second fallback variant: convert both operands to simde_float32_t and
 * compare the widened values. */
return (simde_float16_to_float32(a) <= simde_float16_to_float32(b)) ? UINT16_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -108,7 +108,7 @@ simde_vcleh_f16(simde_float16_t a, simde_float16_t b) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcleq_f16(simde_float16x8_t a, simde_float16x8_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcleq_f16(a, b);
#else
simde_float16x8_private
Expand Down Expand Up @@ -517,7 +517,7 @@ simde_vcleq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcle_f16(simde_float16x4_t a, simde_float16x4_t b) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcle_f16(a, b);
#else
simde_float16x4_private
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/cltz.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ simde_vcltzs_f32(simde_float32_t a) {
/* Tests whether a half-precision scalar is strictly less than zero.
 *
 * Native path: returns the result of vcltzh_f16(a) cast to uint16_t.
 * Fallback path: returns UINT16_MAX when the comparison holds, 0 otherwise.
 *
 * NOTE(review): two `#if` lines share one `#endif` and two fallback returns
 * follow each other. This looks like both sides of a change rendered without
 * +/- markers -- confirm against the real header. */
SIMDE_FUNCTION_ATTRIBUTES
uint16_t
simde_vcltzh_f16(simde_float16_t a) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
/* Same guard, additionally requiring SIMDE_ARM_NEON_FP16. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return HEDLEY_STATIC_CAST(uint16_t, vcltzh_f16(a));
#else
/* First fallback variant: compare the simde_float16_t value directly
 * against a half-precision zero constant. */
return (a < SIMDE_FLOAT16_C(0.0)) ? UINT16_MAX : 0;
/* Second fallback variant: convert to simde_float32_t first and compare
 * against a single-precision zero constant. */
return (simde_float16_to_float32(a) < SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0;
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -99,7 +99,7 @@ simde_vcltzh_f16(simde_float16_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcltz_f16(simde_float16x4_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcltz_f16(a);
#else
simde_float16x4_private a_ = simde_float16x4_to_private(a);
Expand Down Expand Up @@ -241,7 +241,7 @@ simde_vcltz_s64(simde_int64x1_t a) {
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcltzq_f16(simde_float16x8_t a) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
return vcltzq_f16(a);
#else
simde_float16x8_private a_ = simde_float16x8_to_private(a);
Expand Down
Loading

0 comments on commit cadfb94

Please sign in to comment.