Skip to content

Commit

Permalink
x86 sse4.1 mm_testz_si128: fix backwards short circuit logic
Browse the repository at this point in the history
Co-authored-by: Florent Hivert <[email protected]>
  • Loading branch information
mr-c and hivert committed Oct 27, 2023
1 parent d5d6d10 commit f132275
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
11 changes: 8 additions & 3 deletions simde/x86/sse4.1.h
Original file line number Diff line number Diff line change
Expand Up @@ -2340,14 +2340,19 @@ simde_mm_testz_si128 (simde__m128i a, simde__m128i b) {
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
  v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
  return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0;
+ #elif defined(SIMDE_HAVE_INT128_)
+ if ((a_.u128[0] & b_.u128[0]) == 0) {
+ return 1;
+ }
+ return 0;
  #else
  for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) {
- if ((a_.u64[i] & b_.u64[i]) == 0)
- return 1;
+ if ((a_.u64[i] & b_.u64[i]) > 0)
+ return 0;
  }
  #endif

- return 0;
+ return 1;
  #endif
  }
  #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
Expand Down
7 changes: 5 additions & 2 deletions test/x86/sse4.1.c
Original file line number Diff line number Diff line change
Expand Up @@ -3552,7 +3552,7 @@ test_simde_mm_testz_si128(SIMDE_MUNIT_TEST_ARGS) {
  simde__m128i a;
  simde__m128i b;
  int r;
- } test_vec[8] = {
+ } test_vec[] = {
  { simde_x_mm_set_epu32(UINT32_C(3990889899), UINT32_C(4230789057), UINT32_C(3808461042), UINT32_C(3371427410)),
  simde_x_mm_set_epu32(UINT32_C( 331792), UINT32_C( 63127558), UINT32_C( 483990789), UINT32_C( 530828)),
  1 },
Expand All @@ -3576,7 +3576,10 @@ test_simde_mm_testz_si128(SIMDE_MUNIT_TEST_ARGS) {
  1 },
  { simde_x_mm_set_epu32(UINT32_C(3782150825), UINT32_C(3325635017), UINT32_C(1617333560), UINT32_C(3634437083)),
  simde_x_mm_set_epu32(UINT32_C( 167825730), UINT32_C( 294047748), UINT32_C( 126906945), UINT32_C( 17884164)),
- 1 }
+ 1 },
+ { simde_x_mm_set_epu8(UINT8_C(0), UINT8_C(1), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0)),
+ simde_x_mm_set_epu8(UINT8_C(0), UINT8_C(1), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0), UINT8_C(0)),
+ 0 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
Expand Down

0 comments on commit f132275

Please sign in to comment.