From 1baa3a98a6c83024406c5f40da507cf18ecbe0ce Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 12 Oct 2024 20:56:13 +0000 Subject: [PATCH] Fix: Missing `size_t` SVE overload --- include/stringzilla/stringzilla.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/stringzilla/stringzilla.h b/include/stringzilla/stringzilla.h index 299ee53..0068c11 100644 --- a/include/stringzilla/stringzilla.h +++ b/include/stringzilla/stringzilla.h @@ -6137,7 +6137,7 @@ SZ_PUBLIC void sz_fill_sve(sz_ptr_t target, sz_size_t length, sz_u8_t value) { if (length <= vec_len) { // Small buffer case: use mask to handle small writes - svbool_t mask = svwhilelt_b8((sz_size_t)0ull, length); + svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)length); svst1_u8(mask, (unsigned char *)target, value_vec); } else { @@ -6147,7 +6147,7 @@ SZ_PUBLIC void sz_fill_sve(sz_ptr_t target, sz_size_t length, sz_u8_t value) { sz_size_t body_length = length - head_length - tail_length; // Handle unaligned head - svbool_t head_mask = svwhilelt_b8((sz_size_t)0ull, head_length); + svbool_t head_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)head_length); svst1_u8(head_mask, (unsigned char *)target, value_vec); target += head_length; @@ -6157,7 +6157,7 @@ SZ_PUBLIC void sz_fill_sve(sz_ptr_t target, sz_size_t length, sz_u8_t value) { } // Handle unaligned tail - svbool_t tail_mask = svwhilelt_b8((sz_size_t)0ull, tail_length); + svbool_t tail_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)tail_length); svst1_u8(tail_mask, (unsigned char *)target, value_vec); } } @@ -6174,7 +6174,7 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length) // When the buffer is small, there isn't much to innovate. if (length <= vec_len) { // Small buffer case: use mask to handle small writes - svbool_t mask = svwhilelt_b8((sz_size_t)0ull, length); + svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)length); svuint8_t data = svld1_u8(mask, (unsigned char *)source); svst1_u8(mask, (unsigned char *)target, data); } @@ -6199,10 +6199,10 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length) sz_size_t body_length = length - head_length - tail_length; // Handle unaligned parts - svbool_t head_mask = svwhilelt_b8((sz_size_t)0ull, head_length); + svbool_t head_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)head_length); svuint8_t head_data = svld1_u8(head_mask, (unsigned char *)source); svst1_u8(head_mask, (unsigned char *)target, head_data); - svbool_t tail_mask = svwhilelt_b8((sz_size_t)0ull, tail_length); + svbool_t tail_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)tail_length); svuint8_t tail_data = svld1_u8(tail_mask, (unsigned char *)source + head_length + body_length); svst1_u8(tail_mask, (unsigned char *)target + head_length + body_length, tail_data); target += head_length; @@ -6218,7 +6218,7 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length) // Up to (vec_len * 2 - 1) bytes of data may be left in the body, // so we can unroll the last two optional loop iterations. if (body_length > vec_len) { - svbool_t mask = svwhilelt_b8((sz_size_t)0ull, body_length); + svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)body_length); svuint8_t data = svld1_u8(mask, (unsigned char *)source); svst1_u8(mask, (unsigned char *)target, data); body_length -= vec_len; @@ -6226,7 +6226,7 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length) target += body_length; } if (body_length) { - svbool_t mask = svwhilelt_b8((sz_size_t)0ull, body_length); + svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)body_length); svuint8_t data = svld1_u8(mask, (unsigned char *)source); svst1_u8(mask, (unsigned char *)target, data); }