Skip to content

Commit

Permalink
Fix: Missing size_t SVE overload
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Oct 12, 2024
1 parent 3898481 commit 1baa3a9
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions include/stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -6137,7 +6137,7 @@ SZ_PUBLIC void sz_fill_sve(sz_ptr_t target, sz_size_t length, sz_u8_t value) {

if (length <= vec_len) {
// Small buffer case: use mask to handle small writes
- svbool_t mask = svwhilelt_b8((sz_size_t)0ull, length);
+ svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)length);
svst1_u8(mask, (unsigned char *)target, value_vec);
}
else {
Expand All @@ -6147,7 +6147,7 @@ SZ_PUBLIC void sz_fill_sve(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
sz_size_t body_length = length - head_length - tail_length;

// Handle unaligned head
- svbool_t head_mask = svwhilelt_b8((sz_size_t)0ull, head_length);
+ svbool_t head_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)head_length);
svst1_u8(head_mask, (unsigned char *)target, value_vec);
target += head_length;

Expand All @@ -6157,7 +6157,7 @@ SZ_PUBLIC void sz_fill_sve(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
}

// Handle unaligned tail
- svbool_t tail_mask = svwhilelt_b8((sz_size_t)0ull, tail_length);
+ svbool_t tail_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)tail_length);
svst1_u8(tail_mask, (unsigned char *)target, value_vec);
}
}
Expand All @@ -6174,7 +6174,7 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length)
// When the buffer is small, there isn't much to innovate.
if (length <= vec_len) {
// Small buffer case: use mask to handle small writes
- svbool_t mask = svwhilelt_b8((sz_size_t)0ull, length);
+ svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)length);
svuint8_t data = svld1_u8(mask, (unsigned char *)source);
svst1_u8(mask, (unsigned char *)target, data);
}
Expand All @@ -6199,10 +6199,10 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length)
sz_size_t body_length = length - head_length - tail_length;

// Handle unaligned parts
- svbool_t head_mask = svwhilelt_b8((sz_size_t)0ull, head_length);
+ svbool_t head_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)head_length);
svuint8_t head_data = svld1_u8(head_mask, (unsigned char *)source);
svst1_u8(head_mask, (unsigned char *)target, head_data);
- svbool_t tail_mask = svwhilelt_b8((sz_size_t)0ull, tail_length);
+ svbool_t tail_mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)tail_length);
svuint8_t tail_data = svld1_u8(tail_mask, (unsigned char *)source + head_length + body_length);
svst1_u8(tail_mask, (unsigned char *)target + head_length + body_length, tail_data);
target += head_length;
Expand All @@ -6218,15 +6218,15 @@ SZ_PUBLIC void sz_copy_sve(sz_ptr_t target, sz_cptr_t source, sz_size_t length)
// Up to (vec_len * 2 - 1) bytes of data may be left in the body,
// so we can unroll the last two optional loop iterations.
if (body_length > vec_len) {
- svbool_t mask = svwhilelt_b8((sz_size_t)0ull, body_length);
+ svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)body_length);
svuint8_t data = svld1_u8(mask, (unsigned char *)source);
svst1_u8(mask, (unsigned char *)target, data);
body_length -= vec_len;
source += body_length;
target += body_length;
}
if (body_length) {
- svbool_t mask = svwhilelt_b8((sz_size_t)0ull, body_length);
+ svbool_t mask = svwhilelt_b8((sz_u32_t)0ull, (sz_u32_t)body_length);
svuint8_t data = svld1_u8(mask, (unsigned char *)source);
svst1_u8(mask, (unsigned char *)target, data);
}
Expand Down

0 comments on commit 1baa3a9

Please sign in to comment.