Skip to content

Commit

Permalink
buffer: use simdutf convert_latin1_to_utf8_safe
Browse files Browse the repository at this point in the history
simdutf 5.5 includes convert_latin1_to_utf8_safe

PR-URL: #54798
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Daniel Lemire <[email protected]>
  • Loading branch information
ronag authored and targos committed Oct 4, 2024
1 parent 90a87ca commit d109f1c
Showing 1 changed file with 1 addition and 53 deletions.
54 changes: 1 addition & 53 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1440,58 +1440,6 @@ void CopyArrayBuffer(const FunctionCallbackInfo<Value>& args) {
memcpy(dest, src, bytes_to_copy);
}

size_t convert_latin1_to_utf8_s(const char* src,
size_t src_len,
char* dst,
size_t dst_len) noexcept {
size_t src_pos = 0;
size_t dst_pos = 0;

const auto safe_len = std::min(src_len, dst_len >> 1);
if (safe_len > 16) {
// convert_latin1_to_utf8 will never write more than input length * 2.
dst_pos += simdutf::convert_latin1_to_utf8(src, safe_len, dst);
src_pos += safe_len;
}

// Based on:
// https://github.com/simdutf/simdutf/blob/master/src/scalar/latin1_to_utf8/latin1_to_utf8.h
// with an upper limit on the number of bytes to write.

const auto src_ptr = reinterpret_cast<const uint8_t*>(src);
const auto dst_ptr = reinterpret_cast<uint8_t*>(dst);

size_t skip_pos = src_pos;
while (src_pos < src_len && dst_pos < dst_len) {
if (skip_pos <= src_pos && src_pos + 16 <= src_len &&
dst_pos + 16 <= dst_len) {
uint64_t v1;
memcpy(&v1, src_ptr + src_pos + 0, 8);
uint64_t v2;
memcpy(&v2, src_ptr + src_pos + 8, 8);
if (((v1 | v2) & UINT64_C(0x8080808080808080)) == 0) {
memcpy(dst_ptr + dst_pos, src_ptr + src_pos, 16);
dst_pos += 16;
src_pos += 16;
} else {
skip_pos = src_pos + 16;
}
} else {
const auto byte = src_ptr[src_pos++];
if ((byte & 0x80) == 0) {
dst_ptr[dst_pos++] = byte;
} else if (dst_pos + 2 <= dst_len) {
dst_ptr[dst_pos++] = (byte >> 6) | 0b11000000;
dst_ptr[dst_pos++] = (byte & 0b111111) | 0b10000000;
} else {
break;
}
}
}

return dst_pos;
}

template <encoding encoding>
uint32_t WriteOneByteString(const char* src,
uint32_t src_len,
Expand All @@ -1502,7 +1450,7 @@ uint32_t WriteOneByteString(const char* src,
}

if (encoding == UTF8) {
return convert_latin1_to_utf8_s(src, src_len, dst, dst_len);
return simdutf::convert_latin1_to_utf8_safe(src, src_len, dst, dst_len);
} else if (encoding == LATIN1 || encoding == ASCII) {
const auto size = std::min(src_len, dst_len);
memcpy(dst, src, size);
Expand Down

0 comments on commit d109f1c

Please sign in to comment.