From 0d4387ebe2de74bcbcc37e7ab068717cf5326af5 Mon Sep 17 00:00:00 2001 From: Robert Nagy Date: Wed, 18 Sep 2024 20:39:01 +0200 Subject: [PATCH] buffer: use simdutf convert_latin1_to_utf8_safe simdutf 5.5 includes convert_latin1_to_utf8_safe PR-URL: https://github.com/nodejs/node/pull/54798 Reviewed-By: Yagiz Nizipli Reviewed-By: James M Snell Reviewed-By: Daniel Lemire --- src/node_buffer.cc | 54 +--------------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 2302e8d94da325..ad6b794cf5dc09 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1440,58 +1440,6 @@ void CopyArrayBuffer(const FunctionCallbackInfo& args) { memcpy(dest, src, bytes_to_copy); } -size_t convert_latin1_to_utf8_s(const char* src, - size_t src_len, - char* dst, - size_t dst_len) noexcept { - size_t src_pos = 0; - size_t dst_pos = 0; - - const auto safe_len = std::min(src_len, dst_len >> 1); - if (safe_len > 16) { - // convert_latin1_to_utf8 will never write more than input length * 2. - dst_pos += simdutf::convert_latin1_to_utf8(src, safe_len, dst); - src_pos += safe_len; - } - - // Based on: - // https://github.com/simdutf/simdutf/blob/master/src/scalar/latin1_to_utf8/latin1_to_utf8.h - // with an upper limit on the number of bytes to write. - - const auto src_ptr = reinterpret_cast(src); - const auto dst_ptr = reinterpret_cast(dst); - - size_t skip_pos = src_pos; - while (src_pos < src_len && dst_pos < dst_len) { - if (skip_pos <= src_pos && src_pos + 16 <= src_len && - dst_pos + 16 <= dst_len) { - uint64_t v1; - memcpy(&v1, src_ptr + src_pos + 0, 8); - uint64_t v2; - memcpy(&v2, src_ptr + src_pos + 8, 8); - if (((v1 | v2) & UINT64_C(0x8080808080808080)) == 0) { - memcpy(dst_ptr + dst_pos, src_ptr + src_pos, 16); - dst_pos += 16; - src_pos += 16; - } else { - skip_pos = src_pos + 16; - } - } else { - const auto byte = src_ptr[src_pos++]; - if ((byte & 0x80) == 0) { - dst_ptr[dst_pos++] = byte; - } else if (dst_pos + 2 <= dst_len) { - dst_ptr[dst_pos++] = (byte >> 6) | 0b11000000; - dst_ptr[dst_pos++] = (byte & 0b111111) | 0b10000000; - } else { - break; - } - } - } - - return dst_pos; -} - template uint32_t WriteOneByteString(const char* src, uint32_t src_len, @@ -1502,7 +1450,7 @@ uint32_t WriteOneByteString(const char* src, } if (encoding == UTF8) { - return convert_latin1_to_utf8_s(src, src_len, dst, dst_len); + return simdutf::convert_latin1_to_utf8_safe(src, src_len, dst, dst_len); } else if (encoding == LATIN1 || encoding == ASCII) { const auto size = std::min(src_len, dst_len); memcpy(dst, src, size);