Skip to content

Commit

Permalink
buffer: improve base64 and base64url performance
Browse files Browse the repository at this point in the history
Co-authored-by: Daniel Lemire <[email protected]>
  • Loading branch information
anonrig and lemire committed Apr 8, 2024
1 parent db17461 commit b9b5d45
Showing 1 changed file with 88 additions and 14 deletions.
102 changes: 88 additions & 14 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,76 @@ size_t StringBytes::Write(Isolate* isolate,
}

case BASE64URL:
// Fall through
if (str->IsExternalOneByte()) { // 8-bit case
auto ext = str->GetExternalOneByteStringResource();
// Try with WHATWG base64 standard first, adapted for base64url
simdutf::result r = simdutf::base64_to_binary_safe(
ext->data(), ext->length(), buf, buflen, simdutf::base64_url);
if (r.error == simdutf::error_code::SUCCESS) {
nbytes = buflen;
} else {
// The input does not follow the WHATWG forgiving-base64 specification
// (adapted for base64url with + and / replaced by - and _).
// https://infra.spec.whatwg.org/#forgiving-base64-decode
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
}
} else { // 16-bit case
// V8 typically stores a base64url string as an 8-bit (one-byte) string, so
// str->IsOneByte() is usually true. In that case, constructing the
// String::Value below allocates a temporary buffer and copies the string
// into it as 16-bit code units, which is likely a performance bottleneck.
String::Value value(isolate, str);
// Try with WHATWG base64 standard first, adapted for base64url
simdutf::result r = simdutf::base64_to_binary_safe(
reinterpret_cast<const char16_t*>(*value),
value.length(),
buf,
buflen,
simdutf::base64_url);
if (r.error == simdutf::error_code::SUCCESS) {
nbytes = buflen;
} else {
// The input does not follow the WHATWG forgiving-base64 specification
// (adapted for base64url with + and / replaced by - and _).
// https://infra.spec.whatwg.org/#forgiving-base64-decode
nbytes = base64_decode(buf, buflen, *value, value.length());
}
}
case BASE64:
if (str->IsExternalOneByte()) {
if (str->IsExternalOneByte()) { // 8-bit case
auto ext = str->GetExternalOneByteStringResource();
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
} else {
// Try with WHATWG base64 standard first
auto result = simdutf::base64_to_binary_safe(
ext->data(), ext->length(), buf, buflen, simdutf::base64_default);
if (result.error == simdutf::error_code::SUCCESS) {
nbytes = buflen;
} else {
// The input does not follow the WHATWG forgiving-base64 specification
// https://infra.spec.whatwg.org/#forgiving-base64-decode
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
}
} else { // 16-bit case
// V8 typically stores a base64 string as an 8-bit (one-byte) string, so
// str->IsOneByte() is usually true. In that case, constructing the
// String::Value below allocates a temporary buffer and copies the string
// into it as 16-bit code units, which is likely a performance bottleneck.
String::Value value(isolate, str);
nbytes = base64_decode(buf, buflen, *value, value.length());
// Try with WHATWG base64 standard first
auto result = simdutf::base64_to_binary_safe(
reinterpret_cast<const char16_t*>(*value),
value.length(),
buf,
buflen,
simdutf::base64_default);
if (result.error == simdutf::error_code::SUCCESS) {
nbytes = buflen;
} else {
// The input does not follow the WHATWG forgiving-base64 specification
// https://infra.spec.whatwg.org/#forgiving-base64-decode
nbytes = base64_decode(buf, buflen, *value, value.length());
}
}
break;

Expand Down Expand Up @@ -411,9 +473,12 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
break;

case BASE64URL:
// Fall through
data_size = simdutf::base64_length_from_binary(str->Length(),
simdutf::base64_url);
break;

case BASE64:
data_size = base64_decoded_size_fast(str->Length());
data_size = simdutf::base64_length_from_binary(str->Length());
break;

case HEX:
Expand Down Expand Up @@ -452,11 +517,16 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
case UCS2:
return Just(str->Length() * sizeof(uint16_t));

case BASE64URL:
// Fall through
case BASE64URL: {
String::Value value(isolate, str);
return Just(simdutf::base64_length_from_binary(value.length(),
simdutf::base64_url));
}

case BASE64: {
String::Value value(isolate, str);
return Just(base64_decoded_size(*value, value.length()));
return Just(simdutf::base64_length_from_binary(value.length(),
simdutf::base64_default));
}

case HEX:
Expand Down Expand Up @@ -609,28 +679,32 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);

case BASE64: {
size_t dlen = base64_encoded_size(buflen);
size_t dlen =
simdutf::base64_length_from_binary(buflen, simdutf::base64_default);
char* dst = node::UncheckedMalloc(dlen);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}

size_t written = base64_encode(buf, buflen, dst, dlen);
size_t written =
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_default);
CHECK_EQ(written, dlen);

return ExternOneByteString::New(isolate, dst, dlen, error);
}

case BASE64URL: {
size_t dlen = base64_encoded_size(buflen, Base64Mode::URL);
size_t dlen =
simdutf::base64_length_from_binary(buflen, simdutf::base64_url);
char* dst = node::UncheckedMalloc(dlen);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}

size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL);
size_t written =
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
CHECK_EQ(written, dlen);

return ExternOneByteString::New(isolate, dst, dlen, error);
Expand Down

0 comments on commit b9b5d45

Please sign in to comment.