Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(node/buffer): utf8ToBytes should return a Uint8Array #20769

Merged
merged 17 commits into from
Oct 8, 2023
82 changes: 8 additions & 74 deletions ext/node/polyfills/internal/buffer.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
bytesToUtf16le,
hexToBytes,
utf16leToBytes,
utf8ToBytes,
} from "ext:deno_node/internal_binding/_utils.ts";
import {
isAnyArrayBuffer,
Expand Down Expand Up @@ -1687,80 +1688,13 @@ function checkIntBI(value, min, max, buf, offset, byteLength2) {
checkBounds(buf, offset, byteLength2);
}

/**
 * Converts a JavaScript (UTF-16) string into a plain array of UTF-8 byte values.
 *
 * Surrogates that cannot be paired are emitted as U+FFFD (0xEF 0xBF 0xBD).
 * Encoding stops at the first code point that no longer fits in the budget.
 *
 * @param {string} string Text to encode.
 * @param {number} [units] Byte budget; any falsy value means "no limit".
 * @returns {number[]} The encoded bytes.
 */
function utf8ToBytes(string, units) {
  units = units || Infinity;
  const out = [];
  const total = string.length;
  let pendingLead = null;
  let cp;
  for (let pos = 0; pos < total; pos += 1) {
    cp = string.charCodeAt(pos);
    const isSurrogate = cp >= 0xd800 && cp <= 0xdfff;
    if (isSurrogate) {
      if (pendingLead === null) {
        // A trail surrogate with no lead, or a lead at the very end of the
        // string, can never form a pair: emit the replacement character.
        const cannotPair = cp > 0xdbff || pos + 1 === total;
        if (cannotPair) {
          units -= 3;
          if (units > -1) {
            out.push(0xef, 0xbf, 0xbd);
          }
        } else {
          pendingLead = cp;
        }
        continue;
      }
      if (cp < 0xdc00) {
        // Two leads in a row: the earlier one is orphaned, the newer one
        // becomes the pending lead.
        units -= 3;
        if (units > -1) {
          out.push(0xef, 0xbf, 0xbd);
        }
        pendingLead = cp;
        continue;
      }
      // Valid lead + trail pair: combine into a supplementary code point.
      cp = (((pendingLead - 0xd800) << 10) | (cp - 0xdc00)) + 0x10000;
    } else if (pendingLead !== null) {
      // A lead surrogate followed by a non-surrogate is orphaned.
      units -= 3;
      if (units > -1) {
        out.push(0xef, 0xbf, 0xbd);
      }
    }
    pendingLead = null;

    // Number of UTF-8 bytes needed for this code point.
    let width;
    if (cp < 0x80) {
      width = 1;
    } else if (cp < 0x800) {
      width = 2;
    } else if (cp < 0x10000) {
      width = 3;
    } else if (cp < 0x110000) {
      width = 4;
    } else {
      throw new Error("Invalid code point");
    }

    units -= width;
    if (units < 0) {
      break;
    }

    switch (width) {
      case 1:
        out.push(cp);
        break;
      case 2:
        out.push((cp >> 6) | 0xc0, (cp & 0x3f) | 0x80);
        break;
      case 3:
        out.push(
          (cp >> 12) | 0xe0,
          ((cp >> 6) & 0x3f) | 0x80,
          (cp & 0x3f) | 0x80,
        );
        break;
      default:
        out.push(
          (cp >> 18) | 0xf0,
          ((cp >> 12) & 0x3f) | 0x80,
          ((cp >> 6) & 0x3f) | 0x80,
          (cp & 0x3f) | 0x80,
        );
        break;
    }
  }
  return out;
}

/**
* @param {Uint8Array} src Source buffer to read from
* @param {Buffer} dst Destination buffer to write to
* @param {number} offset Byte offset to write at in the destination buffer
* @param {number} [byteLength] Optional number of bytes to, at most, write into destination buffer.
* @returns {number} Number of bytes written to destination buffer
*/
function blitBuffer(src, dst, offset, byteLength = Infinity) {
const srcLength = src.length;
// Establish the number of bytes to be written
Expand Down
96 changes: 96 additions & 0 deletions ext/node/polyfills/internal_binding/_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,102 @@ export function utf16leToBytes(str: string, units?: number) {
return i * 2 === length ? byteArray : byteArray.subarray(0, i * 2);
}

/**
 * Encodes a JavaScript (UTF-16) string as UTF-8 bytes.
 *
 * Unpaired surrogates are replaced with U+FFFD (bytes `EF BF BD`). Encoding
 * stops at the first code point whose encoding would exceed the byte budget.
 *
 * @param str String to encode.
 * @param units Maximum number of bytes to emit. Any falsy value (omitted,
 *   `0`, `NaN`) means "unlimited". A negative budget yields an empty result.
 * @returns The encoded bytes. This may be a `subarray` view over a larger
 *   backing buffer, so rely only on its `length` and contents.
 * @throws Error if a computed code point falls outside the Unicode range.
 */
export function utf8ToBytes(str: string, units?: number): Uint8Array {
  let budget: number = units || Infinity;
  const strLength = str.length;
  // The byte array length is determined by a conservative 'length quadrupled'
  // calculation. This overallocates but can never be too small. The result is
  // clamped at zero so that a negative budget (e.g. a write offset past the
  // end of the destination) produces an empty array instead of a RangeError
  // from the Uint8Array constructor.
  const byteArrayLength = Math.max(0, Math.min(strLength * 4, budget * 4));
  const byteArray = new Uint8Array(byteArrayLength);
  let codePoint: number;
  let leadSurrogate: number | null = null;
  /** Next index to assign into. */
  let byteIndex = 0;
  for (let i = 0; i < strLength; ++i) {
    codePoint = str.charCodeAt(i);
    if (codePoint > 0xd7ff && codePoint < 0xe000) {
      // Surrogate code unit.
      if (!leadSurrogate) {
        if (codePoint > 0xdbff || i + 1 === strLength) {
          // Trail surrogate without a lead, or a lead at end of input:
          // neither can form a pair, so emit U+FFFD.
          if ((budget -= 3) > -1) {
            byteArray[byteIndex++] = 0xef;
            byteArray[byteIndex++] = 0xbf;
            byteArray[byteIndex++] = 0xbd;
          }
          continue;
        }
        // Valid lead; wait for the next code unit.
        leadSurrogate = codePoint;
        continue;
      }
      if (codePoint < 0xdc00) {
        // Second lead in a row: the previous lead is orphaned.
        if ((budget -= 3) > -1) {
          byteArray[byteIndex++] = 0xef;
          byteArray[byteIndex++] = 0xbf;
          byteArray[byteIndex++] = 0xbd;
        }
        leadSurrogate = codePoint;
        continue;
      }
      // Valid surrogate pair: combine into a supplementary code point.
      codePoint = (((leadSurrogate - 0xd800) << 10) | (codePoint - 0xdc00)) +
        0x10000;
    } else if (leadSurrogate) {
      // Lead surrogate followed by a non-surrogate: the lead is orphaned.
      if ((budget -= 3) > -1) {
        byteArray[byteIndex++] = 0xef;
        byteArray[byteIndex++] = 0xbf;
        byteArray[byteIndex++] = 0xbd;
      }
    }
    leadSurrogate = null;
    if (codePoint < 0x80) {
      if ((budget -= 1) < 0) {
        break;
      }
      byteArray[byteIndex++] = codePoint;
    } else if (codePoint < 0x800) {
      if ((budget -= 2) < 0) {
        break;
      }
      byteArray[byteIndex++] = (codePoint >> 6) | 0xc0;
      byteArray[byteIndex++] = (codePoint & 0x3f) | 0x80;
    } else if (codePoint < 0x10000) {
      if ((budget -= 3) < 0) {
        break;
      }
      byteArray[byteIndex++] = (codePoint >> 12) | 0xe0;
      byteArray[byteIndex++] = ((codePoint >> 6) & 0x3f) | 0x80;
      byteArray[byteIndex++] = (codePoint & 0x3f) | 0x80;
    } else if (codePoint < 0x110000) {
      if ((budget -= 4) < 0) {
        break;
      }
      byteArray[byteIndex++] = (codePoint >> 18) | 0xf0;
      byteArray[byteIndex++] = ((codePoint >> 12) & 0x3f) | 0x80;
      byteArray[byteIndex++] = ((codePoint >> 6) & 0x3f) | 0x80;
      byteArray[byteIndex++] = (codePoint & 0x3f) | 0x80;
    } else {
      throw new Error(
        "Invalid code point: " + str[i] + " " + i + ": '" + str + "'",
      );
    }
  }
  // If the next byte index (to assign into) is equal to length, ie.
  // one past the last byte of the byte array, then we've filled the buffer.
  // Otherwise return a view trimmed to the bytes actually written.
  return byteIndex === byteArrayLength
    ? byteArray
    : byteArray.subarray(0, byteIndex);
}

export function bytesToAscii(bytes: Uint8Array) {
let res = "";
const length = bytes.byteLength;
Expand Down