From a80dce67f5e00c3825bb788b87fc558671346110 Mon Sep 17 00:00:00 2001 From: weipeng Date: Tue, 9 Jan 2024 10:33:04 +0800 Subject: [PATCH] feat(JavaScript): enhance performance 64bits number (#1320) 1. Creating BigInts is too performance-intensive; we'll use uint32 instead. 2. Reduce memory reads as much as possiable; Replace calle uint8 four times by uint32 once. --- .../fury/lib/{reader.ts => reader/index.ts} | 216 ++++++++++-------- .../packages/fury/lib/{ => reader}/string.ts | 71 ++++-- javascript/packages/fury/lib/writer.ts | 29 ++- javascript/packages/fury/package.json | 2 +- javascript/test/util.js | 6 + 5 files changed, 209 insertions(+), 115 deletions(-) rename javascript/packages/fury/lib/{reader.ts => reader/index.ts} (50%) rename javascript/packages/fury/lib/{ => reader}/string.ts (66%) diff --git a/javascript/packages/fury/lib/reader.ts b/javascript/packages/fury/lib/reader/index.ts similarity index 50% rename from javascript/packages/fury/lib/reader.ts rename to javascript/packages/fury/lib/reader/index.ts index 46b4e5fe96..e32b02f7e8 100644 --- a/javascript/packages/fury/lib/reader.ts +++ b/javascript/packages/fury/lib/reader/index.ts @@ -17,10 +17,10 @@ * under the License. */ -import { Config, LATIN1 } from "./type"; -import { isNodeEnv } from "./util"; -import { PlatformBuffer, alloc, fromUint8Array } from "./platformBuffer"; -import { read1, read10, read11, read12, read13, read14, read15, read2, read3, read4, read5, read6, read7, read8, read9 } from "./string"; +import { Config, LATIN1 } from "../type"; +import { isNodeEnv } from "../util"; +import { PlatformBuffer, alloc, fromUint8Array } from "../platformBuffer"; +import { readLatin1String } from "./string"; export const BinaryReader = (config: Config) => { const sliceStringEnable = isNodeEnv && config.useSliceString; @@ -135,42 +135,7 @@ export const BinaryReader = (config: Config) => { function stringLatin1Slow(len: number) { const rawCursor = cursor; cursor += len; - switch (len) { - case 0: - return ""; - case 1: - return read1(buffer, rawCursor); - case 2: - return read2(buffer, rawCursor); - case 3: - return read3(buffer, rawCursor); - case 4: - return read4(buffer, rawCursor); - case 5: - return read5(buffer, rawCursor); - case 6: - return read6(buffer, rawCursor); - case 7: - return read7(buffer, rawCursor); - case 8: - return read8(buffer, rawCursor); - case 9: - return read9(buffer, rawCursor); - case 10: - return read10(buffer, rawCursor); - case 11: - return read11(buffer, rawCursor); - case 12: - return read12(buffer, rawCursor); - case 13: - return read13(buffer, rawCursor); - case 14: - return read14(buffer, rawCursor); - case 15: - return read15(buffer, rawCursor); - default: - return buffer.latin1Slice(rawCursor, cursor); - } + return readLatin1String(buffer, len, rawCursor); } function binary(len: number) { @@ -186,29 +151,45 @@ export const BinaryReader = (config: Config) => { return result; } - function zigZag(v: number) { - return (v >> 1) ^ -(v & 1); - } - - function zigZagBigInt(v: bigint) { - return (v >> 1n) ^ -(v & 1n); - } - function varUInt32() { - let byte_ = uint8(); - let result = byte_ & 0x7f; - if ((byte_ & 0x80) != 0) { - byte_ = uint8(); - result |= (byte_ & 0x7f) << 7; - if ((byte_ & 0x80) != 0) { - byte_ = uint8(); - result |= (byte_ & 0x7f) << 14; - if ((byte_ & 0x80) != 0) { - byte_ = uint8(); - result |= (byte_ & 0x7f) << 21; - if ((byte_ & 0x80) != 0) { - byte_ = uint8(); - result |= (byte_) << 28; + // Reduce memory reads as much as possible. Reading a uint32 at once is far faster than reading four uint8s separately. + if (buffer.byteLength - cursor >= 5) { + const u32 = dataView.getUint32(cursor++, true); + let result = u32 & 0x7f; + if ((u32 & 0x80) != 0) { + cursor++; + const b2 = u32 >> 8; + result |= (b2 & 0x7f) << 7; + if ((b2 & 0x80) != 0) { + cursor++; + const b3 = u32 >> 16; + result |= (b3 & 0x7f) << 14; + if ((b3 & 0x80) != 0) { + cursor++; + const b4 = u32 >> 24; + result |= (b4 & 0x7f) << 21; + if ((b4 & 0x80) != 0) { + result |= (uint8()) << 28; + } + } + } + } + return result; + } + let byte = uint8(); + let result = byte & 0x7f; + if ((byte & 0x80) != 0) { + byte = uint8(); + result |= (byte & 0x7f) << 7; + if ((byte & 0x80) != 0) { + byte = uint8(); + result |= (byte & 0x7f) << 14; + if ((byte & 0x80) != 0) { + byte = uint8(); + result |= (byte & 0x7f) << 21; + if ((byte & 0x80) != 0) { + byte = uint8(); + result |= (byte) << 28; } } } @@ -216,37 +197,88 @@ export const BinaryReader = (config: Config) => { return result; } + function varInt32() { + const v = varUInt32(); + return (v >> 1) ^ -(v & 1); // zigZag decode + } + function bigUInt8() { return BigInt(uint8() >>> 0); } function varUInt64() { - let byte_ = bigUInt8(); - let result = byte_ & 0x7fn; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 7n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 14n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 21n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 28n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 35n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 42n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_ & 0x7fn) << 49n; - if ((byte_ & 0x80n) != 0n) { - byte_ = bigUInt8(); - result |= (byte_) << 56n; + // Creating BigInts is too performance-intensive; we'll use uint32 instead. + if (buffer.byteLength - cursor < 8) { + let byte = bigUInt8(); + let result = byte & 0x7fn; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 7n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 14n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 21n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 28n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 35n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 42n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte & 0x7fn) << 49n; + if ((byte & 0x80n) != 0n) { + byte = bigUInt8(); + result |= (byte) << 56n; + } + } + } + } + } + } + } + } + return result; + } + const l32 = dataView.getUint32(cursor++, true); + let byte = l32 & 0xff; + let rl28 = byte & 0x7f; + let rh28 = 0; + if ((byte & 0x80) != 0) { + byte = l32 & 0xff00 >> 8; + cursor++; + rl28 |= (byte & 0x7f) << 7; + if ((byte & 0x80) != 0) { + byte = l32 & 0xff0000 >> 16; + cursor++; + rl28 |= (byte & 0x7f) << 14; + if ((byte & 0x80) != 0) { + byte = l32 & 0xff000000 >> 24; + cursor++; + rl28 |= (byte & 0x7f) << 21; + if ((byte & 0x80) != 0) { + const h32 = dataView.getUint32(cursor++, true); + byte = h32 & 0xff; + rh28 |= (byte & 0x7f); + if ((byte & 0x80) != 0) { + byte = h32 & 0xff00 >> 8; + cursor++; + rh28 |= (byte & 0x7f) << 7; + if ((byte & 0x80) != 0) { + byte = h32 & 0xff0000 >> 16; + cursor++; + rh28 |= (byte & 0x7f) << 14; + if ((byte & 0x80) != 0) { + byte = h32 & 0xff000000 >> 24; + cursor++; + rh28 |= (byte & 0x7f) << 21; + if ((byte & 0x80) != 0) { + return (BigInt(uint8()) << 56n) | BigInt(rh28) << 28n | BigInt(rl28); } } } @@ -255,15 +287,13 @@ export const BinaryReader = (config: Config) => { } } } - return result; - } - function varInt32() { - return zigZag(varUInt32()); + return BigInt(rh28) << 28n | BigInt(rl28); } function varInt64() { - return zigZagBigInt(varUInt64()); + const v = varUInt64(); + return (v >> 1n) ^ -(v & 1n); // zigZag decode } return { diff --git a/javascript/packages/fury/lib/string.ts b/javascript/packages/fury/lib/reader/string.ts similarity index 66% rename from javascript/packages/fury/lib/string.ts rename to javascript/packages/fury/lib/reader/string.ts index 3905254e77..8b9c78c6fa 100644 --- a/javascript/packages/fury/lib/string.ts +++ b/javascript/packages/fury/lib/reader/string.ts @@ -17,48 +17,89 @@ * under the License. */ -export const read1 = (buffer: Uint8Array, cursor: number) => { +import { PlatformBuffer } from "../platformBuffer"; + +const read1 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor]); }; -export const read2 = (buffer: Uint8Array, cursor: number) => { +const read2 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1]); }; -export const read3 = (buffer: Uint8Array, cursor: number) => { +const read3 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2]); }; -export const read4 = (buffer: Uint8Array, cursor: number) => { +const read4 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3]); }; -export const read5 = (buffer: Uint8Array, cursor: number) => { +const read5 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4]); }; -export const read6 = (buffer: Uint8Array, cursor: number) => { +const read6 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5]); }; -export const read7 = (buffer: Uint8Array, cursor: number) => { +const read7 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6]); }; -export const read8 = (buffer: Uint8Array, cursor: number) => { +const read8 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7]); }; -export const read9 = (buffer: Uint8Array, cursor: number) => { +const read9 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8]); }; -export const read10 = (buffer: Uint8Array, cursor: number) => { +const read10 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8], buffer[cursor + 9]); }; -export const read11 = (buffer: Uint8Array, cursor: number) => { +const read11 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8], buffer[cursor + 9], buffer[cursor + 10]); }; -export const read12 = (buffer: Uint8Array, cursor: number) => { +const read12 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8], buffer[cursor + 9], buffer[cursor + 10], buffer[cursor + 11]); }; -export const read13 = (buffer: Uint8Array, cursor: number) => { +const read13 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8], buffer[cursor + 9], buffer[cursor + 10], buffer[cursor + 11], buffer[cursor + 12]); }; -export const read14 = (buffer: Uint8Array, cursor: number) => { +const read14 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8], buffer[cursor + 9], buffer[cursor + 10], buffer[cursor + 11], buffer[cursor + 12], buffer[cursor + 13]); }; -export const read15 = (buffer: Uint8Array, cursor: number) => { +const read15 = (buffer: Uint8Array, cursor: number) => { return String.fromCharCode(buffer[cursor], buffer[cursor + 1], buffer[cursor + 2], buffer[cursor + 3], buffer[cursor + 4], buffer[cursor + 5], buffer[cursor + 6], buffer[cursor + 7], buffer[cursor + 8], buffer[cursor + 9], buffer[cursor + 10], buffer[cursor + 11], buffer[cursor + 12], buffer[cursor + 13], buffer[cursor + 14]); }; + +export const readLatin1String = (buffer: PlatformBuffer, len: number, cursor: number) => { + switch (len) { + case 0: + return ""; + case 1: + return read1(buffer, cursor); + case 2: + return read2(buffer, cursor); + case 3: + return read3(buffer, cursor); + case 4: + return read4(buffer, cursor); + case 5: + return read5(buffer, cursor); + case 6: + return read6(buffer, cursor); + case 7: + return read7(buffer, cursor); + case 8: + return read8(buffer, cursor); + case 9: + return read9(buffer, cursor); + case 10: + return read10(buffer, cursor); + case 11: + return read11(buffer, cursor); + case 12: + return read12(buffer, cursor); + case 13: + return read13(buffer, cursor); + case 14: + return read14(buffer, cursor); + case 15: + return read15(buffer, cursor); + default: + return buffer.latin1Slice(cursor, cursor + len); + } +}; diff --git a/javascript/packages/fury/lib/writer.ts b/javascript/packages/fury/lib/writer.ts index d32017003e..31cfddf0b0 100644 --- a/javascript/packages/fury/lib/writer.ts +++ b/javascript/packages/fury/lib/writer.ts @@ -229,13 +229,30 @@ export const BinaryWriter = (config: Config) => { return varUInt32((v << 1) ^ (v >> 31)); } - function varUInt32(val: number) { - val = (val >>> 0) & 0xFFFFFFFF; // keep only the lower 32 bits - while (val > 127) { - arrayBuffer[cursor++] = val & 127 | 128; - val >>>= 7; + function varUInt32(value: number) { + value = (value >>> 0) & 0xFFFFFFFF; // keep only the lower 32 bits + + if (value >> 7 == 0) { + arrayBuffer[cursor++] = value; + return; + } + const rawCursor = cursor; + let u32 = 0; + if (value >> 14 == 0) { + u32 = ((value & 0x7f | 0x80) << 24) | ((value >> 7) << 16); + cursor += 2; + } else if (value >> 21 == 0) { + u32 = ((value & 0x7f | 0x80) << 24) | ((value >> 7 & 0x7f | 0x80) << 16) | ((value >> 14) << 8); + cursor += 3; + } else if (value >> 28 == 0) { + u32 = ((value & 0x7f | 0x80) << 24) | ((value >> 7 & 0x7f | 0x80) << 16) | ((value >> 14 & 0x7f | 0x80) << 8) | (value >> 21); + cursor += 4; + } else { + u32 = ((value & 0x7f | 0x80) << 24) | ((value >> 7 & 0x7f | 0x80) << 16) | ((value >> 14 & 0x7f | 0x80) << 8) | (value >> 21 & 0x7f | 0x80); + arrayBuffer[rawCursor + 4] = value >> 28; + cursor += 5; } - arrayBuffer[cursor++] = val; + dataView.setUint32(rawCursor, u32); } function varInt64(v: bigint) { diff --git a/javascript/packages/fury/package.json b/javascript/packages/fury/package.json index 5a6c05b8c4..6cfa0fb4e0 100644 --- a/javascript/packages/fury/package.json +++ b/javascript/packages/fury/package.json @@ -1,6 +1,6 @@ { "name": "@furyjs/fury", - "version": "0.5.3-beta", + "version": "0.5.5-beta", "description": "A blazing fast multi-language serialization framework powered by jit and zero-copy", "main": "dist/index.js", "scripts": { diff --git a/javascript/test/util.js b/javascript/test/util.js index 719d5a528e..3afb94903a 100644 --- a/javascript/test/util.js +++ b/javascript/test/util.js @@ -74,6 +74,12 @@ const mockData2Description = (data, tag) => { } } if (typeof data === 'number') { + if (data > Number.MAX_SAFE_INTEGER || data < Number.MIN_SAFE_INTEGER) { + return { + type: InternalSerializerType.INT64, + label: "int64" + } + } return { type: InternalSerializerType.INT32, label: "int32"