diff --git a/javascript/packages/fury/lib/internalSerializer/any.ts b/javascript/packages/fury/lib/internalSerializer/any.ts index 1d7487bab7..00fa993808 100644 --- a/javascript/packages/fury/lib/internalSerializer/any.ts +++ b/javascript/packages/fury/lib/internalSerializer/any.ts @@ -47,7 +47,7 @@ export default (fury: Fury) => { case RefFlags.RefValueFlag: return detectSerializer(cursor).read(); case RefFlags.RefFlag: - return referenceResolver.getReadObjectByRefId(binaryReader.varInt32()); + return referenceResolver.getReadObjectByRefId(binaryReader.varUInt32()); case RefFlags.NullFlag: return null; case RefFlags.NotNullValueFlag: diff --git a/javascript/packages/fury/lib/internalSerializer/array.ts b/javascript/packages/fury/lib/internalSerializer/array.ts index 62847d3563..dae37a3384 100644 --- a/javascript/packages/fury/lib/internalSerializer/array.ts +++ b/javascript/packages/fury/lib/internalSerializer/array.ts @@ -24,13 +24,13 @@ export const buildArray = (fury: Fury, item: Serializer, type: InternalSerialize const { binaryReader, binaryWriter, referenceResolver } = fury; const { pushReadObject } = referenceResolver; - const { varInt32: writeVarInt32, reserve: reserves } = binaryWriter; - const { varInt32: readVarInt32 } = binaryReader; + const { varUInt32: writeVarUInt32, reserve: reserves } = binaryWriter; + const { varUInt32: readVarUInt32 } = binaryReader; const { write, read } = item; const innerHeadSize = (item.config().reserve); return { ...referenceResolver.deref(() => { - const len = readVarInt32(); + const len = readVarUInt32(); const result = new Array(len); pushReadObject(result); for (let i = 0; i < result.length; i++) { @@ -39,7 +39,7 @@ export const buildArray = (fury: Fury, item: Serializer, type: InternalSerialize return result; }), write: referenceResolver.withNullableOrRefWriter(type, (v: any[]) => { - writeVarInt32(v.length); + writeVarUInt32(v.length); reserves(innerHeadSize * v.length); diff --git 
a/javascript/packages/fury/lib/internalSerializer/map.ts b/javascript/packages/fury/lib/internalSerializer/map.ts index a49e675ece..53a280a7ab 100644 --- a/javascript/packages/fury/lib/internalSerializer/map.ts +++ b/javascript/packages/fury/lib/internalSerializer/map.ts @@ -21,14 +21,14 @@ import { InternalSerializerType, Fury, Serializer } from "../type"; export default (fury: Fury, keySerializer: Serializer, valueSerializer: Serializer) => { const { binaryReader, binaryWriter, referenceResolver } = fury; - const { varInt32: readVarInt32 } = binaryReader; + const { varUInt32: readVarUInt32 } = binaryReader; - const { varInt32: writeVarInt32, reserve: reserves } = binaryWriter; + const { varUInt32: writeVarUInt32, reserve: reserves } = binaryWriter; const { pushReadObject } = referenceResolver; const innerHeadSize = keySerializer.config().reserve + valueSerializer.config().reserve; return { ...referenceResolver.deref(() => { - const len = readVarInt32(); + const len = readVarUInt32(); const result = new Map(); pushReadObject(result); for (let index = 0; index < len; index++) { @@ -40,7 +40,7 @@ export default (fury: Fury, keySerializer: Serializer, valueSerializer: Serializ }), write: referenceResolver.withNullableOrRefWriter(InternalSerializerType.MAP, (v: Map) => { const len = v.size; - writeVarInt32(len); + writeVarUInt32(len); reserves(innerHeadSize * v.size); for (const [key, value] of v.entries()) { keySerializer.write(key); diff --git a/javascript/packages/fury/lib/internalSerializer/set.ts b/javascript/packages/fury/lib/internalSerializer/set.ts index 1c638b4e9c..adae269305 100644 --- a/javascript/packages/fury/lib/internalSerializer/set.ts +++ b/javascript/packages/fury/lib/internalSerializer/set.ts @@ -22,13 +22,13 @@ import { InternalSerializerType } from "../type"; export default (fury: Fury, nestedSerializer: Serializer) => { const { binaryReader, binaryWriter, referenceResolver } = fury; - const { varInt32: writeVarInt32, reserve: reserves } = 
binaryWriter; - const { varInt32: readVarInt32 } = binaryReader; + const { varUInt32: writeVarUInt32, reserve: reserves } = binaryWriter; + const { varUInt32: readVarUInt32 } = binaryReader; const { pushReadObject } = referenceResolver; const innerHeadSize = nestedSerializer.config().reserve; return { ...referenceResolver.deref(() => { - const len = readVarInt32(); + const len = readVarUInt32(); const result = new Set(); pushReadObject(result); for (let index = 0; index < len; index++) { @@ -38,7 +38,7 @@ export default (fury: Fury, nestedSerializer: Serializer) => { }), write: referenceResolver.withNullableOrRefWriter(InternalSerializerType.FURY_SET, (v: Set) => { const len = v.size; - writeVarInt32(len); + writeVarUInt32(len); reserves(innerHeadSize * v.size); for (const value of v.values()) { nestedSerializer.write(value); diff --git a/javascript/packages/fury/lib/internalSerializer/string.ts b/javascript/packages/fury/lib/internalSerializer/string.ts index fa716e1b88..ebba64b0a6 100644 --- a/javascript/packages/fury/lib/internalSerializer/string.ts +++ b/javascript/packages/fury/lib/internalSerializer/string.ts @@ -22,15 +22,15 @@ import { InternalSerializerType, RefFlags } from "../type"; export default (fury: Fury) => { const { binaryReader, binaryWriter, referenceResolver } = fury; - const { stringOfVarInt32: writeStringOfVarInt32, int8 } = binaryWriter; - const { stringOfVarInt32: readStringOfVarInt32 } = binaryReader; + const { stringOfVarUInt32: writeStringOfVarUInt32, int8 } = binaryWriter; + const { stringOfVarUInt32: readStringOfVarUInt32 } = binaryReader; return { ...referenceResolver.deref(() => { - return readStringOfVarInt32(); + return readStringOfVarUInt32(); }), write: referenceResolver.withNotNullableWriter(InternalSerializerType.STRING, "", (v: string) => { - writeStringOfVarInt32(v); + writeStringOfVarUInt32(v); }), writeWithoutType: (v: string) => { if (v === null) { @@ -38,7 +38,7 @@ export default (fury: Fury) => { return; } 
int8(RefFlags.NotNullValueFlag); - writeStringOfVarInt32(v); + writeStringOfVarUInt32(v); }, config: () => { return { diff --git a/javascript/packages/fury/lib/internalSerializer/tuple.ts b/javascript/packages/fury/lib/internalSerializer/tuple.ts index e7eceb385d..046679292e 100644 --- a/javascript/packages/fury/lib/internalSerializer/tuple.ts +++ b/javascript/packages/fury/lib/internalSerializer/tuple.ts @@ -24,12 +24,12 @@ export const tupleSerializer = (fury: Fury, serializers: Serializer[]) => { const { binaryReader, binaryWriter, referenceResolver } = fury; const { pushReadObject } = referenceResolver; - const { varInt32: writeVarInt32, reserve: reserves } = binaryWriter; - const { varInt32: readVarInt32 } = binaryReader; + const { varUInt32: writeVarUInt32, reserve: reserves } = binaryWriter; + const { varUInt32: readVarUInt32 } = binaryReader; return { ...referenceResolver.deref(() => { - const len = readVarInt32(); + const len = readVarUInt32(); const result = new Array(len); pushReadObject(result); for (let i = 0; i < len; i++) { @@ -39,7 +39,7 @@ export const tupleSerializer = (fury: Fury, serializers: Serializer[]) => { return result; }), write: referenceResolver.withNullableOrRefWriter(InternalSerializerType.TUPLE, (v: any[]) => { - writeVarInt32(serializers.length); + writeVarUInt32(serializers.length); for (let i = 0; i < serializers.length; i++) { const item = serializers[i]; diff --git a/javascript/packages/fury/lib/reader.ts b/javascript/packages/fury/lib/reader.ts index 6b275f1d32..9513d34396 100644 --- a/javascript/packages/fury/lib/reader.ts +++ b/javascript/packages/fury/lib/reader.ts @@ -109,9 +109,9 @@ export const BinaryReader = (config: Config) => { return result; } - function stringOfVarInt32() { + function stringOfVarUInt32() { const isLatin1 = uint8() === LATIN1; - const len = varInt32(); + const len = varUInt32(); return isLatin1 ? 
stringLatin1(len) : stringUtf8(len); } @@ -140,7 +140,11 @@ export const BinaryReader = (config: Config) => { return result; } - function varInt32() { + function zigZag(v: number) { + return (v >>> 1) ^ -(v & 1); /* zig-zag decode; the shift must be unsigned (>>>): encodings with bit 31 set (|n| >= 2^30) would otherwise be sign-extended and decode to the wrong value */ + } + + function varUInt32() { let byte_ = int8(); let result = byte_ & 0x7f; if ((byte_ & 0x80) != 0) { @@ -162,10 +166,15 @@ export const BinaryReader = (config: Config) => { return result; } + function varInt32() { + return zigZag(varUInt32()); + } + return { getCursor: () => cursor, setCursor: (v: number) => (cursor = v), varInt32, + varUInt32, int8, buffer: binary, bufferRef, @@ -174,7 +183,7 @@ export const BinaryReader = (config: Config) => { stringUtf8At, stringUtf8, stringLatin1, - stringOfVarInt32, + stringOfVarUInt32, double, float, uint16, diff --git a/javascript/packages/fury/lib/referenceResolver.ts b/javascript/packages/fury/lib/referenceResolver.ts index 3e4e6c3125..c79f5400e1 100644 --- a/javascript/packages/fury/lib/referenceResolver.ts +++ b/javascript/packages/fury/lib/referenceResolver.ts @@ -90,7 +90,7 @@ export const ReferenceResolver = ( const existsId = existsWriteObject(v); if (typeof existsId === "number") { binaryWriter.int8(RefFlags.RefFlag); - binaryWriter.varInt32(existsId); + binaryWriter.varUInt32(existsId); } else { int24(head); pushWriteObject(v); @@ -137,7 +137,7 @@ export const ReferenceResolver = ( skipType(); return fn(); case RefFlags.RefFlag: - return getReadObjectByRefId(binaryReader.varInt32()); + return getReadObjectByRefId(binaryReader.varUInt32()); case RefFlags.NullFlag: return null; case RefFlags.NotNullValueFlag: @@ -150,7 +150,7 @@ export const ReferenceResolver = ( case RefFlags.RefValueFlag: return fn(); case RefFlags.RefFlag: - return getReadObjectByRefId(binaryReader.varInt32()); + return getReadObjectByRefId(binaryReader.varUInt32()); case RefFlags.NullFlag: return null; case RefFlags.NotNullValueFlag: diff --git a/javascript/packages/fury/lib/writer.ts b/javascript/packages/fury/lib/writer.ts index 
57ea680f86..4f14212312 100644 --- a/javascript/packages/fury/lib/writer.ts +++ b/javascript/packages/fury/lib/writer.ts @@ -170,13 +170,13 @@ export const BinaryWriter = (config: Config) => { } } - function stringOfVarInt32Fast() { + function stringOfVarUInt32Fast() { const { isLatin1: detectIsLatin1, stringCopy } = config!.hps!; return function (v: string) { const isLatin1 = detectIsLatin1(v); const len = isLatin1 ? v.length : strByteLength(v); dataView.setUint8(cursor++, isLatin1 ? LATIN1 : UTF8); - varInt32(len); + varUInt32(len); reserve(len); if (isLatin1) { stringCopy(v, arrayBuffer, cursor); @@ -191,11 +191,11 @@ export const BinaryWriter = (config: Config) => { }; } - function stringOfVarInt32Slow(v: string) { + function stringOfVarUInt32Slow(v: string) { const len = strByteLength(v); const isLatin1 = len === v.length; dataView.setUint8(cursor++, isLatin1 ? LATIN1 : UTF8); - varInt32(len); + varUInt32(len); reserve(len); if (isLatin1) { if (len < 40) { @@ -215,7 +215,15 @@ export const BinaryWriter = (config: Config) => { cursor += len; } + function zigZag(v: number) { + return (v << 1) ^ (v >> 31); + } + function varInt32(val: number) { + return varUInt32(zigZag(val)); + } + + function varUInt32(val: number) { val = val >>> 0; while (val > 127) { arrayBuffer[cursor++] = val & 127 | 128; @@ -278,9 +286,10 @@ export const BinaryWriter = (config: Config) => { uint8, int16, varInt32, - stringOfVarInt32: config?.hps - ? stringOfVarInt32Fast() - : stringOfVarInt32Slow, + varUInt32, + stringOfVarUInt32: config?.hps + ? 
stringOfVarUInt32Fast() + : stringOfVarUInt32Slow, bufferWithoutMemCheck, uint64, buffer, diff --git a/javascript/test/io.test.ts b/javascript/test/io.test.ts index 040c9c7e2c..38f1b3f2c6 100644 --- a/javascript/test/io.test.ts +++ b/javascript/test/io.test.ts @@ -136,7 +136,7 @@ function num2Bin(num: number) { }) }); - test('should varint32 work', () => { + test('should varUInt32 work', () => { [ 1, 2, @@ -146,7 +146,7 @@ function num2Bin(num: number) { { const writer = BinaryWriter(config); const value = (2 ** (x * 7)) - 1; - writer.varInt32(value); + writer.varUInt32(value); const ab = writer.dump(); expect(ab.byteLength).toBe(x); for (let index = 0; index < ab.byteLength - 1; index++) { @@ -155,13 +155,13 @@ function num2Bin(num: number) { expect(num2Bin(ab[ab.byteLength - 1])).toBe('1111111'); const reader = BinaryReader(config); reader.reset(ab); - const vari32 = reader.varInt32(); + const vari32 = reader.varUInt32(); expect(vari32).toBe(value); } { const writer = BinaryWriter(config); const value = (2 ** (x * 7)); - writer.varInt32(value); + writer.varUInt32(value); const ab = writer.dump(); expect(ab.byteLength).toBe(x + 1); for (let index = 0; index < ab.byteLength - 1; index++) { @@ -170,20 +170,33 @@ function num2Bin(num: number) { expect(num2Bin(ab[ab.byteLength - 1])).toBe('1'); const reader = BinaryReader(config); reader.reset(ab); - const vari32 = reader.varInt32(); + const vari32 = reader.varUInt32(); expect(vari32).toBe(value); } }); }); + test('should varInt32 work', () => { + const writer = BinaryWriter(config); + const value = -1; + writer.varInt32(value); + const ab = writer.dump(); + expect(ab.byteLength).toBe(1); + expect(num2Bin(ab[0])).toBe('1'); + const reader = BinaryReader(config); + reader.reset(ab); + const vari32 = reader.varInt32(); + expect(vari32).toBe(value); + }); + test('should short latin1 string work', () => { const writer = BinaryWriter(config); - writer.stringOfVarInt32("hello world"); + writer.stringOfVarUInt32("hello 
world"); const ab = writer.dump(); const reader = BinaryReader(config); reader.reset(ab); expect(reader.uint8()).toBe(0); - const len = reader.varInt32(); + const len = reader.varUInt32(); expect(len).toBe(11); const str = reader.stringLatin1(11); expect(str).toBe("hello world"); @@ -192,12 +205,12 @@ function num2Bin(num: number) { test('should long latin1 string work', () => { const writer = BinaryWriter(config); const str = new Array(10).fill('hello world').join(''); - writer.stringOfVarInt32(str); + writer.stringOfVarUInt32(str); const ab = writer.dump(); const reader = BinaryReader(config); reader.reset(ab); expect(reader.uint8()).toBe(0); - const len = reader.varInt32(); + const len = reader.varUInt32(); expect(len).toBe(110); expect(reader.stringLatin1(len)).toBe(str); }); @@ -205,39 +218,39 @@ function num2Bin(num: number) { test('should short utf8 string work', () => { const writer = BinaryWriter(config); const str = new Array(1).fill('hello 你好 😁').join(''); - writer.stringOfVarInt32(str); + writer.stringOfVarUInt32(str); const ab = writer.dump(); const reader = BinaryReader(config); { reader.reset(ab); expect(reader.uint8()).toBe(1); - const len = reader.varInt32(); + const len = reader.varUInt32(); expect(len).toBe(17); expect(reader.stringUtf8(len)).toBe(str); } { reader.reset(ab); - expect(reader.stringOfVarInt32()).toBe(str); + expect(reader.stringOfVarUInt32()).toBe(str); } }); test('should long utf8 string work', () => { const writer = BinaryWriter(config); const str = new Array(10).fill('hello 你好 😁').join(''); - writer.stringOfVarInt32(str); + writer.stringOfVarUInt32(str); const ab = writer.dump(); const reader = BinaryReader(config); { reader.reset(ab); expect(reader.uint8()).toBe(1); - const len = reader.varInt32(); + const len = reader.varUInt32(); expect(len).toBe(170); expect(reader.stringUtf8(len)).toBe(str); } { reader.reset(ab); - expect(reader.stringOfVarInt32()).toBe(str); + expect(reader.stringOfVarUInt32()).toBe(str); } }); diff --git 
a/rust/fury/Cargo.toml b/rust/fury/Cargo.toml index 1f64725922..ed24f0cda2 100644 --- a/rust/fury/Cargo.toml +++ b/rust/fury/Cargo.toml @@ -29,5 +29,4 @@ fury-derive = { path="../fury-derive"} lazy_static = { version = "1.4.0" } byteorder = { version = "1.4.3" } chrono = "0.4.26" -thiserror = { default-features = false, version = "1.0.43" } -arrow = "49.0.0" \ No newline at end of file +thiserror = { default-features = false, version = "1.0.43" } \ No newline at end of file