From 1881ef02235b0e639c8a919cd7fa0988f7f610be Mon Sep 17 00:00:00 2001 From: Marc Durdin Date: Thu, 13 Jul 2023 06:26:12 +1000 Subject: [PATCH] fix: null terminated string support for utf16 (#52) * fix: null terminated string support for utf16 * fix: zero length string is falsy so beware --- README.md | 7 ++++--- src/String.js | 43 +++++++++++++++++++++++++++++++++---------- test/String.js | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0d8b8a8..2203745 100644 --- a/README.md +++ b/README.md @@ -151,8 +151,9 @@ var struct = new r.Struct({ ### String -A `String` maps a JavaScript string to and from binary encodings. The length can be a constant, taken -from a previous field in the parent structure, or encoded using a number type immediately before the string. +A `String` maps a JavaScript string to and from binary encodings. The length, in bytes, can be a constant, +taken from a previous field in the parent structure, or encoded using a number type immediately before the +string. Fully supported encodings include `'ascii'`, `'utf8'`, `'ucs2'`, `'utf16le'`, `'utf16be'`. 
Decoding is also possible with any encoding supported by [TextDecoder](https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings), @@ -172,7 +173,7 @@ var struct = new r.Struct({ }); // null-terminated string (also known as C string) -var str = new r.String(null, 'utf8') +var str = new r.String(null, 'utf8'); ``` ### Array diff --git a/src/String.js b/src/String.js index b128360..dd4b264 100644 --- a/src/String.js +++ b/src/String.js @@ -12,28 +12,33 @@ class StringT extends Base { decode(stream, parent) { let length, pos; + let { encoding } = this; + if (typeof encoding === 'function') { + encoding = encoding.call(parent, parent) || 'ascii'; + } + let width = encodingWidth(encoding); + if (this.length != null) { length = utils.resolveLength(this.length, stream, parent); } else { let buffer; ({buffer, length, pos} = stream); - while ((pos < length) && (buffer[pos] !== 0x00)) { - ++pos; + while ((pos < length - width + 1) && + (buffer[pos] !== 0x00 || + (width === 2 && buffer[pos+1] !== 0x00) + )) { + pos += width; } length = pos - stream.pos; } - let { encoding } = this; - if (typeof encoding === 'function') { - encoding = encoding.call(parent, parent) || 'ascii'; - } const string = stream.readString(length, encoding); if ((this.length == null) && (stream.pos < stream.length)) { - stream.pos++; + stream.pos+=width; } return string; @@ -41,7 +46,7 @@ class StringT extends Base { size(val, parent) { // Use the defined value if no value was given - if (!val) { + if (val === undefined || val === null) { return utils.resolveLength(this.length, null, parent); } @@ -60,7 +65,7 @@ class StringT extends Base { } if ((this.length == null)) { - size++; + size += encodingWidth(encoding); } return size; @@ -79,11 +84,29 @@ class StringT extends Base { stream.writeString(val, encoding); if ((this.length == null)) { - return stream.writeUInt8(0x00); + return encodingWidth(encoding) == 2 ? 
+ stream.writeUInt16LE(0x0000) : + stream.writeUInt8(0x00); } } } +function encodingWidth(encoding) { + switch(encoding) { + case 'ascii': + case 'utf8': // utf8 is a byte-based encoding for zero-term string + return 1; + case 'utf16le': + case 'utf16-le': + case 'utf16be': + case 'utf16-be': + case 'ucs2': + return 2; + default: + throw new Error('Unknown encoding ' + encoding); + } +} + function byteLength(string, encoding) { switch (encoding) { case 'ascii': diff --git a/test/String.js b/test/String.js index 11191cd..e7287a4 100644 --- a/test/String.js +++ b/test/String.js @@ -1,5 +1,5 @@ import assert from 'assert'; -import {String as StringT, uint8, DecodeStream, EncodeStream} from 'restructure'; +import {String as StringT, uint16le, uint8, DecodeStream, Struct} from 'restructure'; describe('String', function() { describe('decode', function() { @@ -40,6 +40,18 @@ describe('String', function() { const string = new StringT(null, 'utf8'); assert.equal(string.fromBuffer(Buffer.from('🍻')), '🍻'); }); + + it('should decode two-byte null-terminated string for utf16le', function() { + const stream = new DecodeStream(Buffer.from('🍻\x00', 'utf16le')); + const string = new StringT(null, 'utf16le'); + assert.equal(string.decode(stream), '🍻'); + assert.equal(stream.pos, 6); + }); + + it('should decode remainder of buffer when null-byte missing, utf16le', function() { + const string = new StringT(null, 'utf16le'); + assert.equal(string.fromBuffer(Buffer.from('🍻', 'utf16le')), '🍻'); + }); }); describe('size', function() { @@ -73,6 +85,11 @@ describe('String', function() { assert.equal(string.size('🍻'), 5); }); + it('should take null-byte into account, utf16le', function() { + const string = new StringT(null, 'utf16le'); + assert.equal(string.size('🍻'), 6); + }); + it('should use defined length if no value given', function() { const array = new StringT(10); assert.equal(array.size(), 10); @@ -109,5 +126,20 @@ describe('String', function() { const string = new StringT(null, 
'utf8'); assert.deepEqual(string.toBuffer('🍻'), Buffer.from('🍻\x00')); }); + + it('should encode using string length, utf16le', function() { + const string = new StringT(16, 'utf16le'); + assert.deepEqual(string.toBuffer('testing'), Buffer.from('testing', 'utf16le')); + }); + + it('should encode length as number before string utf16le', function() { + const string = new StringT(uint16le, 'utf16le'); + assert.deepEqual(string.toBuffer('testing 😜'), Buffer.from('\u0014testing 😜', 'utf16le')); + }); + + it('should encode two-byte null-terminated string for UTF-16', function() { + const string = new StringT(null, 'utf16le'); + assert.deepEqual(string.toBuffer('🍻'), Buffer.from('🍻\x00', 'utf16le')); + }); }); });