Skip to content

Commit

Permalink
fix: null terminated string support for utf16 (#52)
Browse files Browse the repository at this point in the history
* fix: null terminated string support for utf16
* fix: zero length string is falsy so beware
  • Loading branch information
mcdurdin authored Jul 12, 2023
1 parent b9b683c commit 1881ef0
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 14 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,9 @@ var struct = new r.Struct({

### String

A `String` maps a JavaScript string to and from binary encodings. The length can be a constant, taken
from a previous field in the parent structure, or encoded using a number type immediately before the string.
A `String` maps a JavaScript string to and from binary encodings. The length, in bytes, can be a constant,
taken from a previous field in the parent structure, or encoded using a number type immediately before the
string.

Fully supported encodings include `'ascii'`, `'utf8'`, `'ucs2'`, `'utf16le'`, `'utf16be'`. Decoding is also possible
with any encoding supported by [TextDecoder](https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings),
Expand All @@ -172,7 +173,7 @@ var struct = new r.Struct({
});

// null-terminated string (also known as C string)
var str = new r.String(null, 'utf8')
var str = new r.String(null, 'utf8');
```

### Array
Expand Down
43 changes: 33 additions & 10 deletions src/String.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,41 @@ class StringT extends Base {
decode(stream, parent) {
let length, pos;

let { encoding } = this;
if (typeof encoding === 'function') {
encoding = encoding.call(parent, parent) || 'ascii';
}
let width = encodingWidth(encoding);

if (this.length != null) {
length = utils.resolveLength(this.length, stream, parent);
} else {
let buffer;
({buffer, length, pos} = stream);

while ((pos < length) && (buffer[pos] !== 0x00)) {
++pos;
while ((pos < length - width + 1) &&
(buffer[pos] !== 0x00 ||
(width === 2 && buffer[pos+1] !== 0x00)
)) {
pos += width;
}

length = pos - stream.pos;
}

let { encoding } = this;
if (typeof encoding === 'function') {
encoding = encoding.call(parent, parent) || 'ascii';
}

const string = stream.readString(length, encoding);

if ((this.length == null) && (stream.pos < stream.length)) {
stream.pos++;
stream.pos+=width;
}

return string;
}

size(val, parent) {
// Use the defined value if no value was given
if (!val) {
if (val === undefined || val === null) {
return utils.resolveLength(this.length, null, parent);
}

Expand All @@ -60,7 +65,7 @@ class StringT extends Base {
}

if ((this.length == null)) {
size++;
size += encodingWidth(encoding);
}

return size;
Expand All @@ -79,11 +84,29 @@ class StringT extends Base {
stream.writeString(val, encoding);

if ((this.length == null)) {
return stream.writeUInt8(0x00);
return encodingWidth(encoding) == 2 ?
stream.writeUInt16LE(0x0000) :
stream.writeUInt8(0x00);
}
}
}

/**
 * Returns the width, in bytes, of a single code unit for the given encoding.
 * This is also the width of the null terminator used for zero-terminated
 * strings in that encoding.
 *
 * The encoding name is matched case-insensitively and without regard to
 * hyphens or underscores, so 'utf16le', 'utf16-le', 'utf-16le' and 'UTF-16LE'
 * are all treated the same.
 *
 * @param {string} encoding - Name of the string encoding.
 * @returns {number} 2 for the UTF-16 / UCS-2 family, 1 for everything else.
 */
function encodingWidth(encoding) {
  // Normalise the spelling so every common alias of an encoding matches.
  const name = String(encoding).toLowerCase().replace(/[-_]/g, '');

  switch (name) {
    case 'utf16le':
    case 'utf16be':
    case 'utf16':
    case 'ucs2':
      return 2;
    default:
      // 'ascii', 'utf8' (byte-based for zero-terminated strings) and any
      // other encoding are treated as byte-oriented rather than rejected, so
      // that encodings not listed here keep working with a one-byte terminator.
      // NOTE(review): 4-byte encodings such as utf-32 would need their own
      // entry if null-terminated support for them is ever required.
      return 1;
  }
}

function byteLength(string, encoding) {
switch (encoding) {
case 'ascii':
Expand Down
34 changes: 33 additions & 1 deletion test/String.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import assert from 'assert';
import {String as StringT, uint8, DecodeStream, EncodeStream} from 'restructure';
import {String as StringT, uint16le, uint8, DecodeStream, Struct} from 'restructure';

describe('String', function() {
describe('decode', function() {
Expand Down Expand Up @@ -40,6 +40,18 @@ describe('String', function() {
const string = new StringT(null, 'utf8');
assert.equal(string.fromBuffer(Buffer.from('🍻')), '🍻');
});

it('should decode two-byte null-terminated string for utf16le', function() {
const stream = new DecodeStream(Buffer.from('🍻\x00', 'utf16le'));
const string = new StringT(null, 'utf16le');
assert.equal(string.decode(stream), '🍻');
assert.equal(stream.pos, 6);
});

it('should decode remainder of buffer when null-byte missing, utf16le', function() {
const string = new StringT(null, 'utf16le');
assert.equal(string.fromBuffer(Buffer.from('🍻', 'utf16le')), '🍻');
});
});

describe('size', function() {
Expand Down Expand Up @@ -73,6 +85,11 @@ describe('String', function() {
assert.equal(string.size('🍻'), 5);
});

it('should take null-byte into account, utf16le', function() {
const string = new StringT(null, 'utf16le');
assert.equal(string.size('🍻'), 6);
});

it('should use defined length if no value given', function() {
const array = new StringT(10);
assert.equal(array.size(), 10);
Expand Down Expand Up @@ -109,5 +126,20 @@ describe('String', function() {
const string = new StringT(null, 'utf8');
assert.deepEqual(string.toBuffer('🍻'), Buffer.from('🍻\x00'));
});

it('should encode using string length, utf16le', function() {
const string = new StringT(16, 'utf16le');
assert.deepEqual(string.toBuffer('testing'), Buffer.from('testing', 'utf16le'));
});

it('should encode length as number before string utf16le', function() {
const string = new StringT(uint16le, 'utf16le');
assert.deepEqual(string.toBuffer('testing 😜'), Buffer.from('\u0014testing 😜', 'utf16le'));
});

it('should encode two-byte null-terminated string for UTF-16', function() {
const string = new StringT(null, 'utf16le');
assert.deepEqual(string.toBuffer('🍻'), Buffer.from('🍻\x00', 'utf16le'));
});
});
});

0 comments on commit 1881ef0

Please sign in to comment.