// packages/SwingSet/src/netstring.js
// adapted from 'netstring-stream', https://github.com/tlivings/netstring-stream/
import { Transform } from 'stream';

const COLON = 58; // byte value of ':'
const COMMA = 44; // byte value of ','

// A netstring length prefix consists solely of ASCII decimal digits.
// parseInt() alone would also accept '1x', '1.5', ' 1' or '-0'.
const DECIMAL_DIGITS = /^[0-9]+$/;

/**
 * Wrap a payload in netstring framing: `${byteLength}:${payload},`.
 *
 * @param {Buffer} data the payload bytes
 * @returns {Buffer} the netstring-wrapped payload
 */
export function encode(data) {
  const prefix = Buffer.from(`${data.length}:`);
  const suffix = Buffer.from(',');
  return Buffer.concat([prefix, data, suffix]);
}

/**
 * Build a Transform whose writable side takes one Buffer per write (object
 * mode) and whose readable side is a byte pipe of netstring-wrapped data.
 *
 * Writing anything other than a Buffer fails the stream with an Error.
 *
 * @returns {Transform}
 */
export function encoderStream() {
  function transform(chunk, encoding, callback) {
    if (!Buffer.isBuffer(chunk)) {
      // Report through the callback rather than throwing: stream
      // implementations must not throw from inside transform().
      callback(Error('stream requires Buffers'));
      return;
    }
    let err;
    try {
      this.push(encode(chunk));
    } catch (e) {
      err = e;
    }
    callback(err);
  }
  return new Transform({ transform, writableObjectMode: true });
}

/**
 * Split a Buffer into the complete netstrings it starts with.
 *
 * Input is a Buffer containing zero or more netstrings and maybe some
 * leftover bytes. Output is zero or more decoded Buffers, one per netstring,
 * plus a Buffer of leftover bytes (an incomplete trailing netstring, to be
 * retried once more data has arrived). All returned Buffers are
 * `subarray()` views onto `data`, not copies.
 *
 * @param {Buffer} data
 * @returns {{ leftover: Buffer, payloads: Buffer[] }}
 * @throws {Error} if a length prefix is not a plain decimal integer, or if a
 * payload is not followed by a comma
 */
export function decode(data) {
  let start = 0;
  const payloads = [];

  for (;;) {
    const colon = data.indexOf(COLON, start);
    if (colon === -1) {
      break; // still waiting for `${LENGTH}:`
    }
    const sizeString = data.toString('utf-8', start, colon);
    const size = DECIMAL_DIGITS.test(sizeString)
      ? Number.parseInt(sizeString, 10)
      : NaN;
    // Reject non-digits, and also digit runs so long that they parse to an
    // unsafe integer or Infinity: no Buffer could ever satisfy them, so we
    // would otherwise wait for more data forever.
    if (!Number.isSafeInteger(size)) {
      throw Error(`unparseable size '${sizeString}', should be integer`);
    }
    const payloadStart = colon + 1;
    const payloadEnd = payloadStart + size;
    if (data.length < payloadEnd + 1) {
      break; // still waiting for `${DATA},`
    }
    if (data[payloadEnd] !== COMMA) {
      throw Error(`malformed netstring: not terminated by comma`);
    }
    payloads.push(data.subarray(payloadStart, payloadEnd));
    start = payloadEnd + 1;
  }

  const leftover = data.subarray(start);
  return { leftover, payloads };
}

/**
 * Build a Transform whose writable side is a byte pipe of netstrings and
 * whose readable side emits one Buffer per decoded netstring (object mode).
 *
 * A malformed netstring fails the stream with the Error thrown by decode().
 *
 * @returns {Transform}
 */
export function decoderStream() {
  // TODO: it would be more efficient to accumulate pending data in an array,
  // rather than doing a concat each time
  let buffered = Buffer.from('');

  function transform(chunk, encoding, callback) {
    if (!Buffer.isBuffer(chunk)) {
      // Report through the callback rather than throwing: stream
      // implementations must not throw from inside transform().
      callback(Error('stream requires Buffers'));
      return;
    }
    buffered = Buffer.concat([buffered, chunk]);
    let err;
    try {
      const { leftover, payloads } = decode(buffered);
      buffered = leftover;
      for (const payload of payloads) {
        this.push(payload);
      }
    } catch (e) {
      err = e;
    }
    // we buffer all data internally, to accommodate netstrings larger than
    // Transform's default buffer size, and callback() indicates that we've
    // consumed the input
    callback(err);
  }

  return new Transform({ transform, readableObjectMode: true });
}
// packages/SwingSet/test/test-netstring.js
import '@agoric/install-ses'; // adds 'harden' to global

import test from 'ava';
import { encode, encoderStream, decode, decoderStream } from '../src/netstring';

const umlaut = 'ümlaut';
const umlautBuffer = Buffer.from(umlaut, 'utf-8');
// the following string may not render in your editor, but it contains four
// emoji glued together, which is frequently rendered as a single glyph.
const emoji = '👨‍👨‍👧‍👧';
const emojiBuffer = Buffer.from(emoji, 'utf-8');
// They are:
// U+1F468 "MAN"
// U+200D "ZERO WIDTH JOINER"
// U+1F468 "MAN"
// U+200D "ZERO WIDTH JOINER"
// U+1F467 "GIRL"
// U+200D "ZERO WIDTH JOINER"
// U+1F467 "GIRL"

// The emoji are off the BMP and require two UTF-16 code units each, while
// the joiner only requires one. So JavaScript considers the length to be
// 2+1+2+1+2+1+2 = 11. The UTF-8 encoding needs four bytes for each emoji and
// three for each joiner, so the Buffer length is 4+3+4+3+4+3+4 = 25.

// Convert each string to a Buffer. Deliberately not `.map(Buffer.from)`:
// map() would also pass the element index and the whole array, which
// Buffer.from() would receive as its encoding/offset and length arguments.
const toBuffers = strings => strings.map(s => Buffer.from(s));

// Sanity-check the fixtures: string length and UTF-8 byte length differ.
test('setup', t => {
  t.is(umlaut.length, 6);
  t.is(umlautBuffer.length, 7);
  t.is(emoji.length, 11);
  t.is(emojiBuffer.length, 25);
});

test('encode', t => {
  // encode(input) must produce exactly the bytes of `expected`
  function eq(input, expected) {
    const encoded = encode(Buffer.from(input));
    const expBuf = Buffer.from(expected);
    if (encoded.compare(expBuf) !== 0) {
      // print both sides as text to make a mismatch easier to read
      console.log(`got : ${encoded}`);
      console.log(`want: ${expBuf}`);
    }
    t.deepEqual(encoded, expBuf);
  }

  eq('', '0:,');
  eq('a', '1:a,');
  eq('abc', '3:abc,');
  // the length prefix counts UTF-8 bytes, not string characters
  let expectedBuffer = Buffer.from(`7:${umlaut},`, 'utf-8');
  eq(umlautBuffer, expectedBuffer);
  expectedBuffer = Buffer.from(`25:${emoji},`, 'utf-8');
  eq(emojiBuffer, expectedBuffer);
});

// The assertions below run immediately after each write(), so they rely on
// the stream emitting 'data' synchronously during write().
test('encode stream', t => {
  const e = encoderStream();
  const chunks = [];
  e.on('data', data => chunks.push(data));
  e.write(Buffer.from(''));
  const b1 = Buffer.from('0:,');
  t.deepEqual(Buffer.concat(chunks), b1);
  e.write(Buffer.from('hello'));
  const b2 = Buffer.from('5:hello,');
  t.deepEqual(Buffer.concat(chunks), Buffer.concat([b1, b2]));
  e.write(umlautBuffer);
  const b3 = Buffer.concat([Buffer.from('7:'), umlautBuffer, Buffer.from(',')]);
  t.deepEqual(Buffer.concat(chunks), Buffer.concat([b1, b2, b3]));
  e.write(emojiBuffer);
  const b4 = Buffer.concat([Buffer.from('25:'), emojiBuffer, Buffer.from(',')]);
  t.deepEqual(Buffer.concat(chunks), Buffer.concat([b1, b2, b3, b4]));

  // ending the stream must not emit anything further
  e.end();
  t.deepEqual(Buffer.concat(chunks), Buffer.concat([b1, b2, b3, b4]));
});

test('decode', t => {
  // decode(input) must yield exactly these payloads and these leftover bytes
  function eq(input, expPayloads, expLeftover) {
    const encPayloads = toBuffers(expPayloads);
    const encLeftover = Buffer.from(expLeftover);

    const { payloads, leftover } = decode(Buffer.from(input));
    t.deepEqual(payloads, encPayloads);
    t.deepEqual(leftover, encLeftover);
  }

  // feed progressively longer prefixes of '0:,1:a,'
  eq('', [], '');
  eq('0', [], '0');
  eq('0:', [], '0:');
  eq('0:,', [''], '');
  eq('0:,1', [''], '1');
  eq('0:,1:', [''], '1:');
  eq('0:,1:a', [''], '1:a');
  eq('0:,1:a,', ['', 'a'], '');

  let expectedBuffer = Buffer.from(`7:${umlaut},`, 'utf-8');
  eq(expectedBuffer, [umlaut], '');

  expectedBuffer = Buffer.from(`25:${emoji},`, 'utf-8');
  eq(expectedBuffer, [emoji], '');

  // decode(input) must throw an Error with exactly this message
  function bad(input, message) {
    t.throws(() => decode(Buffer.from(input)), { message });
  }

  // Note: a bare 'a' (no colon yet) is not rejected. decode() only inspects
  // the length prefix once it has seen the colon, so until then the bytes
  // are returned as leftover.
  bad('a:', `unparseable size 'a', should be integer`);
  bad('1:ab', 'malformed netstring: not terminated by comma');
});

// As with 'encode stream', each check runs right after write() and so relies
// on 'data' being emitted synchronously.
test('decode stream', t => {
  const d = decoderStream();
  function write(s) {
    d.write(Buffer.from(s));
  }

  const msgs = [];
  d.on('data', msg => msgs.push(msg));

  // everything decoded so far must equal expectedMessages
  function eq(expectedMessages) {
    t.deepEqual(msgs, toBuffers(expectedMessages));
  }

  // a netstring delivered one byte at a time
  write('');
  eq([]);
  write('0');
  eq([]);
  write(':');
  eq([]);
  write(',');
  eq(['']);

  // netstrings split across, and packed within, writes
  write('1:');
  eq(['']);
  write('a,2:ab');
  eq(['', 'a']);
  write(',');
  eq(['', 'a', 'ab']);
  write('3:abc,4:abcd,5:abcde,');
  eq(['', 'a', 'ab', 'abc', 'abcd', 'abcde']);
});