Skip to content

Commit

Permalink
Merge pull request #1808 from Agoric/1807-netstring
Browse files Browse the repository at this point in the history
fix: add netstring encode/decode/stream library to swingset
  • Loading branch information
warner authored Sep 20, 2020
2 parents bf8275f + 605f7df commit e3c969b
Show file tree
Hide file tree
Showing 2 changed files with 230 additions and 0 deletions.
92 changes: 92 additions & 0 deletions packages/SwingSet/src/netstring.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// adapted from 'netstring-stream', https://github.com/tlivings/netstring-stream/
const { Transform } = require('stream');

const COLON = 58;
const COMMA = 44;

/**
 * Wrap a payload in netstring framing: `<length>:<payload>,`.
 *
 * The length prefix is `data.length` rendered in decimal, i.e. the number of
 * bytes in the Buffer (not the number of characters it may represent).
 *
 * @param {Buffer} data payload bytes to wrap
 * @returns {Buffer} a new Buffer holding the length prefix, a colon, the
 * payload, and a trailing comma
 */
export function encode(data) {
  const lengthHeader = Buffer.from(`${data.length}:`);
  const terminator = Buffer.from(',');
  const pieces = [lengthHeader, data, terminator];
  return Buffer.concat(pieces);
}

/**
 * Create a Transform stream that netstring-encodes everything written to it.
 *
 * The writable side is in object mode, so each `write()` is framed as exactly
 * one netstring; the readable side emits the encoded bytes. Every chunk
 * written must be a Buffer.
 *
 * Failures (a non-Buffer chunk, or an error while encoding) are reported
 * through the transform callback, so they surface as an 'error' event on the
 * stream.
 *
 * @returns {Transform} the encoder stream
 */
export function encoderStream() {
  function transform(chunk, encoding, callback) {
    if (!Buffer.isBuffer(chunk)) {
      // Report through the callback instead of throwing: throwing from inside
      // a stream implementation method is undefined behaviour in Node, and
      // this keeps the error path identical to the try/catch below.
      callback(Error('stream requires Buffers'));
      return;
    }
    let err;
    try {
      this.push(encode(chunk));
    } catch (e) {
      err = e;
    }
    callback(err);
  }
  return new Transform({ transform, writableObjectMode: true });
}

/**
 * Decode as many complete netstrings as possible from the front of `data`.
 *
 * `data` holds zero or more netstrings (`<length>:<payload>,`), possibly
 * followed by the beginning of another one. Decoding stops at the first
 * netstring that is not yet complete; those bytes are returned as `leftover`
 * so the caller can prepend them to the next chunk of input.
 *
 * The returned Buffers are `subarray()` views, so they share memory with
 * `data`.
 *
 * @param {Buffer} data bytes to decode
 * @returns {{ leftover: Buffer, payloads: Buffer[] }} the decoded payloads,
 * one per complete netstring, plus the undecoded tail of `data`
 * @throws {Error} if a length prefix is not a plain run of ASCII digits, or
 * if a payload is not followed by a comma
 */
export function decode(data) {
  // TODO: it would be more efficient to accumulate pending data in an array,
  // rather than doing a concat each time
  let start = 0;
  const payloads = [];

  for (;;) {
    const colon = data.indexOf(COLON, start);
    if (colon === -1) {
      break; // still waiting for `${LENGTH}:`
    }
    const sizeString = data.toString('utf-8', start, colon);
    const size = parseInt(sizeString, 10);
    // parseInt() alone is too lenient: it skips leading whitespace, accepts a
    // sign, and ignores trailing garbage ('1x', '1.5'), so require the prefix
    // to consist solely of ASCII digits. This also rejects '' and negative
    // numbers. A length too large to be represented exactly could never be
    // satisfied and would leave the caller buffering forever.
    if (!/^[0-9]+$/.test(sizeString) || !Number.isSafeInteger(size)) {
      throw Error(`unparseable size '${sizeString}', should be integer`);
    }
    if (data.length < colon + 1 + size + 1) {
      break; // still waiting for `${DATA},`
    }
    if (data[colon + 1 + size] !== COMMA) {
      throw Error(`malformed netstring: not terminated by comma`);
    }
    payloads.push(data.subarray(colon + 1, colon + 1 + size));
    start = colon + 1 + size + 1;
  }

  const leftover = data.subarray(start);
  return { leftover, payloads };
}

/**
 * Create a Transform stream that splits a byte stream into netstring
 * payloads.
 *
 * The writable side accepts raw bytes in arbitrarily sized chunks; the
 * readable side is in object mode and emits one Buffer per complete
 * netstring. Bytes belonging to a not-yet-complete netstring are held
 * internally until more input arrives.
 *
 * Failures (a non-Buffer chunk, or malformed input rejected by `decode`) are
 * reported through the transform callback, so they surface as an 'error'
 * event on the stream.
 *
 * @returns {Transform} the decoder stream
 */
export function decoderStream() {
  let buffered = Buffer.from('');

  function transform(chunk, encoding, callback) {
    if (!Buffer.isBuffer(chunk)) {
      // Defensive check: the writable side is not in object mode, so Node
      // normally hands us Buffers. Report through the callback rather than
      // throwing, matching the try/catch path below.
      callback(Error('stream requires Buffers'));
      return;
    }
    buffered = Buffer.concat([buffered, chunk]);
    let err;
    try {
      const { leftover, payloads } = decode(buffered);
      buffered = leftover;
      for (const payload of payloads) {
        this.push(payload);
      }
    } catch (e) {
      err = e;
    }
    // we buffer all data internally, to accommodate netstrings larger than
    // Transform's default buffer size, and callback() indicates that we've
    // consumed the input
    callback(err);
  }

  return new Transform({ transform, readableObjectMode: true });
}
138 changes: 138 additions & 0 deletions packages/SwingSet/test/test-netstring.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import '@agoric/install-ses'; // adds 'harden' to global

import test from 'ava';
import { encode, encoderStream, decode, decoderStream } from '../src/netstring';

// Test fixtures: strings whose JavaScript length (UTF-16 code units) differs
// from the byte length of their UTF-8 encoding, so that mixing up the two
// when computing a netstring length prefix would be detected.
const umlaut = 'ümlaut';
const umlautBuffer = Buffer.from(umlaut, 'utf-8');
// The following string may not render in your editor, but it contains four
// emoji glued together, which is frequently rendered as a single glyph.
const emoji = '👨‍👨‍👧‍👧';
const emojiBuffer = Buffer.from(emoji, 'utf-8');
// The code points are:
// U+1F468 "MAN"
// U+200D "ZERO WIDTH JOINER"
// U+1F468 "MAN"
// U+200D "ZERO WIDTH JOINER"
// U+1F467 "GIRL"
// U+200D "ZERO WIDTH JOINER"
// U+1F467 "GIRL"

// The emoji are off the BMP and require two UTF-16 code units each, while
// the joiner only requires one. So JavaScript considers the length to be
// 2+1+2+1+2+1+2 = 11. The UTF-8 encoding needs four bytes for each emoji, and
// three for each joiner, so the Buffer length is 4+3+4+3+4+3+4 = 25.

// Sanity-check the fixtures: string lengths count UTF-16 code units, while
// Buffer lengths count UTF-8 bytes.
test('setup', t => {
  const expectedLengths = [
    [umlaut, 6],
    [umlautBuffer, 7],
    [emoji, 11],
    [emojiBuffer, 25],
  ];
  for (const [value, expected] of expectedLengths) {
    t.is(value.length, expected);
  }
});

// encode() must prefix the payload with its length in bytes, not characters.
test('encode', t => {
  // Encode `input` and require a byte-for-byte match with `expected`; both
  // may be given as a string or as a Buffer.
  const check = (input, expected) => {
    const want = Buffer.from(expected);
    const got = encode(Buffer.from(input));
    if (!got.equals(want)) {
      console.log(`got : ${got}`);
      console.log(`want: ${want}`);
    }
    t.deepEqual(got, want);
  };

  const cases = [
    ['', '0:,'],
    ['a', '1:a,'],
    ['abc', '3:abc,'],
    [umlautBuffer, Buffer.from(`7:${umlaut},`, 'utf-8')],
    [emojiBuffer, Buffer.from(`25:${emoji},`, 'utf-8')],
  ];
  for (const [input, expected] of cases) {
    check(input, expected);
  }
});

// Each write() to the encoder stream must come out as one netstring, and
// ending the stream must not emit anything further.
test('encode stream', async t => {
  const stream = encoderStream();
  const received = [];
  stream.on('data', chunk => received.push(chunk));

  // Build the expected framing around an existing Buffer.
  const framed = (prefix, body) =>
    Buffer.concat([Buffer.from(prefix), body, Buffer.from(',')]);

  const steps = [
    [Buffer.from(''), Buffer.from('0:,')],
    [Buffer.from('hello'), Buffer.from('5:hello,')],
    [umlautBuffer, framed('7:', umlautBuffer)],
    [emojiBuffer, framed('25:', emojiBuffer)],
  ];

  // After every write, the accumulated output must equal all netstrings
  // expected so far.
  const expected = [];
  for (const [input, netstring] of steps) {
    stream.write(input);
    expected.push(netstring);
    t.deepEqual(Buffer.concat(received), Buffer.concat(expected));
  }

  stream.end();
  t.deepEqual(Buffer.concat(received), Buffer.concat(expected));
});

// decode() must return every complete netstring plus the undecoded tail, and
// must reject malformed input.
test('decode', t => {
  // Decode `input` (string or Buffer) and compare against the expected
  // payloads and leftover bytes, all given as strings.
  function eq(input, expPayloads, expLeftover) {
    // Wrap Buffer.from in an arrow function: map() also passes the index and
    // the array, which Buffer.from would otherwise receive as its
    // encoding/offset and length arguments.
    const encPayloads = expPayloads.map(payload => Buffer.from(payload));
    const encLeftover = Buffer.from(expLeftover);

    const { payloads, leftover } = decode(Buffer.from(input));
    t.deepEqual(payloads, encPayloads);
    t.deepEqual(leftover, encLeftover);
  }

  eq('', [], '');
  eq('0', [], '0');
  eq('0:', [], '0:');
  eq('0:,', [''], '');
  eq('0:,1', [''], '1');
  eq('0:,1:', [''], '1:');
  eq('0:,1:a', [''], '1:a');
  eq('0:,1:a,', ['', 'a'], '');

  let expectedBuffer = Buffer.from(`7:${umlaut},`, 'utf-8');
  eq(expectedBuffer, [umlaut], '');

  expectedBuffer = Buffer.from(`25:${emoji},`, 'utf-8');
  eq(expectedBuffer, [emoji], '');

  // Decoding `input` must throw an Error with exactly this message.
  function bad(input, message) {
    t.throws(() => decode(Buffer.from(input)), { message });
  }

  // TODO: a bare non-numeric prefix such as 'a' (no colon yet) is not
  // rejected; decode() returns it as leftover because it stops at the first
  // position where no colon is found.
  bad('a:', `unparseable size 'a', should be integer`);
  bad('1:ab', 'malformed netstring: not terminated by comma');
});

// The decoder stream must emit one message per complete netstring, no matter
// how the input bytes are split across write() calls.
test('decode stream', async t => {
  const d = decoderStream();
  function write(s) {
    d.write(Buffer.from(s));
  }

  const msgs = [];
  d.on('data', msg => msgs.push(msg));

  // Assert that exactly these messages (given as strings) have been emitted
  // so far.
  function eq(expectedMessages) {
    // Wrap Buffer.from in an arrow function: map() also passes the index and
    // the array, which Buffer.from would otherwise receive as its
    // encoding/offset and length arguments.
    t.deepEqual(
      msgs,
      expectedMessages.map(message => Buffer.from(message)),
    );
  }

  // '0:,' delivered one byte at a time
  write('');
  eq([]);
  write('0');
  eq([]);
  write(':');
  eq([]);
  write(',');
  eq(['']);

  // a chunk boundary inside a netstring, and a chunk spanning two netstrings
  write('1:');
  eq(['']);
  write('a,2:ab');
  eq(['', 'a']);
  write(',');
  eq(['', 'a', 'ab']);
  // several complete netstrings in a single chunk
  write('3:abc,4:abcd,5:abcde,');
  eq(['', 'a', 'ab', 'abc', 'abcd', 'abcde']);
});

0 comments on commit e3c969b

Please sign in to comment.