-
Notifications
You must be signed in to change notification settings - Fork 215
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1808 from Agoric/1807-netstring
fix: add netstring encode/decode/stream library to swingset
- Loading branch information
Showing 2 changed files with 230 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// adapted from 'netstring-stream', https://github.com/tlivings/netstring-stream/ | ||
const { Transform } = require('stream'); | ||
|
||
// Byte values of the netstring delimiters, compared against raw Buffer bytes: | ||
// 58 is ASCII ':' (ends the length prefix), 44 is ASCII ',' (ends the payload). | ||
const COLON = 58; | ||
const COMMA = 44; | ||
|
||
/**
 * Wrap a payload in netstring framing: `<length>:<payload>,`.
 *
 * @param {Buffer} data payload bytes; `data.length` becomes the decimal prefix
 * @returns {Buffer} a new Buffer holding the length prefix, the payload and
 * the terminating comma
 */
export function encode(data) {
  const header = `${data.length}:`;
  const pieces = [Buffer.from(header), data, Buffer.from(',')];
  return Buffer.concat(pieces);
}
|
||
/**
 * Create a Transform stream that netstring-encodes every chunk written to it.
 *
 * Input is a sequence of Buffers (the writable side is in object mode, so
 * each written Buffer is framed as exactly one netstring); output is a byte
 * pipe carrying the encoded netstrings.
 *
 * Any failure, including a chunk that is not a Buffer, is reported through
 * the transform callback so that it surfaces as a stream 'error' instead of
 * being thrown out of the stream machinery into whoever called write().
 *
 * @returns {Transform} the encoding stream
 */
export function encoderStream() {
  function transform(chunk, encoding, callback) {
    if (!Buffer.isBuffer(chunk)) {
      callback(Error('stream requires Buffers'));
      return;
    }
    let encoded;
    try {
      encoded = encode(chunk);
    } catch (e) {
      callback(e);
      return;
    }
    // Handing the data to callback() is equivalent to this.push(encoded)
    // followed by callback().
    callback(null, encoded);
  }
  return new Transform({ transform, writableObjectMode: true });
}
|
||
/**
 * Decode as many complete netstrings as possible from the front of a Buffer.
 *
 * Input is a Buffer containing zero or more netstrings and maybe some
 * leftover bytes. Output is zero or more decoded Buffers, one per netstring,
 * plus a Buffer of leftover bytes (an incomplete trailing netstring that
 * needs more data before it can be decoded).
 *
 * @param {Buffer} data bytes to decode
 * @returns {{ leftover: Buffer, payloads: Buffer[] }} `payloads` and
 * `leftover` are views onto `data` (made with subarray), not copies
 * @throws {Error} if a length prefix is not a run of decimal digits, or if a
 * payload is not followed by a comma
 */
export function decode(data) {
  // TODO: it would be more efficient to accumulate pending data in an array,
  // rather than doing a concat each time
  let start = 0;
  const payloads = [];

  for (;;) {
    const colon = data.indexOf(COLON, start);
    if (colon === -1) {
      break; // still waiting for `${LENGTH}:`
    }
    const sizeString = data.toString('utf-8', start, colon);
    // parseInt() on its own stops at the first non-digit, so it would accept
    // '1x', '3.5', ' 2' or '+2' and silently decode corrupt framing. Require
    // the whole prefix to be decimal digits (this also rejects '', NaN and
    // negative numbers).
    if (!/^[0-9]+$/.test(sizeString)) {
      throw Error(`unparseable size '${sizeString}', should be integer`);
    }
    const size = parseInt(sizeString, 10);
    const end = colon + 1 + size; // index of the terminating comma
    if (data.length < end + 1) {
      break; // still waiting for `${DATA},`
    }
    if (data[end] !== COMMA) {
      throw Error(`malformed netstring: not terminated by comma`);
    }
    payloads.push(data.subarray(colon + 1, end));
    start = end + 1;
  }

  const leftover = data.subarray(start);
  return { leftover, payloads };
}
|
||
/**
 * Create a Transform stream that turns a byte pipe of netstrings into a
 * sequence of Buffers, one per decoded netstring.
 *
 * The readable side is in object mode. Bytes that do not yet form a complete
 * netstring are held in a closure variable until later chunks complete them.
 *
 * @returns {Transform} the decoding stream
 */
export function decoderStream() {
  let pending = Buffer.from('');

  function transform(chunk, encoding, callback) {
    if (!Buffer.isBuffer(chunk)) {
      throw Error('stream requires Buffers');
    }
    pending = Buffer.concat([pending, chunk]);
    let failure;
    try {
      const result = decode(pending);
      pending = result.leftover;
      result.payloads.forEach(payload => {
        this.push(payload);
      });
    } catch (e) {
      failure = e;
    }
    // All data is buffered here rather than in the Transform, so netstrings
    // larger than Transform's default buffer size still work; calling
    // callback() signals that this chunk has been consumed.
    callback(failure);
  }

  return new Transform({ readableObjectMode: true, transform });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import '@agoric/install-ses'; // adds 'harden' to global | ||
|
||
import test from 'ava'; | ||
import { encode, encoderStream, decode, decoderStream } from '../src/netstring'; | ||
|
||
// 'ümlaut', spelled with an escape so the first character is always the
// single precomposed code point U+00FC (2 bytes in UTF-8), regardless of any
// Unicode normalization applied to this file.
const umlaut = '\u00FCmlaut';
const umlautBuffer = Buffer.from(umlaut, 'utf-8');
// Four emoji glued together with zero-width joiners, which is frequently
// rendered as a single glyph. The string is written with escapes because the
// joiners are invisible and are easily lost when the file is copied or
// edited. The code points are:
// U+1F468 "MAN"
// U+200D "ZERO WIDTH JOINER"
// U+1F468 "MAN"
// U+200D "ZERO WIDTH JOINER"
// U+1F467 "GIRL"
// U+200D "ZERO WIDTH JOINER"
// U+1F467 "GIRL"
const emoji = '\u{1F468}\u200D\u{1F468}\u200D\u{1F467}\u200D\u{1F467}';
const emojiBuffer = Buffer.from(emoji, 'utf-8');

// The emoji are off the BMP and require two UTF-16 code units each, while the
// joiner only requires one. So JavaScript considers the length to be
// 2+1+2+1+2+1+2 = 11. The UTF-8 encoding needs four bytes for each emoji, and
// three for each joiner, so the Buffer length is 4+3+4+3+4+3+4 = 25.
|
||
// Sanity-check the fixtures: string lengths count UTF-16 code units, Buffer
// lengths count UTF-8 bytes.
test('setup', t => {
  const lengthChecks = [
    [umlaut.length, 6],
    [umlautBuffer.length, 7],
    [emoji.length, 11],
    [emojiBuffer.length, 25],
  ];
  for (const [actual, expected] of lengthChecks) {
    t.is(actual, expected);
  }
});
|
||
// encode() must emit `<byte length>:<payload>,` for ASCII and multi-byte input.
test('encode', t => {
  // Encode `input` and require a byte-for-byte match with `expected`; both
  // values are printed on a mismatch to make the difference easy to see.
  const eq = (input, expected) => {
    const got = encode(Buffer.from(input));
    const want = Buffer.from(expected);
    const same = got.compare(want) === 0;
    if (!same) {
      console.log(`got : ${got}`);
      console.log(`want: ${want}`);
    }
    t.deepEqual(got, want);
  };

  eq('', '0:,');
  eq('a', '1:a,');
  eq('abc', '3:abc,');

  // the length prefix counts UTF-8 bytes, not characters
  const umlautNetstring = Buffer.from(`7:${umlaut},`, 'utf-8');
  eq(umlautBuffer, umlautNetstring);
  const emojiNetstring = Buffer.from(`25:${emoji},`, 'utf-8');
  eq(emojiBuffer, emojiNetstring);
});
|
||
// Each Buffer written to encoderStream() must come out as one netstring,
// appended to everything emitted so far.
test('encode stream', async t => {
  const stream = encoderStream();
  const received = [];
  stream.on('data', data => received.push(data));

  // [written chunk, netstring expected to be emitted for it]
  const cases = [
    [Buffer.from(''), Buffer.from('0:,')],
    [Buffer.from('hello'), Buffer.from('5:hello,')],
    [
      umlautBuffer,
      Buffer.concat([Buffer.from('7:'), umlautBuffer, Buffer.from(',')]),
    ],
    [
      emojiBuffer,
      Buffer.concat([Buffer.from('25:'), emojiBuffer, Buffer.from(',')]),
    ],
  ];

  const expectedSoFar = [];
  for (const [input, netstring] of cases) {
    stream.write(input);
    expectedSoFar.push(netstring);
    t.deepEqual(Buffer.concat(received), Buffer.concat(expectedSoFar));
  }

  // ending the stream must not emit anything further
  stream.end();
  t.deepEqual(Buffer.concat(received), Buffer.concat(expectedSoFar));
});
|
||
// decode() must return every complete netstring plus the undecoded remainder,
// and reject malformed input.
test('decode', t => {
  // Decode `input` and check both the complete payloads and the leftover bytes.
  function eq(input, expPayloads, expLeftover) {
    // Wrap Buffer.from in an arrow function so that map()'s extra
    // (index, array) arguments are not forwarded to it.
    const encPayloads = expPayloads.map(payload => Buffer.from(payload));
    const encLeftover = Buffer.from(expLeftover);

    const { payloads, leftover } = decode(Buffer.from(input));
    t.deepEqual(payloads, encPayloads);
    t.deepEqual(leftover, encLeftover);
  }

  eq('', [], '');
  eq('0', [], '0');
  eq('0:', [], '0:');
  eq('0:,', [''], '');
  eq('0:,1', [''], '1');
  eq('0:,1:', [''], '1:');
  eq('0:,1:a', [''], '1:a');
  eq('0:,1:a,', ['', 'a'], '');

  let expectedBuffer = Buffer.from(`7:${umlaut},`, 'utf-8');
  eq(expectedBuffer, [umlaut], '');

  expectedBuffer = Buffer.from(`25:${emoji},`, 'utf-8');
  eq(expectedBuffer, [emoji], '');

  // Decode `input` and require it to throw an Error with exactly `message`.
  function bad(input, message) {
    t.throws(() => decode(Buffer.from(input)), { message });
  }

  // Note: a non-numeric prefix with no colon yet (e.g. 'a') is not rejected;
  // decode() only inspects the size once it has found the colon.
  bad('a:', `unparseable size 'a', should be integer`);
  bad('1:ab', 'malformed netstring: not terminated by comma');
});
|
||
// decoderStream() must emit one Buffer per netstring, no matter how the
// bytes are split across writes.
test('decode stream', async t => {
  const d = decoderStream();
  // Feed the string `s` to the decoder as a single chunk of bytes.
  function write(s) {
    d.write(Buffer.from(s));
  }

  const msgs = [];
  d.on('data', msg => msgs.push(msg));

  // Check that exactly `expectedMessages` have been emitted so far.
  function eq(expectedMessages) {
    // Wrap Buffer.from in an arrow function so that map()'s extra
    // (index, array) arguments are not forwarded to it.
    t.deepEqual(
      msgs,
      expectedMessages.map(message => Buffer.from(message)),
    );
  }

  write('');
  eq([]);
  write('0');
  eq([]);
  write(':');
  eq([]);
  write(',');
  eq(['']);

  write('1:');
  eq(['']);
  write('a,2:ab');
  eq(['', 'a']);
  write(',');
  eq(['', 'a', 'ab']);
  write('3:abc,4:abcd,5:abcde,');
  eq(['', 'a', 'ab', 'abc', 'abcd', 'abcde']);
});