From a347c162102e24acff0f9211c1eb72bd38e7196b Mon Sep 17 00:00:00 2001 From: Mathias Bynens Date: Fri, 30 Jul 2021 16:04:44 +0200 Subject: [PATCH] Encode into / decode from Uint16Array --- README.md | 12 ++-- data/encoded.json | 131 +++++++++++++++++++++++++++++++++++++- iso-8859-14.mjs | 53 +++++++++------ scripts/export-data.js | 2 +- scripts/transform-data.js | 4 +- src/iso-8859-14.src.mjs | 53 +++++++++------ tests/tests.mjs | 78 +++++++++++++---------- tests/tests.src.mjs | 76 ++++++++++++---------- 8 files changed, 293 insertions(+), 116 deletions(-) diff --git a/README.md b/README.md index 043e066..f7e1854 100644 --- a/README.md +++ b/README.md @@ -28,25 +28,25 @@ An array of strings, each representing a [label](https://encoding.spec.whatwg.or ### `iso885914.encode(input, options)` -This function takes a plain text string (the `input` parameter) and encodes it according to iso-8859-14. The return value is a ‘byte string’, i.e. a string of which each item represents an octet as per iso-8859-14. +This function takes a plain text string (the `input` parameter) and encodes it according to iso-8859-14. The return value is an environment-agnostic `Uint16Array` of which each element represents an octet as per iso-8859-14. ```js const encodedData = iso885914.encode(text); ``` -The optional `options` object and its `mode` property can be used to set the [error mode](https://encoding.spec.whatwg.org/#error-mode). For encoding, the error mode can be `'fatal'` (the default) or `'html'`. +The optional `options` object and its `mode` property can be used to set the error mode. The two available error modes are `'fatal'` (the default) and `'replacement'`. (Note: This differs from [the spec](https://encoding.spec.whatwg.org/#error-mode), which recognizes `'fatal'` and `'html'` modes for encoders. The reason behind this difference is that the spec algorithm is aimed at producing HTML, whereas this library encodes into an environment-agnostic `Uint16Array` of bytes.) 
```js const encodedData = iso885914.encode(text, { - mode: 'html' + mode: 'replacement' }); // If `text` contains a symbol that cannot be represented in iso-8859-14, -// instead of throwing an error, it will return an HTML entity for the symbol. +// instead of throwing an error, it becomes 0xFFFD. ``` ### `iso885914.decode(input, options)` -This function takes a byte string (the `input` parameter) and decodes it according to iso-8859-14. +This function decodes `input` according to iso-8859-14. The `input` parameter can either be a `Uint16Array` of which each element represents an octet as per iso-8859-14, or a ‘byte string’ (i.e. a string of which each item represents an octet as per iso-8859-14). ```js const text = iso885914.decode(encodedData); @@ -62,8 +62,6 @@ const text = iso885914.decode(encodedData, { // instead of replacing it with U+FFFD in the output, an error is thrown. ``` -For decoding a buffer (e.g. from `fs.readFile`) use `buffer.toString('binary')` to get the byte string which `decode` takes. 
- ## Notes [Similar modules for other single-byte legacy encodings are available.](https://www.npmjs.com/browse/keyword/legacy-encoding) diff --git a/data/encoded.json b/data/encoded.json index 1be7fbc..b7a86e8 100644 --- a/data/encoded.json +++ b/data/encoded.json @@ -1 +1,130 @@ -"\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF" +[ + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255 +] diff --git a/iso-8859-14.mjs 
b/iso-8859-14.mjs index eaed4c2..d7a976f 100644 --- a/iso-8859-14.mjs +++ b/iso-8859-14.mjs @@ -264,12 +264,17 @@ const INDEX_BY_POINTER = new Map([ ]); // https://encoding.spec.whatwg.org/#error-mode -const error = (codePoint, mode) => { - if (mode == 'replacement') { +const decodingError = (mode) => { + if (mode === 'replacement') { return '\uFFFD'; } - if (codePoint !== null && mode === 'html') { - return '&#' + codePoint + ';'; + // Else, `mode == 'fatal'`. + throw new Error(); +}; + +const encodingError = (mode) => { + if (mode === 'replacement') { + return 0xFFFD; } // Else, `mode == 'fatal'`. throw new Error(); @@ -286,24 +291,36 @@ export const decode = (input, options) => { if (mode !== 'replacement' && mode !== 'fatal') { mode = 'replacement'; } + + const length = input.length; + + // Support byte strings as input. + if (typeof input === 'string') { + const bytes = new Uint16Array(length); + for (let index = 0; index < length; index++) { + bytes[index] = input.charCodeAt(index); + } + input = bytes; + } + const buffer = []; - for (let index = 0; index < input.length; index++) { - const byteValue = input.charCodeAt(index); - // “If `byte` is in the range `0x00` to `0x7F`, return a code point whose - // value is `byte`.” + for (let index = 0; index < length; index++) { + const byteValue = input[index]; + // “If `byte` is an ASCII byte, return a code point whose value is + // `byte`.” if (0x00 <= byteValue && byteValue <= 0x7F) { buffer.push(stringFromCharCode(byteValue)); continue; } // “Let `code point` be the index code point for `byte − 0x80` in index - // `single-byte`.” + // single-byte.” const pointer = byteValue - 0x80; if (INDEX_BY_POINTER.has(pointer)) { // “Return a code point whose value is `code point`.” buffer.push(INDEX_BY_POINTER.get(pointer)); } else { // “If `code point` is `null`, return `error`.” - buffer.push(error(null, mode)); + buffer.push(decodingError(mode)); } } const result = buffer.join(''); @@ -321,27 +338,27 @@ export const 
encode = (input, options) => { if (mode !== 'fatal' && mode !== 'html') { mode = 'fatal'; } - const buffer = []; - for (let index = 0; index < input.length; index++) { + const length = input.length; + const result = new Uint16Array(length); + for (let index = 0; index < length; index++) { const codePoint = input.charCodeAt(index); - // “If `code point` is in the range U+0000 to U+007F, return a byte whose + // “If `code point` is an ASCII code point, return a byte whose // value is `code point`.” if (0x00 <= codePoint && codePoint <= 0x7F) { - buffer.push(stringFromCharCode(codePoint)); + result[index] = codePoint; continue; } // “Let `pointer` be the index pointer for `code point` in index - // `single-byte`.” + // single-byte.” if (INDEX_BY_CODE_POINT.has(codePoint)) { const pointer = INDEX_BY_CODE_POINT.get(codePoint); // “Return a byte whose value is `pointer + 0x80`.” - buffer.push(stringFromCharCode(pointer + 0x80)); + result[index] = pointer + 0x80; } else { // “If `pointer` is `null`, return `error` with `code point`.” - buffer.push(error(codePoint, mode)); + result[index] = encodingError(mode); } } - const result = buffer.join(''); return result; }; diff --git a/scripts/export-data.js b/scripts/export-data.js index 602b50a..7f93809 100644 --- a/scripts/export-data.js +++ b/scripts/export-data.js @@ -15,7 +15,7 @@ function objectToMap(object) { module.exports = { labels: jsesc(readJSON('data/labels.json'), { compact: false }), - encoded: jsesc(readJSON('data/encoded.json'), { wrap: true }), + encoded: jsesc(readJSON('data/encoded.json'), { wrap: true, numbers: 'hexadecimal' }), decoded: jsesc(readJSON('data/decoded.json'), { wrap: true }), indexByCodePoint: jsesc(objectToMap(readJSON('data/index-by-code-point.json')), { compact: false }), indexByPointer: jsesc(objectToMap(readJSON('data/index-by-pointer.json')), { compact: false }), diff --git a/scripts/transform-data.js b/scripts/transform-data.js index 27f3af5..ca0f574 100644 --- 
a/scripts/transform-data.js +++ b/scripts/transform-data.js @@ -13,7 +13,7 @@ function parse(source) { const indexByCodePoint = {}; const indexByPointer = {}; let decoded = ''; - let encoded = ''; + const encoded = []; var lines = source.split('\n'); for (const line of lines) { const data = line.trim().split('\t'); @@ -24,7 +24,7 @@ function parse(source) { const codePoint = Number(data[1]); const symbol = String.fromCodePoint(codePoint); decoded += symbol; - encoded += String.fromCodePoint(pointer + 0x80); + encoded.push(pointer + 0x80); indexByCodePoint[codePoint] = pointer; indexByPointer[pointer] = symbol; } diff --git a/src/iso-8859-14.src.mjs b/src/iso-8859-14.src.mjs index 0649a6c..66bb62d 100644 --- a/src/iso-8859-14.src.mjs +++ b/src/iso-8859-14.src.mjs @@ -6,12 +6,17 @@ const INDEX_BY_CODE_POINT = <%= indexByCodePoint %>; const INDEX_BY_POINTER = <%= indexByPointer %>; // https://encoding.spec.whatwg.org/#error-mode -const error = (codePoint, mode) => { - if (mode == 'replacement') { +const decodingError = (mode) => { + if (mode === 'replacement') { return '\uFFFD'; } - if (codePoint !== null && mode === 'html') { - return '&#' + codePoint + ';'; + // Else, `mode == 'fatal'`. + throw new Error(); +}; + +const encodingError = (mode) => { + if (mode === 'replacement') { + return 0xFFFD; } // Else, `mode == 'fatal'`. throw new Error(); @@ -28,24 +33,36 @@ export const decode = (input, options) => { if (mode !== 'replacement' && mode !== 'fatal') { mode = 'replacement'; } + + const length = input.length; + + // Support byte strings as input. 
+ if (typeof input === 'string') { + const bytes = new Uint16Array(length); + for (let index = 0; index < length; index++) { + bytes[index] = input.charCodeAt(index); + } + input = bytes; + } + const buffer = []; - for (let index = 0; index < input.length; index++) { - const byteValue = input.charCodeAt(index); - // “If `byte` is in the range `0x00` to `0x7F`, return a code point whose - // value is `byte`.” + for (let index = 0; index < length; index++) { + const byteValue = input[index]; + // “If `byte` is an ASCII byte, return a code point whose value is + // `byte`.” if (0x00 <= byteValue && byteValue <= 0x7F) { buffer.push(stringFromCharCode(byteValue)); continue; } // “Let `code point` be the index code point for `byte − 0x80` in index - // `single-byte`.” + // single-byte.” const pointer = byteValue - 0x80; if (INDEX_BY_POINTER.has(pointer)) { // “Return a code point whose value is `code point`.” buffer.push(INDEX_BY_POINTER.get(pointer)); } else { // “If `code point` is `null`, return `error`.” - buffer.push(error(null, mode)); + buffer.push(decodingError(mode)); } } const result = buffer.join(''); @@ -63,27 +80,27 @@ export const encode = (input, options) => { if (mode !== 'fatal' && mode !== 'html') { mode = 'fatal'; } - const buffer = []; - for (let index = 0; index < input.length; index++) { + const length = input.length; + const result = new Uint16Array(length); + for (let index = 0; index < length; index++) { const codePoint = input.charCodeAt(index); - // “If `code point` is in the range U+0000 to U+007F, return a byte whose + // “If `code point` is an ASCII code point, return a byte whose // value is `code point`.” if (0x00 <= codePoint && codePoint <= 0x7F) { - buffer.push(stringFromCharCode(codePoint)); + result[index] = codePoint; continue; } // “Let `pointer` be the index pointer for `code point` in index - // `single-byte`.” + // single-byte.” if (INDEX_BY_CODE_POINT.has(codePoint)) { const pointer = INDEX_BY_CODE_POINT.get(codePoint); // 
“Return a byte whose value is `pointer + 0x80`.” - buffer.push(stringFromCharCode(pointer + 0x80)); + result[index] = pointer + 0x80; } else { // “If `pointer` is `null`, return `error` with `code point`.” - buffer.push(error(codePoint, mode)); + result[index] = encodingError(mode); } } - const result = buffer.join(''); return result; }; diff --git a/tests/tests.mjs b/tests/tests.mjs index f00d1f6..170c259 100644 --- a/tests/tests.mjs +++ b/tests/tests.mjs @@ -3,14 +3,23 @@ import assert from 'node:assert'; import * as iso885914 from '../iso-8859-14.mjs'; console.log('Testing `iso885914.encode`…'); -assert.strictEqual( +assert.deepStrictEqual( iso885914.encode('\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F'), - '\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F', + new Uint16Array([ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + ]), 'U+0000 to U+007F remain unchanged' ); -assert.strictEqual( 
+assert.deepStrictEqual( iso885914.encode('\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\u1E02\u1E03\xA3\u010A\u010B\u1E0A\xA7\u1E80\xA9\u1E82\u1E0B\u1EF2\xAD\xAE\u0178\u1E1E\u1E1F\u0120\u0121\u1E40\u1E41\xB6\u1E56\u1E81\u1E57\u1E83\u1E60\u1EF3\u1E84\u1E85\u1E61\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\u0174\xD1\xD2\xD3\xD4\xD5\xD6\u1E6A\xD8\xD9\xDA\xDB\xDC\xDD\u0176\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\u0175\xF1\xF2\xF3\xF4\xF5\xF6\u1E6B\xF8\xF9\xFA\xFB\xFC\xFD\u0177\xFF'), - '\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF', + new Uint16Array([0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF]), 'Encoding all other symbols in the character set' ); assert.throws( @@ -41,70 +50,69 @@ assert.throws( Error, 'Mode names are case-insensitive' ); -assert.strictEqual( - iso885914.encode('\uFFFF', { 
mode: 'html' }), - '￿', - 'Encoding a code point that is invalid for this encoding returns an HTML entity in `html` mode' -); -assert.strictEqual( - iso885914.encode('\uFFFF', { mode: 'HTML' }), - '￿', - 'Mode names are case-insensitive' -); -assert.strictEqual( - iso885914.encode('\uFFFF', { mode: 'hTmL' }), - '￿', - 'Mode names are case-insensitive' -); console.log('Testing `iso885914.decode`…'); -assert.strictEqual( +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + ])), + '\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F', + 'U+0000 to U+007F remain unchanged when decoding from ArrayBuffer' +); +assert.deepStrictEqual( iso885914.decode('\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F'), '\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F 
!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F', - 'U+0000 to U+007F remain unchanged' + 'U+0000 to U+007F remain unchanged when decoding from byte string' ); -assert.strictEqual( - iso885914.decode('\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF'), +assert.deepStrictEqual( + iso885914.decode([0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF]), 
'\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\u1E02\u1E03\xA3\u010A\u010B\u1E0A\xA7\u1E80\xA9\u1E82\u1E0B\u1EF2\xAD\xAE\u0178\u1E1E\u1E1F\u0120\u0121\u1E40\u1E41\xB6\u1E56\u1E81\u1E57\u1E83\u1E60\u1EF3\u1E84\u1E85\u1E61\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\u0174\xD1\xD2\xD3\xD4\xD5\xD6\u1E6A\xD8\xD9\xDA\xDB\xDC\xDD\u0176\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\u0175\xF1\xF2\xF3\xF4\xF5\xF6\u1E6B\xF8\xF9\xFA\xFB\xFC\xFD\u0177\xFF', - 'Decoding all other symbols in the character set' + 'Decoding all other symbols in the character set when decoding from ArrayBuffer' ); -assert.strictEqual( - iso885914.decode('\uFFFF'), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF])), '\uFFFD', 'Decoding a byte that is invalid for this encoding returns U+FFFD in `replacement` mode, which is the implied default for `decode()`' ); -assert.strictEqual( - iso885914.decode('\uFFFF', { mode: 'replacement' }), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'replacement' }), '\uFFFD', 'Decoding a byte that is invalid for this encoding returns U+FFFD in `replacement` mode' ); -assert.strictEqual( - iso885914.decode('\uFFFF', { mode: 'REPLACEMENT' }), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'REPLACEMENT' }), '\uFFFD', 'Mode names are case-insensitive' ); -assert.strictEqual( - iso885914.decode('\uFFFF', { mode: 'rEpLaCeMeNt' }), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'rEpLaCeMeNt' }), '\uFFFD', 'Mode names are case-insensitive' ); assert.throws( () => { - iso885914.decode('\uFFFF', { mode: 'fatal' }); + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'fatal' }); }, Error, 'Decoding a byte that is invalid for this encoding throws an error in `fatal` mode' ); assert.throws( () => { - iso885914.decode('\uFFFF', { mode: 
'FATAL' }); + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'FATAL' }); }, Error, 'Decoding a byte that is invalid for this encoding throws an error in `fatal` mode' ); assert.throws( () => { - iso885914.decode('\uFFFF', { mode: 'fAtAl' }); + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'fAtAl' }); }, Error, 'Mode names are case-insensitive' diff --git a/tests/tests.src.mjs b/tests/tests.src.mjs index 6112bb4..7d5f6f8 100644 --- a/tests/tests.src.mjs +++ b/tests/tests.src.mjs @@ -3,14 +3,23 @@ import assert from 'node:assert'; import * as iso885914 from '../iso-8859-14.mjs'; console.log('Testing `iso885914.encode`…'); -assert.strictEqual( +assert.deepStrictEqual( iso885914.encode('\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F'), - '\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F', + new Uint16Array([ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + ]), 'U+0000 to U+007F remain unchanged' ); 
-assert.strictEqual( +assert.deepStrictEqual( iso885914.encode(<%= decoded %>), - <%= encoded %>, + new Uint16Array(<%= encoded %>), 'Encoding all other symbols in the character set' ); assert.throws( @@ -41,70 +50,69 @@ assert.throws( Error, 'Mode names are case-insensitive' ); -assert.strictEqual( - iso885914.encode('\uFFFF', { mode: 'html' }), - '￿', - 'Encoding a code point that is invalid for this encoding returns an HTML entity in `html` mode' -); -assert.strictEqual( - iso885914.encode('\uFFFF', { mode: 'HTML' }), - '￿', - 'Mode names are case-insensitive' -); -assert.strictEqual( - iso885914.encode('\uFFFF', { mode: 'hTmL' }), - '￿', - 'Mode names are case-insensitive' -); console.log('Testing `iso885914.decode`…'); -assert.strictEqual( +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + ])), + '\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F', + 'U+0000 to U+007F remain unchanged when decoding from ArrayBuffer' +); +assert.deepStrictEqual( 
iso885914.decode('\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F'), '\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F', - 'U+0000 to U+007F remain unchanged' + 'U+0000 to U+007F remain unchanged when decoding from byte string' ); -assert.strictEqual( +assert.deepStrictEqual( iso885914.decode(<%= encoded %>), <%= decoded %>, - 'Decoding all other symbols in the character set' + 'Decoding all other symbols in the character set when decoding from ArrayBuffer' ); -assert.strictEqual( - iso885914.decode('\uFFFF'), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF])), '\uFFFD', 'Decoding a byte that is invalid for this encoding returns U+FFFD in `replacement` mode, which is the implied default for `decode()`' ); -assert.strictEqual( - iso885914.decode('\uFFFF', { mode: 'replacement' }), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'replacement' }), '\uFFFD', 'Decoding a byte that is invalid for this encoding returns U+FFFD in `replacement` mode' ); -assert.strictEqual( - iso885914.decode('\uFFFF', { mode: 'REPLACEMENT' }), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'REPLACEMENT' }), '\uFFFD', 'Mode names are case-insensitive' ); -assert.strictEqual( - iso885914.decode('\uFFFF', { mode: 'rEpLaCeMeNt' }), +assert.deepStrictEqual( + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'rEpLaCeMeNt' }), '\uFFFD', 'Mode names are case-insensitive' ); assert.throws( () => { - iso885914.decode('\uFFFF', { mode: 'fatal' }); + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'fatal' }); }, Error, 'Decoding a byte that is invalid for this encoding throws an 
error in `fatal` mode' ); assert.throws( () => { - iso885914.decode('\uFFFF', { mode: 'FATAL' }); + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'FATAL' }); }, Error, 'Decoding a byte that is invalid for this encoding throws an error in `fatal` mode' ); assert.throws( () => { - iso885914.decode('\uFFFF', { mode: 'fAtAl' }); + iso885914.decode(new Uint16Array([0xFFFF]), { mode: 'fAtAl' }); }, Error, 'Mode names are case-insensitive'