test: pull encoding WPT test fixtures

PR-URL: nodejs#25321 Reviewed-By: Rich Trott <[email protected]> Reviewed-By: James M Snell <[email protected]> Reviewed-By: Ruben Bridgewater <[email protected]>
BridgeAR · Jan 16, 2019 · cffffa4 · cffffa4
1 parent 0d92b0f
commit cffffa4
Show file tree

Hide file tree

Showing 323 changed files with 24,110 additions and 10 deletions.
diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md
@@ -10,10 +10,11 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.
 
 Last update:
 
-- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
-- interfaces: https://github.com/web-platform-tests/wpt/tree/db7f86289e/interfaces
 - console: https://github.com/web-platform-tests/wpt/tree/9786a4b131/console
+- encoding: https://github.com/web-platform-tests/wpt/tree/a093a659ed/encoding
 - url: https://github.com/web-platform-tests/wpt/tree/75b0f336c5/url
+- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
+- interfaces: https://github.com/web-platform-tests/wpt/tree/712c9f275e/interfaces
 
 [Web Platform Tests]: https://github.com/web-platform-tests/wpt
 [`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/master/docs/git-node.md#git-node-wpt
diff --git a/test/fixtures/wpt/encoding/META.yml b/test/fixtures/wpt/encoding/META.yml
@@ -0,0 +1,4 @@
+spec: https://encoding.spec.whatwg.org/
+suggested_reviewers:
+  - inexorabletash
+  - annevk
diff --git a/test/fixtures/wpt/encoding/api-basics.any.js b/test/fixtures/wpt/encoding/api-basics.any.js
@@ -0,0 +1,52 @@
+// META: title=Encoding API: Basics
+
+test(function() {
+    assert_equals((new TextEncoder).encoding, 'utf-8', 'default encoding is utf-8');
+    assert_equals((new TextDecoder).encoding, 'utf-8', 'default encoding is utf-8');
+}, 'Default encodings');
+
+test(function() {
+    assert_array_equals(new TextEncoder().encode(), [], 'input default should be empty string')
+    assert_array_equals(new TextEncoder().encode(undefined), [], 'input default should be empty string')
+}, 'Default inputs');
+
+
+function testDecodeSample(encoding, string, bytes) { // registers one test, named after `encoding`, that decodes `bytes` and expects `string`
+  test(function() {
+    assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string); // input passed as a Uint8Array
+    assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string); // same bytes passed as the backing ArrayBuffer
+  }, 'Decode sample: ' + encoding);
+}
+
+// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
+// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
+// byte-swapped BOM (non-character U+FFFE)
+var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';
+
+test(function() {
+  var encoding = 'utf-8';
+  var string = sample;
+  var bytes = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE];
+  var encoded = new TextEncoder().encode(string);
+  assert_array_equals([].slice.call(encoded), bytes);
+  assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
+  assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
+}, 'Encode/decode round trip: utf-8');
+
+testDecodeSample(
+  'utf-16le',
+  sample,
+  [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
+);
+
+testDecodeSample(
+  'utf-16be',
+  sample,
+  [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]
+);
+
+testDecodeSample(
+  'utf-16',
+  sample,
+  [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
+);
diff --git a/test/fixtures/wpt/encoding/api-invalid-label.any.js b/test/fixtures/wpt/encoding/api-invalid-label.any.js
@@ -0,0 +1,24 @@
+// META: title=Encoding API: invalid label
+// META: timeout=long
+// META: script=resources/encodings.js
+
+var tests = ["invalid-invalidLabel"]; // seed case; setup() below appends the generated labels
+setup(function() {
+  encodings_table.forEach(function(section) {
+    section.encodings.forEach(function(encoding) {
+      encoding.labels.forEach(function(label) {
+        ["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"].forEach(function(ws) { // NUL, VT, NBSP, LINE SEPARATOR, PARAGRAPH SEPARATOR
+          tests.push(ws + label); // character before the label
+          tests.push(label + ws); // character after the label
+          tests.push(ws + label + ws); // character on both sides
+        });
+      });
+    });
+  });
+});
+
+tests.forEach(function(input) {
+  test(function() {
+    assert_throws(new RangeError(), function() { new TextDecoder(input); }); // every generated label is expected to be rejected
+  }, 'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.');
+});
diff --git a/test/fixtures/wpt/encoding/api-replacement-encodings.any.js b/test/fixtures/wpt/encoding/api-replacement-encodings.any.js
@@ -0,0 +1,14 @@
+// META: title=Encoding API: replacement encoding
+// META: script=resources/encodings.js
+
+encodings_table.forEach(function(section) { // one test is registered per label of the "replacement" encoding
+    section.encodings.filter(function(encoding) {
+        return encoding.name === 'replacement'; // ignore every other encoding in the table
+    }).forEach(function(encoding) {
+        encoding.labels.forEach(function(label) {
+            test(function() {
+                assert_throws(new RangeError(), function() { new TextDecoder(label); }); // constructing a decoder with such a label must throw
+            }, 'Label for "replacement" should be rejected by API: ' + label);
+        });
+    });
+});
diff --git a/test/fixtures/wpt/encoding/api-surrogates-utf8.any.js b/test/fixtures/wpt/encoding/api-surrogates-utf8.any.js
@@ -0,0 +1,48 @@
+// META: title=Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding
+
+var badStrings = [
+    {
+        input: 'abc123',
+        expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
+        decoded: 'abc123',
+        name: 'Sanity check'
+    },
+    {
+        input: '\uD800',
+        expected: [0xef, 0xbf, 0xbd],
+        decoded: '\uFFFD',
+        name: 'Surrogate half (low)'
+    },
+    {
+        input: '\uDC00',
+        expected: [0xef, 0xbf, 0xbd],
+        decoded: '\uFFFD',
+        name: 'Surrogate half (high)'
+    },
+    {
+        input: 'abc\uD800123',
+        expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
+        decoded: 'abc\uFFFD123',
+        name: 'Surrogate half (low), in a string'
+    },
+    {
+        input: 'abc\uDC00123',
+        expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
+        decoded: 'abc\uFFFD123',
+        name: 'Surrogate half (high), in a string'
+    },
+    {
+        input: '\uDC00\uD800',
+        expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
+        decoded: '\uFFFD\uFFFD',
+        name: 'Wrong order'
+    }
+];
+
+badStrings.forEach(function(t) { // one test per table entry, named by t.name
+    test(function() {
+        var encoded = new TextEncoder().encode(t.input);
+        assert_array_equals([].slice.call(encoded), t.expected); // copy the encoder result into a plain array before comparing bytes
+        assert_equals(new TextDecoder('utf-8').decode(encoded), t.decoded); // decoding the encoder output must give t.decoded
+    }, 'Invalid surrogates encoded into UTF-8: ' + t.name);
+});
diff --git a/test/fixtures/wpt/encoding/big5-encoder.html b/test/fixtures/wpt/encoding/big5-encoder.html
@@ -0,0 +1,33 @@
+<!doctype html>
+<meta charset=big5> <!-- test breaks if the server overrides this -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) { // input: string placed in the URL query; output: expected serialized query; desc: test-name suffix
+   test(function() {
+     var a = document.createElement("a"); // <a> uses document encoding for URL's query
+     // Append and prepend X to test for off-by-one errors
+     a.href = "https://example.com/?X" + input + "X";
+     assert_equals(a.search.substr(1), "X" + output + "X"); // remove leading "?"
+   }, "big5 encoder: " + desc);
+ }
+
+ encode("ab", "ab", "very basic")
+ // edge cases
+ encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder");
+ encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder");
+ encode("\u3000", "%A1@", "Lowest-pointer character included in encoder");
+ encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers");
+ encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers");
+ encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers");
+ encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers");
+ encode("\u79D4", "%FE%FE", "The last character in the index");
+ // not in index
+ encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index");
+ encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index");
+ // duplicate low bits
+ encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer");
+ // prefer last
+ encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder");
+</script>
diff --git a/test/fixtures/wpt/encoding/eof-shift_jis-ref.html b/test/fixtures/wpt/encoding/eof-shift_jis-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=shift_jis>
+<title>Shift_JIS file ending with a truncated sequence</title>
+One-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-shift_jis.html b/test/fixtures/wpt/encoding/eof-shift_jis.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=shift_jis>
+<title>Shift_JIS file ending with a truncated sequence</title>
+<link rel=match href=/encoding/eof-shift_jis-ref.html>
+One-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-one-ref.html b/test/fixtures/wpt/encoding/eof-utf-8-one-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a one-byte truncated sequence</title>
+One-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-one.html b/test/fixtures/wpt/encoding/eof-utf-8-one.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a one-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-one-ref.html">
+One-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-three-ref.html b/test/fixtures/wpt/encoding/eof-utf-8-three-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a three-byte truncated sequence</title>
+Three-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-three.html b/test/fixtures/wpt/encoding/eof-utf-8-three.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a three-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-three-ref.html">
+Three-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-two-ref.html b/test/fixtures/wpt/encoding/eof-utf-8-two-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a two-byte truncated sequence</title>
+Two-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-two.html b/test/fixtures/wpt/encoding/eof-utf-8-two.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a two-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-two-ref.html">
+Two-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/gb18030-encoder.html b/test/fixtures/wpt/encoding/gb18030-encoder.html
@@ -0,0 +1,21 @@
+<!doctype html>
+<meta charset=gb18030> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) { // input: string placed in the URL query; output: expected serialized query; desc: test-name suffix
+   test(function() {
+     var a = document.createElement("a") // <a> uses document encoding for URL's query
+     a.href = "https://example.com/?" + input // the query is read back below via a.search
+     assert_equals(a.search.substr(1), output) // remove leading "?"
+   }, "gb18030 encoder: " + desc)
+ }
+
+ encode("s", "s", "very basic")
+ encode("\u20AC", "%A2%E3", "Euro")
+ encode("\u4E02", "%81@", "character")
+ encode("\uE4C6", "%A1@", "PUA")
+ encode("\uE4C5", "%FE%FE", "PUA #2")
+ encode("\ud83d\udca9", "%949%DA3", "poo")
+</script>
diff --git a/test/fixtures/wpt/encoding/gbk-encoder.html b/test/fixtures/wpt/encoding/gbk-encoder.html
@@ -0,0 +1,21 @@
+<!doctype html>
+<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) { // input: string placed in the URL query; output: expected serialized query; desc: test-name suffix
+   test(function() {
+     var a = document.createElement("a") // <a> uses document encoding for URL's query
+     a.href = "https://example.com/?" + input // the query is read back below via a.search
+     assert_equals(a.search.substr(1), output) // remove leading "?"
+   }, "gbk encoder: " + desc)
+ }
+
+ encode("s", "s", "very basic")
+ encode("\u20AC", "%80", "Euro")
+ encode("\u4E02", "%81@", "character")
+ encode("\uE4C6", "%A1@", "PUA")
+ encode("\uE4C5", "%FE%FE", "PUA #2")
+ encode("\ud83d\udca9", "%26%23128169%3B", "poo")
+</script>
diff --git a/test/fixtures/wpt/encoding/idlharness.any.js b/test/fixtures/wpt/encoding/idlharness.any.js
@@ -0,0 +1,14 @@
+// META: global=window,worker
+// META: script=/resources/WebIDLParser.js
+// META: script=/resources/idlharness.js
+
+idl_test(
+  ['encoding'],
+  [], // No deps
+  idl_array => {
+    idl_array.add_objects({
+      TextEncoder: ['new TextEncoder()'],
+      TextDecoder: ['new TextDecoder()']
+    });
+  }
+);
diff --git a/test/fixtures/wpt/encoding/iso-2022-jp-decoder.any.js b/test/fixtures/wpt/encoding/iso-2022-jp-decoder.any.js
@@ -0,0 +1,50 @@
+function decode(input, output, desc) { // input: array of byte values; output: expected decoded string; desc: test-name suffix
+  test(function() {
+    var d = new TextDecoder("iso-2022-jp"),
+        buffer = new ArrayBuffer(input.length),
+        view = new Int8Array(buffer) // signed view; the byte values passed in this file do not exceed 0x7E
+    for(var i = 0, l = input.length; i < l; i++) {
+      view[i] = input[i] // copy the bytes into the buffer one by one
+    }
+    assert_equals(d.decode(view), output) // decode the whole input in a single call
+  }, "iso-2022-jp decoder: " + desc)
+}
+decode([0x1b, 0x24], "�$", "Error ESC")
+decode([0x1b, 0x24, 0x50], "�$P", "Error ESC, character")
+decode([0x1b, 0x28, 0x42, 0x50], "P", "ASCII ESC, character")
+decode([0x1b, 0x28, 0x42, 0x1b, 0x28, 0x42, 0x50], "�P", "Double ASCII ESC, character")
+decode([0x50, 0x1b, 0x28, 0x42, 0x50], "PP", "character, ASCII ESC, character")
+decode([0x5C, 0x5D, 0x7E], "\\]~", "characters")
+decode([0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "SO / SI")
+
+decode([0x1b, 0x28, 0x4A, 0x5C, 0x5D, 0x7E], "¥]‾", "Roman ESC, characters")
+decode([0x1b, 0x28, 0x4A, 0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "Roman ESC, SO / SI")
+decode([0x1b, 0x28, 0x4A, 0x1b, 0x1b, 0x28, 0x49, 0x50], "�ﾐ", "Roman ESC, error ESC, Katakana ESC")
+
+decode([0x1b, 0x28, 0x49, 0x50], "ﾐ", "Katakana ESC, character")
+decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Katakana ESC, multibyte ESC, character")
+decode([0x1b, 0x28, 0x49, 0x1b, 0x50], "�ﾐ", "Katakana ESC, error ESC, character")
+decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x50], "�､ﾐ", "Katakana ESC, error ESC #2, character")
+decode([0x1b, 0x28, 0x49, 0x50, 0x1b, 0x28, 0x49, 0x50], "ﾐﾐ", "Katakana ESC, character, Katakana ESC, character")
+decode([0x1b, 0x28, 0x49, 0x0D, 0x0E, 0x0F, 0x10], "����", "Katakana ESC, SO / SI")
+
+decode([0x1b, 0x24, 0x40, 0x50, 0x50], "佩", "Multibyte ESC, character")
+decode([0x1b, 0x24, 0x42, 0x50, 0x50], "佩", "Multibyte ESC #2, character")
+decode([0x1b, 0x24, 0x42, 0x1b, 0x50, 0x50], "�佩", "Multibyte ESC, error ESC, character")
+decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40], "�", "Double multibyte ESC")
+decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Double multibyte ESC, character")
+decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x42, 0x50, 0x50], "�佩", "Double multibyte ESC #2, character")
+decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x50, 0x50], "�ば�", "Multibyte ESC, error ESC #2, character")
+
+decode([0x1b, 0x24, 0x40, 0x50, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Multibyte ESC, single byte, multibyte ESC, character")
+decode([0x1b, 0x24, 0x40, 0x20, 0x50], "��", "Multibyte ESC, lead error byte")
+decode([0x1b, 0x24, 0x40, 0x50, 0x20], "�", "Multibyte ESC, trail error byte")
+
+decode([0x50, 0x1b], "P�", "character, error ESC")
+decode([0x50, 0x1b, 0x24], "P�$", "character, error ESC #2")
+decode([0x50, 0x1b, 0x50], "P�P", "character, error ESC #3")
+decode([0x50, 0x1b, 0x28, 0x42], "P", "character, ASCII ESC")
+decode([0x50, 0x1b, 0x28, 0x4A], "P", "character, Roman ESC")
+decode([0x50, 0x1b, 0x28, 0x49], "P", "character, Katakana ESC")
+decode([0x50, 0x1b, 0x24, 0x40], "P", "character, Multibyte ESC")
+decode([0x50, 0x1b, 0x24, 0x42], "P", "character, Multibyte ESC #2")
diff --git a/test/fixtures/wpt/encoding/iso-2022-jp-encoder.html b/test/fixtures/wpt/encoding/iso-2022-jp-encoder.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<meta charset=iso-2022-jp> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) { // input: string placed in the URL query; output: expected serialized query; desc: test-name suffix
+   test(function() {
+     var a = document.createElement("a") // <a> uses document encoding for URL's query
+     a.href = "https://example.com/?" + input // the query is read back below via a.search
+     assert_equals(a.search.substr(1), output) // remove leading "?"
+   }, "iso-2022-jp encoder: " + desc)
+ }
+
+ encode("s", "s", "very basic")
+ encode("\u00A5\u203Es\\\uFF90\u4F69", "%1B(J\\~s%1B(B\\%1B$B%_PP%1B(B", "basics")
+ encode("\x0E\x0F\x1Bx", "%0E%0F%1Bx", "SO/SI ESC")
+ encode("\uFFFD", "%26%2365533%3B", "U+FFFD");
+</script>