From b2b4748089890f125cfad6caf70ee179255a46e4 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Wed, 18 Sep 2024 09:49:13 +0200 Subject: [PATCH 1/2] Encoding: impact of GB18030-2022 on GBK --- .../legacy-mb-schinese/gbk/gbk-decoder.any.js | 2 + .../legacy-mb-schinese/gbk/gbk-encoder.html | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js b/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js index c0221480da156d..b7f1ca9c51e88f 100644 --- a/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js +++ b/encoding/legacy-mb-schinese/gbk/gbk-decoder.any.js @@ -1,3 +1,5 @@ +// Additional tests can be found in ../gb18030/gb18030-decoder.any.js + const gbkPointers = [ 6432, 7533, 7536, 7672, 7673, 7674, 7675, 7676, 7677, 7678, 7679, 7680, 7681, 7682, 7683, 7684, 23766, 23770, 23771, 23772, 23773, 23774, 23776, 23777, 23778, 23779, 23780, 23781, 23782, 23784, 23785, 23786, diff --git a/encoding/legacy-mb-schinese/gbk/gbk-encoder.html b/encoding/legacy-mb-schinese/gbk/gbk-encoder.html index e43cb73fea72e5..13ed925598ed8c 100644 --- a/encoding/legacy-mb-schinese/gbk/gbk-encoder.html +++ b/encoding/legacy-mb-schinese/gbk/gbk-encoder.html @@ -23,4 +23,43 @@ encode("\u00A5", "%26%23165%3B", "legacy WebKit case 1"); encode("\u22EF", "%26%238943%3B", "legacy WebKit case 2"); encode("\u301C", "%26%2312316%3B", "legacy WebKit case 3"); + encode("\u{10FFFF}", "%26%231114111%3B", "U+10FFFF"); + + // Test that GBK is not impacted by GB18030-2022 (this is still under discussion) + encode("\uFE10", "%26%2365040%3B", "GB18030-2022 1"); + encode("\uFE12", "%26%2365042%3B", "GB18030-2022 2"); + encode("\uFE11", "%26%2365041%3B", "GB18030-2022 3"); + encode("\uFE13", "%26%2365043%3B", "GB18030-2022 4"); + encode("\uFE14", "%26%2365044%3B", "GB18030-2022 5"); + encode("\uFE15", "%26%2365045%3B", "GB18030-2022 6"); + encode("\uFE16", "%26%2365046%3B", "GB18030-2022 7"); + encode("\uFE17", "%26%2365047%3B", "GB18030-2022 8"); + encode("\uFE18", "%26%2365048%3B", "GB18030-2022 9"); + encode("\uFE19", "%26%2365049%3B", "GB18030-2022 10"); + encode("\u9FB4", "%26%2340884%3B", "GB18030-2022 11"); + encode("\u9FB5", "%26%2340885%3B", "GB18030-2022 12"); + encode("\u9FB6", "%26%2340886%3B", "GB18030-2022 13"); + encode("\u9FB7", "%26%2340887%3B", "GB18030-2022 14"); + encode("\u9FB8", "%26%2340888%3B", "GB18030-2022 15"); + encode("\u9FB9", "%26%2340889%3B", "GB18030-2022 16"); + encode("\u9FBA", "%26%2340890%3B", "GB18030-2022 17"); + encode("\u9FBB", "%26%2340891%3B", "GB18030-2022 18"); + encode("\uE78D", "%A6%D9", "GB18030-2022 19"); + encode("\uE78E", "%A6%DA", "GB18030-2022 20"); + encode("\uE78F", "%A6%DB", "GB18030-2022 21"); + encode("\uE790", "%A6%DC", "GB18030-2022 22"); + encode("\uE791", "%A6%DD", "GB18030-2022 23"); + encode("\uE792", "%A6%DE", "GB18030-2022 24"); + encode("\uE793", "%A6%DF", "GB18030-2022 25"); + encode("\uE794", "%A6%EC", "GB18030-2022 26"); + encode("\uE795", "%A6%ED", "GB18030-2022 27"); + encode("\uE796", "%A6%F3", "GB18030-2022 28"); + encode("\uE81E", "%FEY", "GB18030-2022 29"); + encode("\uE826", "%FEa", "GB18030-2022 30"); + encode("\uE82B", "%FEf", "GB18030-2022 31"); + encode("\uE82C", "%FEg", "GB18030-2022 32"); + encode("\uE832", "%FEm", "GB18030-2022 33"); + encode("\uE843", "%FE~", "GB18030-2022 34"); + encode("\uE854", "%FE%90", "GB18030-2022 35"); + encode("\uE864", "%FE%A0", "GB18030-2022 36"); From d057afc0a798116a72c817f7f049deb2f9898443 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Wed, 18 Sep 2024 16:09:22 +0200 Subject: [PATCH 2/2] same expectations as gb18030 --- .../legacy-mb-schinese/gbk/gbk-encoder.html | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/encoding/legacy-mb-schinese/gbk/gbk-encoder.html b/encoding/legacy-mb-schinese/gbk/gbk-encoder.html index 13ed925598ed8c..11557242e3dc65 100644 --- a/encoding/legacy-mb-schinese/gbk/gbk-encoder.html +++ b/encoding/legacy-mb-schinese/gbk/gbk-encoder.html @@ -25,25 +25,25 @@ encode("\u301C", "%26%2312316%3B", "legacy WebKit case 3"); encode("\u{10FFFF}", "%26%231114111%3B", "U+10FFFF"); - // Test that GBK is not impacted by GB18030-2022 (this is still under discussion) - encode("\uFE10", "%26%2365040%3B", "GB18030-2022 1"); - encode("\uFE12", "%26%2365042%3B", "GB18030-2022 2"); - encode("\uFE11", "%26%2365041%3B", "GB18030-2022 3"); - encode("\uFE13", "%26%2365043%3B", "GB18030-2022 4"); - encode("\uFE14", "%26%2365044%3B", "GB18030-2022 5"); - encode("\uFE15", "%26%2365045%3B", "GB18030-2022 6"); - encode("\uFE16", "%26%2365046%3B", "GB18030-2022 7"); - encode("\uFE17", "%26%2365047%3B", "GB18030-2022 8"); - encode("\uFE18", "%26%2365048%3B", "GB18030-2022 9"); - encode("\uFE19", "%26%2365049%3B", "GB18030-2022 10"); - encode("\u9FB4", "%26%2340884%3B", "GB18030-2022 11"); - encode("\u9FB5", "%26%2340885%3B", "GB18030-2022 12"); - encode("\u9FB6", "%26%2340886%3B", "GB18030-2022 13"); - encode("\u9FB7", "%26%2340887%3B", "GB18030-2022 14"); - encode("\u9FB8", "%26%2340888%3B", "GB18030-2022 15"); - encode("\u9FB9", "%26%2340889%3B", "GB18030-2022 16"); - encode("\u9FBA", "%26%2340890%3B", "GB18030-2022 17"); - encode("\u9FBB", "%26%2340891%3B", "GB18030-2022 18"); + // GB18030-2022 + encode("\uFE10", "%A6%D9", "GB18030-2022 1"); + encode("\uFE12", "%A6%DA", "GB18030-2022 2"); + encode("\uFE11", "%A6%DB", "GB18030-2022 3"); + encode("\uFE13", "%A6%DC", "GB18030-2022 4"); + encode("\uFE14", "%A6%DD", "GB18030-2022 5"); + encode("\uFE15", "%A6%DE", "GB18030-2022 6"); + encode("\uFE16", "%A6%DF", "GB18030-2022 7"); + encode("\uFE17", "%A6%EC", "GB18030-2022 8"); + encode("\uFE18", "%A6%ED", "GB18030-2022 9"); + encode("\uFE19", "%A6%F3", "GB18030-2022 10"); + encode("\u9FB4", "%FEY", "GB18030-2022 11"); + encode("\u9FB5", "%FEa", "GB18030-2022 12"); + encode("\u9FB6", "%FEf", "GB18030-2022 13"); + encode("\u9FB7", "%FEg", "GB18030-2022 14"); + encode("\u9FB8", "%FEm", "GB18030-2022 15"); + encode("\u9FB9", "%FE~", "GB18030-2022 16"); + encode("\u9FBA", "%FE%90", "GB18030-2022 17"); + encode("\u9FBB", "%FE%A0", "GB18030-2022 18"); encode("\uE78D", "%A6%D9", "GB18030-2022 19"); encode("\uE78E", "%A6%DA", "GB18030-2022 20"); encode("\uE78F", "%A6%DB", "GB18030-2022 21");