From 0bdae18de018a6ec3af37e087f964075ae2c08a4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 15 Nov 2024 16:30:33 +0100 Subject: [PATCH] =?UTF-8?q?=E6=8F=9A=E7=90=B4=20slow=202..4=20(#782)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UnicodeData.txt lines from L2/24-071R * lb=ID like the numerals per the proposal * Han like the numerals per the proposal * Regenerate UCD * Ideographic like the numerals * Regenerate UCD * Is Lo really appropriate here? * Rename them and make them Nl * Regenerate UCD * Revert the invariant test change * fix the invariant * 死馬 * Regenerate UCD * halb --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 +++--- .../data/ucd/dev/DerivedCoreProperties.txt | 20 ++++++++++++------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- unicodetools/data/ucd/dev/PropList.txt | 5 +++-- unicodetools/data/ucd/dev/Scripts.txt | 5 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 3 +++ .../data/ucd/dev/VerticalOrientation.txt | 5 +++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 5 +++-- .../dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 9 +++++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedName.txt | 7 +++++-- .../ucd/dev/extracted/DerivedNumericType.txt | 5 +++-- .../dev/extracted/DerivedNumericValues.txt | 11 ++++++---- .../unicode/text/UCD/UnicodeInvariantTest.txt | 11 +++++----- 18 files changed, 75 insertions(+), 47 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index aa7d3e957..65de50da3 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-11-15, 13:43:01 GMT +# Date: 2024-11-15, 15:06:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2091,8 +2091,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY -16FF2..16FF3 ; 17.0 # [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS -# Total code points: 261 +# Total code points: 264 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index bd4be2002..94bb32a96 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-11-15, 13:43:27 GMT +# Date: 2024-11-15, 15:06:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1336,6 +1336,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -1457,7 +1458,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142935 +# Total code points: 142938 # ================================================ @@ -6907,6 +6908,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -7017,7 +7019,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141434 +# Total code points: 141437 # ================================================ @@ -8297,6 +8299,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -8445,7 +8448,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144767 +# Total code points: 144770 # ================================================ @@ -9122,6 +9125,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -9232,7 +9236,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141411 +# Total code points: 141414 # ================================================ @@ -10513,6 +10517,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -10661,7 +10666,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144748 +# Total code points: 144751 # ================================================ @@ -12734,6 +12739,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -12939,7 +12945,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152953 +# Total code points: 152956 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index afa21d0f9..307e7abf1 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 13:43:35 GMT +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2383,6 +2383,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 6ee8d51fa..a856d05be 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-11-15, 13:43:36 GMT +# Date: 2024-11-15, 15:06:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3294,6 +3294,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index e712b4c46..bc5673c73 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-11-15, 13:43:48 GMT +# Date: 2024-11-15, 15:06:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -879,6 +879,7 @@ F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COM FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -893,7 +894,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106479 +# Total code points: 106482 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index f0140c396..9e998924e 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-11-15, 13:44:08 GMT +# Date: 2024-11-15, 15:07:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1598,6 +1598,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 16FE3 ; Han # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; Han # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Han # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Han # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -1608,7 +1609,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 99032 +# Total code points: 99035 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index cb5139da8..966ba1ba5 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -30541,6 +30541,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; 16FF2;CHINESE SMALL SIMPLIFIED ER;Lm;0;L;;;;;N;;;;; 16FF3;CHINESE SMALL TRADITIONAL ER;Lm;0;L;;;;;N;;;;; +16FF4;YANGQIN SIGN SLOW ONE BEAT;Nl;0;L;;;;1;N;;;;; +16FF5;YANGQIN SIGN SLOW THREE HALF BEATS;Nl;0;L;;;;3/2;N;;;;; +16FF6;YANGQIN SIGN SLOW TWO BEATS;Nl;0;L;;;;2;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; 187F7;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 99c55523f..4edfd8b9c 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-11-15, 13:44:11 GMT +# Date: 2024-11-15, 15:07:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2212,7 +2212,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; U # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER -16FF4..16FFF ; U # Cn [12] .. +16FF4..16FF6 ; U # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS +16FF7..16FFF ; U # Cn [9] .. 17000..187F7 ; U # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 187F8..187FF ; U # Cn [8] .. 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index cee1f823c..f2cdd7c5a 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-11-15, 13:44:09 GMT +# Date: 2024-11-15, 15:07:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2535,6 +2535,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; OLetter # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -2610,7 +2611,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137019 +# Total code points: 137022 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 234e0b7fa..4c52a387e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-11-15, 13:43:25 GMT +# Date: 2024-11-15, 15:06:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1115,6 +1115,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -1226,7 +1227,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815159 code points not listed here. +# The above property value applies to 815156 code points not listed here. # Total code points: 1095476 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 14a42b8ea..c4c01ffa0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-11-15, 13:43:27 GMT +# Date: 2024-11-15, 15:06:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1860,6 +1860,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; 0 # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; 0 # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D08 ; 0 # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 @@ -2081,7 +2082,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821320 code points not listed here. +# The above property value applies to 821317 code points not listed here. # Total code points: 1113148 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 5e3c829cf..c69b23b4b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-11-15, 13:43:29 GMT +# Date: 2024-11-15, 15:06:25 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1897,6 +1897,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2126,7 +2127,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760838 code points not listed here. +# The above property value applies to 760835 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 2df83bcc1..d1fa21d11 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-11-15, 13:43:30 GMT +# Date: 2024-11-15, 15:06:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -579,7 +579,7 @@ FFFE..FFFF ; Cn # [2] .. 16F88..16F8E ; Cn # [7] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. -16FF4..16FFF ; Cn # [12] .. +16FF7..16FFF ; Cn # [9] .. 187F8..187FF ; Cn # [8] .. 18CD6..18CFE ; Cn # [41] .. 18D09..1AFEF ; Cn # [8935] .. @@ -748,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819272 +# Total code points: 819269 # ================================================ @@ -3405,8 +3405,9 @@ A6E6..A6EF ; Nl # [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM 1034A ; Nl # GOTHIC LETTER NINE HUNDRED 103D1..103D5 ; Nl # [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 12400..1246E ; Nl # [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +16FF4..16FF6 ; Nl # [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -# Total code points: 236 +# Total code points: 239 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 30f586a5a..1af810dfe 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-11-15, 13:43:32 GMT +# Date: 2024-11-15, 15:06:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757392 code points not listed here. -# Total code points: 894860 +# The above property value applies to 757389 code points not listed here. +# Total code points: 894857 # ================================================ @@ -1776,6 +1776,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA 113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL +16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18D00..18D08 ; ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 @@ -1867,7 +1868,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172421 +# Total code points: 172424 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 391287961..165570f71 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-11-15, 13:43:32 GMT +# Date: 2024-11-15, 15:06:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37212,6 +37212,9 @@ FFFD ; REPLACEMENT CHARACTER 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY 16FF2 ; CHINESE SMALL SIMPLIFIED ER 16FF3 ; CHINESE SMALL TRADITIONAL ER +16FF4 ; YANGQIN SIGN SLOW ONE BEAT +16FF5 ; YANGQIN SIGN SLOW THREE HALF BEATS +16FF6 ; YANGQIN SIGN SLOW TWO BEATS 17000..187F7 ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 @@ -45628,6 +45631,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155259 +# Total code points: 155262 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 74f42dda2..c66993789 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ # DerivedNumericType-17.0.0.txt -# Date: 2024-11-14, 22:51:17 GMT +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -165,6 +165,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 12400..1246E ; Numeric # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 16B5B..16B61 ; Numeric # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS 16E80..16E96 ; Numeric # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16FF4..16FF6 ; Numeric # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 1D2C0..1D2D3 ; Numeric # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Numeric # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D378 ; Numeric # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE @@ -192,7 +193,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D 2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 1114 +# Total code points: 1117 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index 8ca76d328..93c6b84e3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ # DerivedNumericValues-17.0.0.txt -# Date: 2024-11-14, 22:51:17 GMT +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -548,6 +548,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 16DA1 ; 1.0 ; ; 1 # Nd CHISOI DIGIT ONE 16E81 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE 16E94 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE ALTERNATE FORM +16FF4 ; 1.0 ; ; 1 # Nl YANGQIN SIGN SLOW ONE BEAT 1CCF1 ; 1.0 ; ; 1 # Nd OUTLINED DIGIT ONE 1D2C1 ; 1.0 ; ; 1 # No KAKTOVIK NUMERAL ONE 1D2E1 ; 1.0 ; ; 1 # No MAYAN NUMERAL ONE @@ -573,13 +574,14 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 154 +# Total code points: 155 # ================================================ 0F2B ; 1.5 ; ; 3/2 # No TIBETAN DIGIT HALF TWO +16FF5 ; 1.5 ; ; 3/2 # Nl YANGQIN SIGN SLOW THREE HALF BEATS -# Total code points: 1 +# Total code points: 2 # ================================================ @@ -712,6 +714,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 16DA2 ; 2.0 ; ; 2 # Nd CHISOI DIGIT TWO 16E82 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO 16E95 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO ALTERNATE FORM +16FF6 ; 2.0 ; ; 2 # Nl YANGQIN SIGN SLOW TWO BEATS 1CCF2 ; 2.0 ; ; 2 # Nd OUTLINED DIGIT TWO 1D2C2 ; 2.0 ; ; 2 # No KAKTOVIK NUMERAL TWO 1D2E2 ; 2.0 ; ; 2 # No MAYAN NUMERAL TWO @@ -737,7 +740,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 156 +# Total code points: 157 # ================================================ diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 51d2d14ce..b45806a9f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -1202,12 +1202,13 @@ $punct ⊇ [[\u0021-\u007E] - [0-9 A-Z a-z]] # The Khitan Small Script filler is a Nonspacing Mark. # The other characters are numerals (the Hangzhou ten through thirty are compatibility decomposable, # but not the one through nine) and have Script=Han. -Let $NonOtherLetterIdeographs := [ - \N{KHITAN SMALL SCRIPT FILLER} - 〇 〡-〩 〸-〺 - \p{NAME=/^CHINESE SMALL (SIMPLIFIED|TRADITIONAL) ER$/} +Let $NonOtherLetterIdeographs := [\p{Ideographic} - \p{gc=Lo}] +$NonOtherLetterIdeographs = [ + \N{KHITAN SMALL SCRIPT FILLER} + 〇 〡-〩 〸-〺 + \p{NAME=/^CHINESE SMALL (SIMPLIFIED|TRADITIONAL) ER$/} + \p{Name=/^YANGQIN SIGN SLOW (ONE|THREE HALF|TWO) BEATS?$/} ] -$NonOtherLetterIdeographs = [\p{Ideographic} - \p{gc=Lo}] # Ideographic closing mark, gc=Lo. Let $CommonIdeographs := [〆] $CommonIdeographs = [\p{Ideographic} & \p{sc=Common}]