diff --git a/.github/workflows/cache_retain.yml b/.github/workflows/cache_retain.yml index 471d25a21..8cfc236d1 100644 --- a/.github/workflows/cache_retain.yml +++ b/.github/workflows/cache_retain.yml @@ -30,6 +30,10 @@ jobs: retain-maven-cache: name: Run all tests with Maven runs-on: ubuntu-latest + # Only run this on the upstream repo. Otherwise, running in a personal fork will cause + # Github to disable the personal fork copy of the workflow + # (Github complains about running a scheduled workflow on a repo with > 60 days of inactivity) + if: github.ref == 'refs/heads/main' && github.repository == 'unicode-org/unicodetools' steps: - name: Checkout and setup uses: actions/checkout@v2 diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index 3c9e0ca80..6f71f9214 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -482,6 +482,7 @@ 088C; TAH WITH 3 DOTS BELOW; D; TAH 088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF 088E; VERTICAL TAIL; R; VERTICAL TAIL +088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON 0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group 0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group @@ -850,6 +851,8 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group 10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF +10EC6; THIN NOON; D; THIN NOON +10EC7; DOTLESS YEH WITH 4 DOTS BELOW; D; YEH # Sogdian Characters diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index d71b11c1c..ff1048e3e 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -228,6 +228,7 @@ FFF0..FFFF; Specials 108E0..108FF; Hatran 10900..1091F; Phoenician 10920..1093F; Lydian +10940..1095C; Sidetic 10980..1099F; Meroitic Hieroglyphs 109A0..109FF; Meroitic Cursive 10A00..10A5F; Kharoshthi @@ -279,11 +280,13 @@ 
FFF0..FFFF; Specials 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 11AC0..11AFF; Pau Cin Hau 11B00..11B5F; Devanagari Extended-A +11B60..11B7F; Sharada Supplement 11BC0..11BFF; Sunuwar 11C00..11C6F; Bhaiksuki 11C70..11CBF; Marchen 11D00..11D5F; Masaram Gondi 11D60..11DAF; Gunjala Gondi +11DB0..11DEF; Tolong Siki 11EE0..11EFF; Makasar 11F00..11F5F; Kawi 11FB0..11FBF; Lisu Supplement @@ -302,7 +305,9 @@ FFF0..FFFF; Specials 16A70..16ACF; Tangsa 16AD0..16AFF; Bassa Vah 16B00..16B8F; Pahawh Hmong +16EA0..16EDF; Beria Erfe 16D40..16D7F; Kirat Rai +16D80..16DAF; Chisoi 16E40..16E9F; Medefaidrin 16F00..16F9F; Miao 16FE0..16FFF; Ideographic Symbols and Punctuation diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 1b7a9c156..9e3c3320a 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ -# CaseFolding-16.0.0.txt -# Date: 2024-04-30, 21:48:11 GMT +# CaseFolding-17.0.0.txt +# Date: 2024-11-14, 20:19:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1243,7 +1243,10 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN +A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA @@ -1616,6 +1619,31 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O 16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI 16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y +16EA0; C; 16EBB; # BERIA ERFE CAPITAL LETTER ARKAB +16EA1; C; 16EBC; # BERIA ERFE CAPITAL LETTER BASIGNA +16EA2; C; 16EBD; # BERIA ERFE CAPITAL LETTER DARBAI +16EA3; C; 16EBE; # BERIA ERFE CAPITAL LETTER EH +16EA4; C; 16EBF; # BERIA ERFE CAPITAL LETTER FITKO +16EA5; C; 16EC0; # BERIA ERFE CAPITAL LETTER GOWAY +16EA6; C; 16EC1; # BERIA ERFE CAPITAL LETTER HIRDEABO +16EA7; C; 16EC2; # BERIA ERFE CAPITAL LETTER I +16EA8; C; 16EC3; # BERIA ERFE CAPITAL LETTER DJAI +16EA9; C; 16EC4; # BERIA ERFE CAPITAL LETTER KOBO +16EAA; C; 16EC5; # BERIA ERFE CAPITAL LETTER LAKKO +16EAB; C; 16EC6; # BERIA ERFE CAPITAL LETTER MERI +16EAC; C; 16EC7; # BERIA ERFE CAPITAL LETTER NINI +16EAD; C; 16EC8; # BERIA ERFE CAPITAL LETTER GNA +16EAE; C; 16EC9; # BERIA ERFE CAPITAL LETTER NGAY +16EAF; C; 16ECA; # BERIA ERFE CAPITAL LETTER OI +16EB0; C; 16ECB; # BERIA ERFE CAPITAL LETTER PI +16EB1; C; 16ECC; # BERIA ERFE CAPITAL LETTER ERIGO +16EB2; C; 16ECD; # BERIA ERFE CAPITAL LETTER ERIGO TAMURA +16EB3; C; 16ECE; # BERIA ERFE CAPITAL LETTER SERI +16EB4; C; 16ECF; # BERIA ERFE 
CAPITAL LETTER SHEP +16EB5; C; 16ED0; # BERIA ERFE CAPITAL LETTER TATASOUE +16EB6; C; 16ED1; # BERIA ERFE CAPITAL LETTER UI +16EB7; C; 16ED2; # BERIA ERFE CAPITAL LETTER WASSE +16EB8; C; 16ED3; # BERIA ERFE CAPITAL LETTER AY 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI 1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 9642eb0a1..1924ef93d 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2024-10-18, 17:33:21 GMT +# Date: 2024-11-15, 15:57:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2041,9 +2041,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 13460..143FA ; 16.0 # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 16100..16139 ; 16.0 # [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE 16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE -187F8..187FF ; 16.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF -18D09..18D1C ; 16.0 # [20] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE @@ -2059,7 +2057,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5213 +# Total code points: 5185 # ================================================ @@ -2067,9 +2065,37 @@ 
A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Newly assigned in Unicode 17.0.0 (September, 2025) -18D1D..18D1E ; 17.0 # [2] TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E +088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE +09FF ; 17.0 # BENGALI LETTER SANSKRIT BA +0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE +0C5C ; 17.0 # TELUGU ARCHAIC SHRII +0CDC ; 17.0 # KANNADA ARCHAIC SHRII +1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE +2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE +A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE +A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN +A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S +FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH +FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH +10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 +10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON +11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O +11DB0..11DDB ; 17.0 # [44] TOLONG SIKI LETTER I..TOLONG SIKI UNGGA +11DE0..11DE9 ; 17.0 # [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE +16D80..16D9D ; 17.0 # [30] CHISOI LETTER A..CHISOI SIGN SISO +16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE +16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE 
CAPITAL LETTER AY +16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY +16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS +187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF +18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; 17.0 # [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 -# Total code points: 4 +# Total code points: 296 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index be63b6676..8ae667717 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-18, 17:34:13 GMT +# Date: 2024-11-15, 15:57:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -273,8 +273,8 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 01BC..01BF ; Alphabetic # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Alphabetic # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Alphabetic # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; Alphabetic # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Alphabetic # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Alphabetic # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; Alphabetic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; Alphabetic # Lm [12] MODIFIER LETTER CIRCUMFLEX 
ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; Alphabetic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -344,7 +344,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Alphabetic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897 ; Alphabetic # Mn ARABIC PEPET 08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH @@ -386,6 +386,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 09E2..09E3 ; Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL 09F0..09F1 ; Alphabetic # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; Alphabetic # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; Alphabetic # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; Alphabetic # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; Alphabetic # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -477,7 +478,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C55..0C56 ; Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Alphabetic # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER 
VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU @@ -497,7 +498,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Alphabetic # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -833,11 +834,8 @@ A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Alphabetic # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; Alphabetic # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; Alphabetic # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED 
HALF H A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1020,6 +1018,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 108F4..108F5 ; Alphabetic # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; Alphabetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Alphabetic # Lo KHAROSHTHI LETTER A @@ -1053,7 +1052,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10EC5 ; Alphabetic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; Alphabetic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10EFA..10EFC ; Alphabetic # Mn [3] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1239,6 +1240,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA 11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA 
11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B60 ; Alphabetic # Mn SHARADA VOWEL SIGN OE +11B61 ; Alphabetic # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Alphabetic # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Alphabetic # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Alphabetic # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Alphabetic # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; Alphabetic # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA @@ -1274,6 +1281,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11D95 ; Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA 11D96 ; Alphabetic # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; Alphabetic # Lo GUNJALA GONDI OM +11DB0..11DD8 ; Alphabetic # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; Alphabetic # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; Alphabetic # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; Alphabetic # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -1310,7 +1320,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16D40..16D42 ; Alphabetic # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; Alphabetic # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; Alphabetic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; Alphabetic # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; Alphabetic # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; Alphabetic # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; Alphabetic # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; 
Alphabetic # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Alphabetic # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; Alphabetic # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION @@ -1320,6 +1335,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; Alphabetic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Alphabetic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -1441,7 +1458,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142791 +# Total code points: 142970 # ================================================ @@ -1595,7 +1612,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 024B ; Lowercase # L& LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Lowercase # L& LATIN SMALL LETTER R WITH STROKE 024F..0293 ; Lowercase # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Lowercase # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; 
Lowercase # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -2073,13 +2090,14 @@ A7C3 ; Lowercase # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lowercase # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lowercase # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Lowercase # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Lowercase # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Lowercase # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S A7DB ; Lowercase # L& LATIN SMALL LETTER LAMBDA -A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lowercase # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -2105,6 +2123,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lowercase # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Lowercase # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D41A..1D433 ; Lowercase # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD 
SMALL Z 1D44E..1D454 ; Lowercase # L& [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G 1D456..1D467 ; Lowercase # L& [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z @@ -2139,7 +2158,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2595 # ================================================ @@ -2750,7 +2769,10 @@ A7C2 ; Uppercase # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Uppercase # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Uppercase # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Uppercase # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Uppercase # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA @@ -2767,6 +2789,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 10D50..10D65 ; Uppercase # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Uppercase # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Uppercase # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Uppercase # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z 1D434..1D44D ; Uppercase # L& [26] 
MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z 1D468..1D481 ; Uppercase # L& [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z @@ -2803,7 +2826,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1978 +# Total code points: 2006 # ================================================ @@ -2821,7 +2844,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL 01BC..01BF ; Cased # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C4..0293 ; Cased # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Cased # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Cased # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Cased # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Cased # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Cased # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -2911,11 +2934,8 @@ A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A7CD ; Cased # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR 
G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; Cased # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; Cased # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -2949,6 +2969,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10D70..10D85 ; Cased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Cased # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Cased # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; Cased # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Cased # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D400..1D454 ; Cased # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Cased # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; Cased # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -2988,7 +3010,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4632 # ================================================ @@ -3103,7 +3125,7 @@ FF41..FF5A ; 
Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0B3F ; Case_Ignorable # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; Case_Ignorable # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Case_Ignorable # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Case_Ignorable # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Case_Ignorable # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; Case_Ignorable # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Case_Ignorable # Mn TAMIL SIGN ANUSVARA 0BC0 ; Case_Ignorable # Mn TAMIL VOWEL SIGN II @@ -3194,7 +3216,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Case_Ignorable # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Case_Ignorable # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN 1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -3274,7 +3297,7 @@ A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE.. 
A770 ; Case_Ignorable # Lm MODIFIER LETTER US A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN -A7F2..A7F4 ; Case_Ignorable # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Case_Ignorable # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA @@ -3350,7 +3373,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; Case_Ignorable # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EFA..10EFF ; Case_Ignorable # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3427,6 +3451,9 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11A59..11A5B ; Case_Ignorable # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Case_Ignorable # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Case_Ignorable # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Case_Ignorable # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; Case_Ignorable # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; 
Case_Ignorable # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Case_Ignorable # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Case_Ignorable # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Case_Ignorable # Mn BHAIKSUKI SIGN VIRAMA @@ -3442,6 +3469,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11D90..11D91 ; Case_Ignorable # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI 11D95 ; Case_Ignorable # Mn GUNJALA GONDI SIGN ANUSVARA 11D97 ; Case_Ignorable # Mn GUNJALA GONDI VIRAMA +11DD9 ; Case_Ignorable # Lm TOLONG SIKI SIGN SELA 11EF3..11EF4 ; Case_Ignorable # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11F00..11F01 ; Case_Ignorable # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA 11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R @@ -3458,12 +3486,15 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16D40..16D42 ; Case_Ignorable # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D6B..16D6C ; Case_Ignorable # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D98 ; Case_Ignorable # Mn CHISOI SIGN ANUSVARA +16D9D ; Case_Ignorable # Mn CHISOI SIGN SISO 16F4F ; Case_Ignorable # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; Case_Ignorable # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Case_Ignorable # Lm OLD CHINESE ITERATION MARK 16FE4 ; Case_Ignorable # Mn KHITAN SMALL SCRIPT FILLER +16FF2..16FF3 ; Case_Ignorable # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1AFF0..1AFF3 ; Case_Ignorable # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Case_Ignorable # Lm [7] KATAKANA LETTER 
MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Case_Ignorable # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -3505,7 +3536,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2792 # ================================================ @@ -4110,7 +4141,10 @@ A7C2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ANGLICAN A7C4..A7C7 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA @@ -4127,9 +4161,10 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10D50..10D65 ; Changes_When_Lowercased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Changes_When_Lowercased # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1E900..1E921 ; 
Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1460 +# Total code points: 1488 # ================================================ @@ -4747,7 +4782,10 @@ A7C3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER ANGLICANA A7C8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Changes_When_Uppercased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA @@ -4767,9 +4805,10 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Uppercased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Changes_When_Uppercased # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1552 +# Total code points: 1580 # ================================================ @@ -5386,7 +5425,10 @@ A7C3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER ANGLICANA A7C8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; 
Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Changes_When_Titlecased # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA @@ -5406,9 +5448,10 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Titlecased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Changes_When_Titlecased # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1479 +# Total code points: 1507 # ================================================ @@ -6022,7 +6065,10 @@ A7C2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ANGLICAN A7C4..A7C7 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_Casefolded # L& LATIN CAPITAL 
LETTER DOUBLE WYNN A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA @@ -6042,9 +6088,10 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10D50..10D65 ; Changes_When_Casefolded # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Changes_When_Casefolded # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1533 +# Total code points: 1561 # ================================================ @@ -6156,9 +6203,7 @@ A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I -A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7B0..A7DC ; Changes_When_Casemapped # L& [45] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL 
LETTER REVERSED HALF H AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -6183,9 +6228,11 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 10D70..10D85 ; Changes_When_Casemapped # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; Changes_When_Casemapped # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Changes_When_Casemapped # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2981 +# Total code points: 3037 # ================================================ @@ -6210,8 +6257,8 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 01BC..01BF ; ID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ID_Start # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ID_Start # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; ID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; 
ID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -6259,7 +6306,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; ID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; ID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Start # Lm ARABIC SMALL FARSI YEH 0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -6280,6 +6327,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 09DF..09E1 ; ID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; ID_Start # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; ID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -6327,7 +6375,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ID_Start # Lo KANNADA SIGN SPACING 
CANDRABINDU 0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -6336,7 +6384,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -6561,11 +6609,8 @@ A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER I A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; ID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; ID_Start # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER 
LETTER SMALL LIGATURE OE @@ -6702,6 +6747,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; ID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ID_Start # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Start # Lo KHAROSHTHI LETTER A @@ -6729,6 +6775,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; ID_Start # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6821,6 +6869,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D67..11D68 ; ID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; ID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; ID_Start # Lo GUNJALA GONDI OM +11DB0..11DD8 ; ID_Start # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; ID_Start # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; ID_Start # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; ID_Start # Lo [19] 
MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; ID_Start # Lo KAWI SIGN REPHA 11F04..11F10 ; ID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -6846,12 +6897,18 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D40..16D42 ; ID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; ID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; ID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; ID_Start # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; ID_Start # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; ID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; ID_Start # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; ID_Start # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; ID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; ID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -6962,7 +7019,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141301 +# Total code points: 
141469 # ================================================ @@ -6991,8 +7048,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 01BC..01BF ; ID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ID_Continue # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ID_Continue # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; ID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; ID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -7068,7 +7125,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH @@ -7116,6 +7173,7 @@ FFDA..FFDC ; ID_Start 
# Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 09F0..09F1 ; ID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA 09FE ; ID_Continue # Mn BENGALI SANDHI MARK +09FF ; ID_Continue # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; ID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; ID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; ID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -7176,7 +7234,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0B47..0B48 ; ID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; ID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; ID_Continue # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; ID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; ID_Continue # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; ID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; ID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; ID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -7218,7 +7276,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -7240,7 +7298,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL 
SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -7457,7 +7515,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; ID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; ID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; ID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -7646,11 +7705,8 @@ A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTE A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED 
INSULAR G -A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; ID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; ID_Continue # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -7857,6 +7913,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 108F4..108F5 ; ID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ID_Continue # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A @@ -7895,7 +7952,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; ID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; 
ID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10EFA..10EFF ; ID_Continue # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -8122,6 +8181,12 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B60 ; ID_Continue # Mn SHARADA VOWEL SIGN OE +11B61 ; ID_Continue # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; ID_Continue # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; ID_Continue # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; ID_Continue # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; ID_Continue # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; ID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BF0..11BF9 ; ID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L @@ -8162,6 +8227,10 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11D97 ; ID_Continue # Mn GUNJALA GONDI VIRAMA 11D98 ; ID_Continue # Lo GUNJALA GONDI OM 11DA0..11DA9 ; ID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; ID_Continue # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; ID_Continue # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; ID_Continue # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; ID_Continue # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; ID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; 
ID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; ID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -8211,7 +8280,14 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16D43..16D6A ; ID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; ID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D70..16D79 ; ID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; ID_Continue # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; ID_Continue # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; ID_Continue # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; ID_Continue # Mn CHISOI SIGN SISO +16DA0..16DA9 ; ID_Continue # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; ID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; ID_Continue # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; ID_Continue # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; ID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; ID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION @@ -8222,6 +8298,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; ID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID_Continue # Lo [2] 
TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -8370,7 +8448,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144573 +# Total code points: 144802 # ================================================ @@ -8393,8 +8471,8 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 01BC..01BF ; XID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; XID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; XID_Start # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; XID_Start # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -8441,7 +8519,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; XID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; XID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; XID_Start # Lo 
[7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; XID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Start # Lm ARABIC SMALL FARSI YEH 0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -8462,6 +8540,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 09DF..09E1 ; XID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; XID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; XID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; XID_Start # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; XID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; XID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; XID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -8509,7 +8588,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -8518,7 +8597,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Start # 
Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -8742,11 +8821,8 @@ A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; XID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; XID_Start # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; XID_Start # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -8888,6 +8964,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 108F4..108F5 ; XID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; XID_Start # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; 
XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A @@ -8915,6 +8992,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; XID_Start # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; XID_Start # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9007,6 +9086,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 11D67..11D68 ; XID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; XID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; XID_Start # Lo GUNJALA GONDI OM +11DB0..11DD8 ; XID_Start # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; XID_Start # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; XID_Start # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; XID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; XID_Start # Lo KAWI SIGN REPHA 11F04..11F10 ; XID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -9032,12 +9114,18 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16D40..16D42 ; XID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; XID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; XID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; XID_Start # Lo 
[24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; XID_Start # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; XID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; XID_Start # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; XID_Start # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; XID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; XID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; XID_Start # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -9148,7 +9236,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141278 +# Total code points: 141446 # ================================================ @@ -9174,8 +9262,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 01BC..01BF ; XID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; XID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; XID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; XID_Continue # Lo LATIN LETTER GLOTTAL 
STOP -0295..02AF ; XID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; XID_Continue # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; XID_Continue # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; XID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C6..02D1 ; XID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02E0..02E4 ; XID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -9250,7 +9338,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; XID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH @@ -9298,6 +9386,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 09F0..09F1 ; XID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; XID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA 09FE ; XID_Continue # Mn BENGALI SANDHI MARK +09FF ; XID_Continue # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; XID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; XID_Continue # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; 
XID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -9358,7 +9447,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0B47..0B48 ; XID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; XID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; XID_Continue # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; XID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; XID_Continue # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; XID_Continue # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; XID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; XID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -9400,7 +9489,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -9422,7 +9511,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 
0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -9639,7 +9728,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK 1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; XID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; XID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; XID_Continue # Mc BALINESE SIGN BISAH 1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -9827,11 +9917,8 @@ A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETT A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; XID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; XID_Continue # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; XID_Continue # Lm [4] 
MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -10044,6 +10131,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 108F4..108F5 ; XID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; XID_Continue # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A @@ -10082,7 +10170,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; XID_Continue # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; XID_Continue # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10EFA..10EFF ; XID_Continue # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] 
SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -10309,6 +10399,12 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B60 ; XID_Continue # Mn SHARADA VOWEL SIGN OE +11B61 ; XID_Continue # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; XID_Continue # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; XID_Continue # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; XID_Continue # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; XID_Continue # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; XID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BF0..11BF9 ; XID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L @@ -10349,6 +10445,10 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11D97 ; XID_Continue # Mn GUNJALA GONDI VIRAMA 11D98 ; XID_Continue # Lo GUNJALA GONDI OM 11DA0..11DA9 ; XID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; XID_Continue # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; XID_Continue # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; XID_Continue # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; XID_Continue # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; XID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; XID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; XID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -10398,7 +10498,14 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16D43..16D6A ; XID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; XID_Continue # Lm [2] 
KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D70..16D79 ; XID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; XID_Continue # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; XID_Continue # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; XID_Continue # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; XID_Continue # Mn CHISOI SIGN SISO +16DA0..16DA9 ; XID_Continue # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; XID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; XID_Continue # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; XID_Continue # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; XID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; XID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION @@ -10409,6 +10516,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK 16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; XID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; XID_Continue # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -10557,7 +10666,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code 
points: 144554 +# Total code points: 144783 # ================================================ @@ -10680,7 +10789,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .......... -# Total code points: 10554 +# Total code points: 10583 # ================================================ @@ -11582,11 +11612,15 @@ A7C7 ; NFKC_SCF; A7C8 # L& LATIN CAPITAL LETTER D WITH A7C9 ; NFKC_SCF; A7CA # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB ; NFKC_SCF; 0264 # L& LATIN CAPITAL LETTER RAMS HORN A7CC ; NFKC_SCF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; NFKC_SCF; A7CF # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; NFKC_SCF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; NFKC_SCF; A7D3 # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; NFKC_SCF; A7D5 # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; NFKC_SCF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_SCF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; NFKC_SCF; A7DB # L& LATIN CAPITAL LETTER LAMBDA A7DC ; NFKC_SCF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; NFKC_SCF; 0073 # Lm MODIFIER LETTER CAPITAL S A7F2 ; NFKC_SCF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_SCF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_SCF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -13318,6 +13352,31 @@ FFF0..FFF8 ; NFKC_SCF; # Cn [9] ...... 
-# Total code points: 10516 +# Total code points: 10545 # ================================================ @@ -16052,12 +16111,15 @@ A7C2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER ANG A7C4..A7C7 ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Changes_When_NFKC_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA A7DC ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK @@ -16199,6 +16261,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] ...... 
-# Total code points: 10554 +# Total code points: 10583 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 882507d88..c4ec8dd03 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2024-10-18, 17:34:27 GMT +# Date: 2024-11-15, 15:57:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -180,8 +180,8 @@ 0252..0260 ; N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK 0261 ; A # Ll LATIN SMALL LETTER SCRIPT G 0262..0293 ; N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294 ; N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; N # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; N # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD 02C4 ; A # Sk MODIFIER LETTER UP ARROWHEAD @@ -332,7 +332,7 @@ 0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; N # Sk ARABIC RAISED ROUND DOT -0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA 
OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -391,6 +391,7 @@ 09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; N # Po BENGALI ABBREVIATION SIGN 09FE ; N # Mn BENGALI SANDHI MARK +09FF ; N # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; N # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -454,7 +455,7 @@ 0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; N # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; N # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; N # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -502,7 +503,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -528,7 +529,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF 
; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -806,7 +807,8 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; N # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1351,8 +1353,7 @@ 2B55 ; W # So HEAVY LARGE CIRCLE 2B56..2B59 ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; N # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C5F ; N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V @@ -1548,11 +1549,8 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL 
LETTER CLOSED INSULAR G -A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1709,13 +1707,15 @@ FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; N # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; N # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; N # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP 
SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; N # Sc RIAL SIGN FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1904,6 +1904,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10940..1095C ; N # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF @@ -1964,7 +1965,11 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; N # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2235,6 +2240,12 @@ FFFD ; A # So REPLACEMENT CHARACTER 11AB0..11ABF ; N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; N # 
Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; N # Mn SHARADA VOWEL SIGN OE +11B61 ; N # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; N # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; N # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; N # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; N # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; N # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -2279,6 +2290,10 @@ FFFD ; A # So REPLACEMENT CHARACTER 11D97 ; N # Mn GUNJALA GONDI VIRAMA 11D98 ; N # Lo GUNJALA GONDI OM 11DA0..11DA9 ; N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; N # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; N # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; N # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; N # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -2346,9 +2361,16 @@ FFFD ; A # So REPLACEMENT CHARACTER 16D6B..16D6C ; N # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; N # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; N # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; N # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; N # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; N # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; N # Mn CHISOI SIGN SISO +16DA0..16DA9 ; N # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; N # Lu [25] BERIA ERFE CAPITAL LETTER 
ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; N # Ll [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; N # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; N # Lo MIAO LETTER NASALIZATION @@ -2360,6 +2382,8 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FE3 ; W # Lm OLD CHINESE ITERATION MARK 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187FF ; W # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index 7379b43e5..876f0114c 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,5 +1,5 @@ -# IndicPositionalCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:21 GMT +# IndicPositionalCategory-17.0.0.txt +# Date: 2024-11-14, 19:48:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -297,6 +297,9 @@ ABEC ; Right # Mc MEETEI MAYEK LUM IYEK 11A39 ; Right # Mc ZANABAZAR SQUARE SIGN VISARGA 11A57..11A58 ; Right # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A97 ; Right # Mc SOYOMBO SIGN VISARGA +11B61 ; Right # Mc SHARADA VOWEL SIGN OOE +11B65 ; Right # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; Right # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Right # Mc BHAIKSUKI VOWEL SIGN AA 11C3E ; Right # Mc BHAIKSUKI SIGN VISARGA 11CA9 ; Right # Mc MARCHEN SUBJOINED LETTER YA @@ -423,7 +426,7 @@ AABB..AABC ; Visual_Order_Left # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL 0AFA..0AFF ; Top # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B01 ; Top # Mn ORIYA SIGN CANDRABINDU 0B3F ; Top # Mn ORIYA VOWEL SIGN I -0B55..0B56 ; Top # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Top # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B82 ; Top # Mn TAMIL SIGN ANUSVARA 0BC0 ; Top # Mn TAMIL VOWEL SIGN II 0BCD ; Top # Mn TAMIL SIGN VIRAMA @@ -594,6 +597,9 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11A84..11A89 ; Top # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A96 ; Top # Mn SOYOMBO SIGN ANUSVARA 11A98 ; Top # Mn SOYOMBO GEMINATION MARK +11B60 ; Top # Mn SHARADA VOWEL SIGN OE +11B64 ; Top # Mn SHARADA VOWEL SIGN SHORT E +11B66 ; Top # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C31 ; Top # Mn [2] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN II 11C38..11C3D ; Top # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11CB3 ; Top # Mn MARCHEN VOWEL SIGN E @@ -613,6 +619,7 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11F5A ; Top # Mn KAWI SIGN NUKTA 1611E..16129 ; Top # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK 1612D ; Top # Mn GURUNG KHEMA SIGN ANUSVARA +16D98 ; Top # Mn CHISOI SIGN ANUSVARA # Indic_Positional_Category=Bottom @@ -755,6 +762,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 
11A52..11A53 ; Bottom # Mn [2] SOYOMBO VOWEL SIGN UE..SOYOMBO VOWEL SIGN U 11A59..11A5B ; Bottom # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A95 ; Bottom # Mn [12] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO FINAL CONSONANT SIGN -A +11B62..11B63 ; Bottom # Mn [2] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN UUE 11C32..11C36 ; Bottom # Mn [5] BHAIKSUKI VOWEL SIGN U..BHAIKSUKI VOWEL SIGN VOCALIC L 11C3F ; Bottom # Mn BHAIKSUKI SIGN VIRAMA 11C92..11CA7 ; Bottom # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA @@ -767,6 +775,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 11EF4 ; Bottom # Mn MAKASAR VOWEL SIGN U 11F38..11F3A ; Bottom # Mn [3] KAWI VOWEL SIGN U..KAWI VOWEL SIGN VOCALIC R 1612E..1612F ; Bottom # Mn [2] GURUNG KHEMA CONSONANT SIGN MEDIAL RA..GURUNG KHEMA SIGN THOLHOMA +16D9D ; Bottom # Mn CHISOI SIGN SISO # Indic_Positional_Category=Top_And_Bottom diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index dc0760462..ad5958461 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ -# IndicSyllabicCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:21 GMT +# IndicSyllabicCategory-17.0.0.txt +# Date: 2024-11-14, 19:48:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -140,6 +140,7 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11F00..11F01 ; Bindu # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA 1612D ; Bindu # Mn GURUNG KHEMA SIGN ANUSVARA 16D40..16D41 ; Bindu # Lm [2] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN TONPI +16D98 ; Bindu # Mn CHISOI SIGN ANUSVARA # ================================================ @@ -338,6 +339,7 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK 11F41 ; Pure_Killer # Mc KAWI SIGN KILLER 1612F ; Pure_Killer # Mn GURUNG KHEMA SIGN THOLHOMA 16D6B..16D6C ; Pure_Killer # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D9D ; Pure_Killer # Mn CHISOI SIGN SISO # ================================================ @@ -525,7 +527,7 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 0B41..0B44 ; Vowel_Dependent # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 ; Vowel_Dependent # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Vowel_Dependent # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B55..0B56 ; Vowel_Dependent # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Vowel_Dependent # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Vowel_Dependent # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Vowel_Dependent # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0BBE..0BBF ; Vowel_Dependent # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I @@ -729,6 +731,12 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 11A51..11A56 ; Vowel_Dependent # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE 11A57..11A58 ; Vowel_Dependent # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A59..11A5B ; Vowel_Dependent # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11B60 ; Vowel_Dependent # Mn SHARADA VOWEL SIGN OE +11B61 ; Vowel_Dependent # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Vowel_Dependent # Mn 
[3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Vowel_Dependent # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Vowel_Dependent # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Vowel_Dependent # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Vowel_Dependent # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Vowel_Dependent # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3B ; Vowel_Dependent # Mn [4] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI VOWEL SIGN AU @@ -767,6 +775,12 @@ A866 ; Vowel # Lo PHAGS-PA LETTER EE A922..A925 ; Vowel # Lo [4] KAYAH LI LETTER A..KAYAH LI LETTER OO A926..A92A ; Vowel # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O 11150..11154 ; Vowel # Lo [5] MAHAJANI LETTER A..MAHAJANI LETTER O +16D80 ; Vowel # Lo CHISOI LETTER A +16D82..16D83 ; Vowel # Lo [2] CHISOI LETTER AI..CHISOI LETTER AA +16D86 ; Vowel # Lo CHISOI LETTER E +16D89 ; Vowel # Lo CHISOI LETTER I +16D8F ; Vowel # Lo CHISOI LETTER U +16D92 ; Vowel # Lo CHISOI LETTER O # ================================================ @@ -814,6 +828,7 @@ AA74..AA76 ; Consonant_Placeholder # Lo [3] MYANMAR LOGOGRAM KHAMTI OAY..MY 09DC..09DD ; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 09DF ; Consonant # Lo BENGALI LETTER YYA 09F0..09F1 ; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FF ; Consonant # Lo BENGALI LETTER SANSKRIT BA 0A15..0A28 ; Consonant # Lo [20] GURMUKHI LETTER KA..GURMUKHI LETTER NA 0A2A..0A30 ; Consonant # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA 0A32..0A33 ; Consonant # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA @@ -965,6 +980,13 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 11F12..11F33 ; Consonant # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA 16101..1611D ; Consonant # Lo [29] GURUNG KHEMA LETTER KA..GURUNG KHEMA LETTER SA 16D43..16D62 ; Consonant # Lo [32] KIRAT RAI LETTER A..KIRAT RAI LETTER HA +16D81 ; Consonant # Lo CHISOI LETTER BA +16D84..16D85 ; Consonant # Lo [2] 
CHISOI LETTER GA..CHISOI LETTER TA +16D87..16D88 ; Consonant # Lo [2] CHISOI LETTER SA..CHISOI LETTER NA +16D8A..16D8E ; Consonant # Lo [5] CHISOI LETTER KA..CHISOI LETTER RRA +16D90..16D91 ; Consonant # Lo [2] CHISOI LETTER DA..CHISOI LETTER LA +16D93..16D97 ; Consonant # Lo [5] CHISOI LETTER NYA..CHISOI LETTER PA +16D99..16D9C ; Consonant # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA # ================================================ @@ -1379,6 +1401,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI 11F50..11F59 ; Number # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Number # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16D70..16D79 ; Number # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Number # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE # ================================================ diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 9f5089aa6..3645e8540 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-18, 17:34:29 GMT +# Date: 2024-11-15, 15:57:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -132,8 +132,8 @@ 01C0..01C3 ; AL # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..024F ; AL # L& [140] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER Y WITH STROKE 0250..0293 ; AL # Ll [68] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER EZH WITH CURL -0294 ; AL # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; AL # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; AL # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; AL # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; AL # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; AL # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6 ; AL # Lm MODIFIER LETTER CIRCUMFLEX ACCENT @@ -278,7 +278,7 @@ 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; NU # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -338,6 +338,7 @@ 09FC ; AL # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; AL # Po BENGALI ABBREVIATION SIGN 09FE ; CM # Mn BENGALI SANDHI MARK +09FF ; AL # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; CM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; CM # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -401,7 +402,7 @@ 0B47..0B48 ; 
CM # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; CM # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; CM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; CM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; CM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; CM # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; AL # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; AL # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -449,7 +450,7 @@ 0C4A..0C4D ; CM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; CM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; CM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -475,7 +476,7 @@ 0CCA..0CCB ; CM # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; CM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; CM # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -776,7 +777,9 @@ 1AA8..1AAD ; SA # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W 
BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE +1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B05..1B33 ; AK # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1317,8 +1320,7 @@ 2B4D..2B54 ; AL # So [8] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..WHITE RIGHT-POINTING PENTAGON 2B55..2B59 ; AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; AL # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C5F ; AL # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI 2C60..2C7B ; AL # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V @@ -1574,11 +1576,8 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; AL # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH 
DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; AL # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2537,13 +2536,15 @@ FB43..FB44 ; HL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; HL # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; CL # Pe ORNATE LEFT PARENTHESIS FD3F ; OP # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; AL # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; AL # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; PO # Sc RIAL SIGN FDFD..FDFF ; AL # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -2758,6 +2759,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1091F ; BA # Po PHOENICIAN WORD SEPARATOR 
10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10940..1095C ; AL # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; AL # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; AL # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; AL # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF @@ -2820,7 +2822,11 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; CM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -3115,6 +3121,12 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11AB0..11ABF ; AL # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; AL # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; BB # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; CM # Mn SHARADA VOWEL SIGN OE +11B61 ; CM # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; CM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; CM # Mc SHARADA VOWEL SIGN SHORT 
O +11B66 ; CM # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; CM # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; AL # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -3160,6 +3172,10 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11D97 ; CM # Mn GUNJALA GONDI VIRAMA 11D98 ; AL # Lo GUNJALA GONDI OM 11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; AL # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; AL # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; AL # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; NU # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF1 ; AS # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11EF2 ; BA # Lo MAKASAR ANGKA 11EF3..11EF4 ; CM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U @@ -3255,10 +3271,17 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16D6D ; AL # Po KIRAT RAI SIGN YUPI 16D6E..16D6F ; BA # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; NU # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; AL # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; CM # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; AL # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; CM # Mn CHISOI SIGN SISO +16DA0..16DA9 ; NU # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; AL # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E98 ; BA # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP 16E99..16E9A ; AL # Po [2] MEDEFAIDRIN SYMBOL AIVA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; AL # Lu [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; AL # Ll [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; AL # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; CM # Mn MIAO SIGN CONSONANT 
MODIFIER BAR 16F50 ; AL # Lo MIAO LETTER NASALIZATION @@ -3270,6 +3293,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187FF ; ID # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..d509d5415 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ -# NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# NormalizationTest-17.0.0.txt +# Date: 2024-11-14, 19:48:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2429,6 +2429,7 @@ FEFA 0334;FEFA 0334;FEFA 0334;0644 0625 0334;0644 0627 0334 0655; # (ﻺ◌̴; A69C;A69C;A69C;044A;044A; # (ꚜ; ꚜ; ꚜ; ъ; ъ; ) MODIFIER LETTER CYRILLIC HARD SIGN A69D;A69D;A69D;044C;044C; # (ꚝ; ꚝ; ꚝ; ь; ь; ) MODIFIER LETTER CYRILLIC SOFT SIGN A770;A770;A770;A76F;A76F; # (ꝰ; ꝰ; ꝰ; ꝯ; ꝯ; ) MODIFIER LETTER US +A7F1;A7F1;A7F1;0053;0053; # (꟱; ꟱; ꟱; S; S; ) MODIFIER LETTER CAPITAL S A7F2;A7F2;A7F2;0043;0043; # (ꟲ; ꟲ; ꟲ; C; C; ) MODIFIER LETTER CAPITAL C A7F3;A7F3;A7F3;0046;0046; # (ꟳ; ꟳ; ꟳ; F; F; ) MODIFIER LETTER CAPITAL F A7F4;A7F4;A7F4;0051;0051; # (ꟴ; ꟴ; ꟴ; Q; Q; ) MODIFIER LETTER CAPITAL Q @@ -18098,6 +18099,60 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1ACD 0315 0300 05AE 0062;0061 05AE 1ACD 0300 0315 0062;0061 05AE 1ACD 0300 0315 0062;0061 05AE 1ACD 0300 0315 0062;0061 05AE 1ACD 0300 0315 0062; # (a◌ᫍ◌̕◌̀◌֮b; a◌֮◌ᫍ◌̀◌̕b; a◌֮◌ᫍ◌̀◌̕b; a◌֮◌ᫍ◌̀◌̕b; a◌֮◌ᫍ◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER INSULAR R, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1ACE 0062;00E0 05AE 1ACE 0315 0062;0061 05AE 0300 1ACE 0315 0062;00E0 05AE 1ACE 0315 0062;0061 05AE 0300 1ACE 0315 0062; # (a◌̕◌̀◌֮◌ᫎb; à◌֮◌ᫎ◌̕b; a◌֮◌̀◌ᫎ◌̕b; à◌֮◌ᫎ◌̕b; a◌֮◌̀◌ᫎ◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LATIN SMALL LETTER INSULAR T, LATIN SMALL LETTER B 0061 1ACE 0315 0300 05AE 0062;0061 05AE 1ACE 0300 0315 0062;0061 05AE 1ACE 0300 0315 0062;0061 05AE 1ACE 0300 0315 0062;0061 05AE 1ACE 0300 0315 0062; # (a◌ᫎ◌̕◌̀◌֮b; a◌֮◌ᫎ◌̀◌̕b; a◌֮◌ᫎ◌̀◌̕b; a◌֮◌ᫎ◌̀◌̕b; a◌֮◌ᫎ◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LATIN SMALL LETTER INSULAR T, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ACF 0062;00E0 05AE 1ACF 0315 0062;0061 05AE 0300 1ACF 0315 0062;00E0 05AE 1ACF 0315 
0062;0061 05AE 0300 1ACF 0315 0062; # (a◌̕◌̀◌֮◌᫏b; à◌֮◌᫏◌̕b; a◌֮◌̀◌᫏◌̕b; à◌֮◌᫏◌̕b; a◌֮◌̀◌᫏◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOUBLE CARON, LATIN SMALL LETTER B +0061 1ACF 0315 0300 05AE 0062;0061 05AE 1ACF 0300 0315 0062;0061 05AE 1ACF 0300 0315 0062;0061 05AE 1ACF 0300 0315 0062;0061 05AE 1ACF 0300 0315 0062; # (a◌᫏◌̕◌̀◌֮b; a◌֮◌᫏◌̀◌̕b; a◌֮◌᫏◌̀◌̕b; a◌֮◌᫏◌̀◌̕b; a◌֮◌᫏◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOUBLE CARON, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD0 0062;00E0 05AE 1AD0 0315 0062;0061 05AE 0300 1AD0 0315 0062;00E0 05AE 1AD0 0315 0062;0061 05AE 0300 1AD0 0315 0062; # (a◌̕◌̀◌֮◌᫐b; à◌֮◌᫐◌̕b; a◌֮◌̀◌᫐◌̕b; à◌֮◌᫐◌̕b; a◌֮◌̀◌᫐◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-ACUTE, LATIN SMALL LETTER B +0061 1AD0 0315 0300 05AE 0062;0061 05AE 1AD0 0300 0315 0062;0061 05AE 1AD0 0300 0315 0062;0061 05AE 1AD0 0300 0315 0062;0061 05AE 1AD0 0300 0315 0062; # (a◌᫐◌̕◌̀◌֮b; a◌֮◌᫐◌̀◌̕b; a◌֮◌᫐◌̀◌̕b; a◌֮◌᫐◌̀◌̕b; a◌֮◌᫐◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-ACUTE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD1 0062;00E0 05AE 1AD1 0315 0062;0061 05AE 0300 1AD1 0315 0062;00E0 05AE 1AD1 0315 0062;0061 05AE 0300 1AD1 0315 0062; # (a◌̕◌̀◌֮◌᫑b; à◌֮◌᫑◌̕b; a◌֮◌̀◌᫑◌̕b; à◌֮◌᫑◌̕b; a◌֮◌̀◌᫑◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING GRAVE-VERTICAL-LINE, LATIN SMALL LETTER B +0061 1AD1 0315 0300 05AE 0062;0061 05AE 1AD1 0300 0315 0062;0061 05AE 1AD1 0300 0315 0062;0061 05AE 1AD1 0300 0315 0062;0061 05AE 1AD1 0300 0315 0062; # (a◌᫑◌̕◌̀◌֮b; a◌֮◌᫑◌̀◌̕b; a◌֮◌᫑◌̀◌̕b; a◌֮◌᫑◌̀◌̕b; a◌֮◌᫑◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GRAVE-VERTICAL-LINE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT 
ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD2 0062;00E0 05AE 1AD2 0315 0062;0061 05AE 0300 1AD2 0315 0062;00E0 05AE 1AD2 0315 0062;0061 05AE 0300 1AD2 0315 0062; # (a◌̕◌̀◌֮◌᫒b; à◌֮◌᫒◌̕b; a◌֮◌̀◌᫒◌̕b; à◌֮◌᫒◌̕b; a◌֮◌̀◌᫒◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-GRAVE, LATIN SMALL LETTER B +0061 1AD2 0315 0300 05AE 0062;0061 05AE 1AD2 0300 0315 0062;0061 05AE 1AD2 0300 0315 0062;0061 05AE 1AD2 0300 0315 0062;0061 05AE 1AD2 0300 0315 0062; # (a◌᫒◌̕◌̀◌֮b; a◌֮◌᫒◌̀◌̕b; a◌֮◌᫒◌̀◌̕b; a◌֮◌᫒◌̀◌̕b; a◌֮◌᫒◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-GRAVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD3 0062;00E0 05AE 1AD3 0315 0062;0061 05AE 0300 1AD3 0315 0062;00E0 05AE 1AD3 0315 0062;0061 05AE 0300 1AD3 0315 0062; # (a◌̕◌̀◌֮◌᫓b; à◌֮◌᫓◌̕b; a◌֮◌̀◌᫓◌̕b; à◌֮◌᫓◌̕b; a◌֮◌̀◌᫓◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING ACUTE-VERTICAL-LINE, LATIN SMALL LETTER B +0061 1AD3 0315 0300 05AE 0062;0061 05AE 1AD3 0300 0315 0062;0061 05AE 1AD3 0300 0315 0062;0061 05AE 1AD3 0300 0315 0062;0061 05AE 1AD3 0300 0315 0062; # (a◌᫓◌̕◌̀◌֮b; a◌֮◌᫓◌̀◌̕b; a◌֮◌᫓◌̀◌̕b; a◌֮◌᫓◌̀◌̕b; a◌֮◌᫓◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING ACUTE-VERTICAL-LINE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD4 0062;00E0 05AE 1AD4 0315 0062;0061 05AE 0300 1AD4 0315 0062;00E0 05AE 1AD4 0315 0062;0061 05AE 0300 1AD4 0315 0062; # (a◌̕◌̀◌֮◌᫔b; à◌֮◌᫔◌̕b; a◌֮◌̀◌᫔◌̕b; à◌֮◌᫔◌̕b; a◌֮◌̀◌᫔◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-MACRON, LATIN SMALL LETTER B +0061 1AD4 0315 0300 05AE 0062;0061 05AE 1AD4 0300 0315 0062;0061 05AE 1AD4 0300 0315 0062;0061 05AE 1AD4 0300 0315 0062;0061 05AE 1AD4 0300 0315 0062; # (a◌᫔◌̕◌̀◌֮b; a◌֮◌᫔◌̀◌̕b; 
a◌֮◌᫔◌̀◌̕b; a◌֮◌᫔◌̀◌̕b; a◌֮◌᫔◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-MACRON, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD5 0062;00E0 05AE 1AD5 0315 0062;0061 05AE 0300 1AD5 0315 0062;00E0 05AE 1AD5 0315 0062;0061 05AE 0300 1AD5 0315 0062; # (a◌̕◌̀◌֮◌᫕b; à◌֮◌᫕◌̕b; a◌֮◌̀◌᫕◌̕b; à◌֮◌᫕◌̕b; a◌֮◌̀◌᫕◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING MACRON-VERTICAL-LINE, LATIN SMALL LETTER B +0061 1AD5 0315 0300 05AE 0062;0061 05AE 1AD5 0300 0315 0062;0061 05AE 1AD5 0300 0315 0062;0061 05AE 1AD5 0300 0315 0062;0061 05AE 1AD5 0300 0315 0062; # (a◌᫕◌̕◌̀◌֮b; a◌֮◌᫕◌̀◌̕b; a◌֮◌᫕◌̀◌̕b; a◌֮◌᫕◌̀◌̕b; a◌֮◌᫕◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING MACRON-VERTICAL-LINE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD6 0062;00E0 05AE 1AD6 0315 0062;0061 05AE 0300 1AD6 0315 0062;00E0 05AE 1AD6 0315 0062;0061 05AE 0300 1AD6 0315 0062; # (a◌̕◌̀◌֮◌᫖b; à◌֮◌᫖◌̕b; a◌֮◌̀◌᫖◌̕b; à◌֮◌᫖◌̕b; a◌֮◌̀◌᫖◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-ACUTE-GRAVE, LATIN SMALL LETTER B +0061 1AD6 0315 0300 05AE 0062;0061 05AE 1AD6 0300 0315 0062;0061 05AE 1AD6 0300 0315 0062;0061 05AE 1AD6 0300 0315 0062;0061 05AE 1AD6 0300 0315 0062; # (a◌᫖◌̕◌̀◌֮b; a◌֮◌᫖◌̀◌̕b; a◌֮◌᫖◌̀◌̕b; a◌֮◌᫖◌̀◌̕b; a◌֮◌᫖◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-ACUTE-GRAVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD7 0062;00E0 05AE 1AD7 0315 0062;0061 05AE 0300 1AD7 0315 0062;00E0 05AE 1AD7 0315 0062;0061 05AE 0300 1AD7 0315 0062; # (a◌̕◌̀◌֮◌᫗b; à◌֮◌᫗◌̕b; a◌֮◌̀◌᫗◌̕b; à◌֮◌᫗◌̕b; a◌֮◌̀◌᫗◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING VERTICAL-LINE-GRAVE-ACUTE, LATIN SMALL LETTER B +0061 
1AD7 0315 0300 05AE 0062;0061 05AE 1AD7 0300 0315 0062;0061 05AE 1AD7 0300 0315 0062;0061 05AE 1AD7 0300 0315 0062;0061 05AE 1AD7 0300 0315 0062; # (a◌᫗◌̕◌̀◌֮b; a◌֮◌᫗◌̀◌̕b; a◌֮◌᫗◌̀◌̕b; a◌֮◌᫗◌̀◌̕b; a◌֮◌᫗◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING VERTICAL-LINE-GRAVE-ACUTE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD8 0062;00E0 05AE 1AD8 0315 0062;0061 05AE 0300 1AD8 0315 0062;00E0 05AE 1AD8 0315 0062;0061 05AE 0300 1AD8 0315 0062; # (a◌̕◌̀◌֮◌᫘b; à◌֮◌᫘◌̕b; a◌֮◌̀◌᫘◌̕b; à◌֮◌᫘◌̕b; a◌֮◌̀◌᫘◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING MACRON-ACUTE-GRAVE, LATIN SMALL LETTER B +0061 1AD8 0315 0300 05AE 0062;0061 05AE 1AD8 0300 0315 0062;0061 05AE 1AD8 0300 0315 0062;0061 05AE 1AD8 0300 0315 0062;0061 05AE 1AD8 0300 0315 0062; # (a◌᫘◌̕◌̀◌֮b; a◌֮◌᫘◌̀◌̕b; a◌֮◌᫘◌̀◌̕b; a◌֮◌᫘◌̀◌̕b; a◌֮◌᫘◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING MACRON-ACUTE-GRAVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AD9 0062;00E0 05AE 1AD9 0315 0062;0061 05AE 0300 1AD9 0315 0062;00E0 05AE 1AD9 0315 0062;0061 05AE 0300 1AD9 0315 0062; # (a◌̕◌̀◌֮◌᫙b; à◌֮◌᫙◌̕b; a◌֮◌̀◌᫙◌̕b; à◌֮◌᫙◌̕b; a◌֮◌̀◌᫙◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING SHARP SIGN, LATIN SMALL LETTER B +0061 1AD9 0315 0300 05AE 0062;0061 05AE 1AD9 0300 0315 0062;0061 05AE 1AD9 0300 0315 0062;0061 05AE 1AD9 0300 0315 0062;0061 05AE 1AD9 0300 0315 0062; # (a◌᫙◌̕◌̀◌֮b; a◌֮◌᫙◌̀◌̕b; a◌֮◌᫙◌̀◌̕b; a◌֮◌᫙◌̀◌̕b; a◌֮◌᫙◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING SHARP SIGN, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADA 0062;00E0 05AE 1ADA 0315 0062;0061 05AE 0300 1ADA 0315 0062;00E0 05AE 1ADA 0315 0062;0061 05AE 0300 1ADA 0315 0062; # (a◌̕◌̀◌֮◌᫚b; à◌֮◌᫚◌̕b; a◌֮◌̀◌᫚◌̕b; à◌֮◌᫚◌̕b; a◌֮◌̀◌᫚◌̕b; ) LATIN SMALL LETTER 
A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING FLAT SIGN, LATIN SMALL LETTER B +0061 1ADA 0315 0300 05AE 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062;0061 05AE 1ADA 0300 0315 0062; # (a◌᫚◌̕◌̀◌֮b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; a◌֮◌᫚◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING FLAT SIGN, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADB 0062;00E0 05AE 1ADB 0315 0062;0061 05AE 0300 1ADB 0315 0062;00E0 05AE 1ADB 0315 0062;0061 05AE 0300 1ADB 0315 0062; # (a◌̕◌̀◌֮◌᫛b; à◌֮◌᫛◌̕b; a◌֮◌̀◌᫛◌̕b; à◌֮◌᫛◌̕b; a◌֮◌̀◌᫛◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOWN TACK ABOVE, LATIN SMALL LETTER B +0061 1ADB 0315 0300 05AE 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062;0061 05AE 1ADB 0300 0315 0062; # (a◌᫛◌̕◌̀◌֮b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; a◌֮◌᫛◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOWN TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1ADC 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062;00E0 05AE 1ADC 0315 0062;0061 05AE 0300 1ADC 0315 0062; # (a◌̕◌̀◌֮◌᫜b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; à◌֮◌᫜◌̕b; a◌֮◌̀◌᫜◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DIAERESIS WITH RAISED LEFT DOT, LATIN SMALL LETTER B +0061 1ADC 0315 0300 05AE 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062;0061 05AE 1ADC 0300 0315 0062; # (a◌᫜◌̕◌̀◌֮b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; a◌֮◌᫜◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DIAERESIS WITH RAISED LEFT DOT, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 1ADD 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 
1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062;0061 1DFA 0316 1ADD 059A 0062; # (a◌֚◌̖◌᷺◌᫝b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; a◌᷺◌̖◌᫝◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, COMBINING DOT-AND-RING BELOW, LATIN SMALL LETTER B +0061 1ADD 059A 0316 1DFA 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062;0061 1DFA 1ADD 0316 059A 0062; # (a◌᫝◌֚◌̖◌᷺b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; a◌᷺◌᫝◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING DOT-AND-RING BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE0 0062;00E0 05AE 1AE0 0315 0062;0061 05AE 0300 1AE0 0315 0062;00E0 05AE 1AE0 0315 0062;0061 05AE 0300 1AE0 0315 0062; # (a◌̕◌̀◌֮◌᫠b; à◌֮◌᫠◌̕b; a◌֮◌̀◌᫠◌̕b; à◌֮◌᫠◌̕b; a◌֮◌̀◌᫠◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT TACK ABOVE, LATIN SMALL LETTER B +0061 1AE0 0315 0300 05AE 0062;0061 05AE 1AE0 0300 0315 0062;0061 05AE 1AE0 0300 0315 0062;0061 05AE 1AE0 0300 0315 0062;0061 05AE 1AE0 0300 0315 0062; # (a◌᫠◌̕◌̀◌֮b; a◌֮◌᫠◌̀◌̕b; a◌֮◌᫠◌̀◌̕b; a◌֮◌᫠◌̀◌̕b; a◌֮◌᫠◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LEFT TACK ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE1 0062;00E0 05AE 1AE1 0315 0062;0061 05AE 0300 1AE1 0315 0062;00E0 05AE 1AE1 0315 0062;0061 05AE 0300 1AE1 0315 0062; # (a◌̕◌̀◌֮◌᫡b; à◌֮◌᫡◌̕b; a◌֮◌̀◌᫡◌̕b; à◌֮◌᫡◌̕b; a◌֮◌̀◌᫡◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING RIGHT TACK ABOVE, LATIN SMALL LETTER B +0061 1AE1 0315 0300 05AE 0062;0061 05AE 1AE1 0300 0315 0062;0061 05AE 1AE1 0300 0315 0062;0061 05AE 1AE1 0300 0315 0062;0061 05AE 1AE1 0300 0315 0062; # (a◌᫡◌̕◌̀◌֮b; a◌֮◌᫡◌̀◌̕b; a◌֮◌᫡◌̀◌̕b; a◌֮◌᫡◌̀◌̕b; a◌֮◌᫡◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING RIGHT TACK ABOVE, COMBINING COMMA 
ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE2 0062;00E0 05AE 1AE2 0315 0062;0061 05AE 0300 1AE2 0315 0062;00E0 05AE 1AE2 0315 0062;0061 05AE 0300 1AE2 0315 0062; # (a◌̕◌̀◌֮◌᫢b; à◌֮◌᫢◌̕b; a◌֮◌̀◌᫢◌̕b; à◌֮◌᫢◌̕b; a◌֮◌̀◌᫢◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING MINUS SIGN ABOVE, LATIN SMALL LETTER B +0061 1AE2 0315 0300 05AE 0062;0061 05AE 1AE2 0300 0315 0062;0061 05AE 1AE2 0300 0315 0062;0061 05AE 1AE2 0300 0315 0062;0061 05AE 1AE2 0300 0315 0062; # (a◌᫢◌̕◌̀◌֮b; a◌֮◌᫢◌̀◌̕b; a◌֮◌᫢◌̀◌̕b; a◌֮◌᫢◌̀◌̕b; a◌֮◌᫢◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING MINUS SIGN ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE3 0062;00E0 05AE 1AE3 0315 0062;0061 05AE 0300 1AE3 0315 0062;00E0 05AE 1AE3 0315 0062;0061 05AE 0300 1AE3 0315 0062; # (a◌̕◌̀◌֮◌᫣b; à◌֮◌᫣◌̕b; a◌֮◌̀◌᫣◌̕b; à◌֮◌᫣◌̕b; a◌֮◌̀◌᫣◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING INVERTED BRIDGE ABOVE, LATIN SMALL LETTER B +0061 1AE3 0315 0300 05AE 0062;0061 05AE 1AE3 0300 0315 0062;0061 05AE 1AE3 0300 0315 0062;0061 05AE 1AE3 0300 0315 0062;0061 05AE 1AE3 0300 0315 0062; # (a◌᫣◌̕◌̀◌֮b; a◌֮◌᫣◌̀◌̕b; a◌֮◌᫣◌̀◌̕b; a◌֮◌᫣◌̀◌̕b; a◌֮◌᫣◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING INVERTED BRIDGE ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE4 0062;00E0 05AE 1AE4 0315 0062;0061 05AE 0300 1AE4 0315 0062;00E0 05AE 1AE4 0315 0062;0061 05AE 0300 1AE4 0315 0062; # (a◌̕◌̀◌֮◌᫤b; à◌֮◌᫤◌̕b; a◌֮◌̀◌᫤◌̕b; à◌֮◌᫤◌̕b; a◌֮◌̀◌᫤◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING SQUARE ABOVE, LATIN SMALL LETTER B +0061 1AE4 0315 0300 05AE 0062;0061 05AE 1AE4 0300 0315 0062;0061 05AE 1AE4 0300 0315 0062;0061 05AE 1AE4 0300 0315 0062;0061 05AE 1AE4 0300 0315 
0062; # (a◌᫤◌̕◌̀◌֮b; a◌֮◌᫤◌̀◌̕b; a◌֮◌᫤◌̀◌̕b; a◌֮◌᫤◌̀◌̕b; a◌֮◌᫤◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING SQUARE ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE5 0062;00E0 05AE 1AE5 0315 0062;0061 05AE 0300 1AE5 0315 0062;00E0 05AE 1AE5 0315 0062;0061 05AE 0300 1AE5 0315 0062; # (a◌̕◌̀◌֮◌᫥b; à◌֮◌᫥◌̕b; a◌֮◌̀◌᫥◌̕b; à◌֮◌᫥◌̕b; a◌֮◌̀◌᫥◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING SEAGULL ABOVE, LATIN SMALL LETTER B +0061 1AE5 0315 0300 05AE 0062;0061 05AE 1AE5 0300 0315 0062;0061 05AE 1AE5 0300 0315 0062;0061 05AE 1AE5 0300 0315 0062;0061 05AE 1AE5 0300 0315 0062; # (a◌᫥◌̕◌̀◌֮b; a◌֮◌᫥◌̀◌̕b; a◌֮◌᫥◌̀◌̕b; a◌֮◌᫥◌̀◌̕b; a◌֮◌᫥◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING SEAGULL ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 1AE6 0062;0061 1DFA 0316 1AE6 059A 0062;0061 1DFA 0316 1AE6 059A 0062;0061 1DFA 0316 1AE6 059A 0062;0061 1DFA 0316 1AE6 059A 0062; # (a◌֚◌̖◌᷺◌᫦b; a◌᷺◌̖◌᫦◌֚b; a◌᷺◌̖◌᫦◌֚b; a◌᷺◌̖◌᫦◌֚b; a◌᷺◌̖◌᫦◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, COMBINING DOUBLE ARCH BELOW, LATIN SMALL LETTER B +0061 1AE6 059A 0316 1DFA 0062;0061 1DFA 1AE6 0316 059A 0062;0061 1DFA 1AE6 0316 059A 0062;0061 1DFA 1AE6 0316 059A 0062;0061 1DFA 1AE6 0316 059A 0062; # (a◌᫦◌֚◌̖◌᷺b; a◌᷺◌᫦◌̖◌֚b; a◌᷺◌᫦◌̖◌֚b; a◌᷺◌᫦◌̖◌֚b; a◌᷺◌᫦◌̖◌֚b; ) LATIN SMALL LETTER A, COMBINING DOUBLE ARCH BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE7 0062;00E0 05AE 1AE7 0315 0062;0061 05AE 0300 1AE7 0315 0062;00E0 05AE 1AE7 0315 0062;0061 05AE 0300 1AE7 0315 0062; # (a◌̕◌̀◌֮◌᫧b; à◌֮◌᫧◌̕b; a◌֮◌̀◌᫧◌̕b; à◌֮◌᫧◌̕b; a◌֮◌̀◌᫧◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING DOUBLE ARCH ABOVE, LATIN SMALL LETTER B 
+0061 1AE7 0315 0300 05AE 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062;0061 05AE 1AE7 0300 0315 0062; # (a◌᫧◌̕◌̀◌֮b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; a◌֮◌᫧◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING DOUBLE ARCH ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE8 0062;00E0 05AE 1AE8 0315 0062;0061 05AE 0300 1AE8 0315 0062;00E0 05AE 1AE8 0315 0062;0061 05AE 0300 1AE8 0315 0062; # (a◌̕◌̀◌֮◌᫨b; à◌֮◌᫨◌̕b; a◌֮◌̀◌᫨◌̕b; à◌֮◌᫨◌̕b; a◌֮◌̀◌᫨◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING EQUALS SIGN ABOVE, LATIN SMALL LETTER B +0061 1AE8 0315 0300 05AE 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062;0061 05AE 1AE8 0300 0315 0062; # (a◌᫨◌̕◌̀◌֮b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; a◌֮◌᫨◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING EQUALS SIGN ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AE9 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062;00E0 05AE 1AE9 0315 0062;0061 05AE 0300 1AE9 0315 0062; # (a◌̕◌̀◌֮◌᫩b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; à◌֮◌᫩◌̕b; a◌֮◌̀◌᫩◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING LEFT ANGLE CENTRED ABOVE, LATIN SMALL LETTER B +0061 1AE9 0315 0300 05AE 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062;0061 05AE 1AE9 0300 0315 0062; # (a◌᫩◌̕◌̀◌֮b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; a◌֮◌᫩◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING LEFT ANGLE CENTRED ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 1AEA 0062;00E0 05AE 1AEA 0315 0062;0061 05AE 0300 1AEA 0315 0062;00E0 05AE 1AEA 0315 0062;0061 05AE 0300 1AEA 0315 0062; # (a◌̕◌̀◌֮◌᫪b; à◌֮◌᫪◌̕b; a◌֮◌̀◌᫪◌̕b; à◌֮◌᫪◌̕b; 
a◌֮◌̀◌᫪◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING UPWARDS ARROW ABOVE, LATIN SMALL LETTER B +0061 1AEA 0315 0300 05AE 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062;0061 05AE 1AEA 0300 0315 0062; # (a◌᫪◌̕◌̀◌֮b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; a◌֮◌᫪◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING UPWARDS ARROW ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0345 035D 035C 1AEB 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062;0061 035C 035D 1AEB 0345 0062; # (a◌ͅ◌͝◌͜◌᫫b; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; a◌͜◌͝◌᫫◌ͅb; ) LATIN SMALL LETTER A, COMBINING GREEK YPOGEGRAMMENI, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, COMBINING DOUBLE RIGHTWARDS ARROW ABOVE, LATIN SMALL LETTER B +0061 1AEB 0345 035D 035C 0062;0061 035C 1AEB 035D 0345 0062;0061 035C 1AEB 035D 0345 0062;0061 035C 1AEB 035D 0345 0062;0061 035C 1AEB 035D 0345 0062; # (a◌᫫◌ͅ◌͝◌͜b; a◌͜◌᫫◌͝◌ͅb; a◌͜◌᫫◌͝◌ͅb; a◌͜◌᫫◌͝◌ͅb; a◌͜◌᫫◌͝◌ͅb; ) LATIN SMALL LETTER A, COMBINING DOUBLE RIGHTWARDS ARROW ABOVE, COMBINING GREEK YPOGEGRAMMENI, COMBINING DOUBLE BREVE, COMBINING DOUBLE BREVE BELOW, LATIN SMALL LETTER B 0061 3099 093C 16FF0 1B34 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062;0061 16FF0 093C 1B34 3099 0062; # (a◌゙◌𖿰़◌᬴b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; a𖿰◌़◌᬴◌゙b; ) LATIN SMALL LETTER A, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, BALINESE SIGN REREKAN, LATIN SMALL LETTER B 0061 1B34 3099 093C 16FF0 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062;0061 16FF0 1B34 093C 3099 0062; # (a◌᬴◌゙◌𖿰़b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; a𖿰◌᬴◌़◌゙b; ) LATIN SMALL LETTER A, BALINESE SIGN REREKAN, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, 
DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, LATIN SMALL LETTER B 0061 05B0 094D 3099 1B44 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062;0061 3099 094D 1B44 05B0 0062; # (a◌ְ◌्◌゙᭄b; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; a◌゙◌्᭄◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, BALINESE ADEG ADEG, LATIN SMALL LETTER B @@ -18646,6 +18701,10 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B 0061 10EAC 0315 0300 05AE 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062; # (a◌𐺬◌̕◌̀◌֮b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING MADDA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EFA 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062;0061 1DFA 0316 10EFA 059A 0062; # (a◌֚◌̖◌᷺◌𐻺b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; a◌᷺◌̖◌𐻺◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC DOUBLE VERTICAL BAR BELOW, LATIN SMALL 
LETTER B +0061 10EFA 059A 0316 1DFA 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062;0061 1DFA 10EFA 0316 059A 0062; # (a◌𐻺◌֚◌̖◌᷺b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; a◌᷺◌𐻺◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC DOUBLE VERTICAL BAR BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EFB 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062;0061 1DFA 0316 10EFB 059A 0062; # (a◌֚◌̖◌᷺◌𐻻b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; a◌᷺◌̖◌𐻻◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW NOON, LATIN SMALL LETTER B +0061 10EFB 059A 0316 1DFA 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062;0061 1DFA 10EFB 0316 059A 0062; # (a◌𐻻◌֚◌̖◌᷺b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; a◌᷺◌𐻻◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW NOON, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFD 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062; # (a◌֚◌̖◌᷺◌𐻽b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD SAKTA, LATIN SMALL LETTER B 0061 10EFD 059A 0316 1DFA 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062; # (a◌𐻽◌֚◌̖◌᷺b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW WORD SAKTA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFE 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 
059A 0062; # (a◌֚◌̖◌᷺◌𐻾b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD QASR, LATIN SMALL LETTER B @@ -18832,6 +18891,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 16B35 0315 0300 05AE 0062;0061 05AE 16B35 0300 0315 0062;0061 05AE 16B35 0300 0315 0062;0061 05AE 16B35 0300 0315 0062;0061 05AE 16B35 0300 0315 0062; # (a◌𖬵◌̕◌̀◌֮b; a◌֮◌𖬵◌̀◌̕b; a◌֮◌𖬵◌̀◌̕b; a◌֮◌𖬵◌̀◌̕b; a◌֮◌𖬵◌̀◌̕b; ) LATIN SMALL LETTER A, PAHAWH HMONG MARK CIM HOM, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 16B36 0062;00E0 05AE 16B36 0315 0062;0061 05AE 0300 16B36 0315 0062;00E0 05AE 16B36 0315 0062;0061 05AE 0300 16B36 0315 0062; # (a◌̕◌̀◌֮◌𖬶b; à◌֮◌𖬶◌̕b; a◌֮◌̀◌𖬶◌̕b; à◌֮◌𖬶◌̕b; a◌֮◌̀◌𖬶◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, PAHAWH HMONG MARK CIM TAUM, LATIN SMALL LETTER B 0061 16B36 0315 0300 05AE 0062;0061 05AE 16B36 0300 0315 0062;0061 05AE 16B36 0300 0315 0062;0061 05AE 16B36 0300 0315 0062;0061 05AE 16B36 0300 0315 0062; # (a◌𖬶◌̕◌̀◌֮b; a◌֮◌𖬶◌̀◌̕b; a◌֮◌𖬶◌̀◌̕b; a◌֮◌𖬶◌̀◌̕b; a◌֮◌𖬶◌̀◌̕b; ) LATIN SMALL LETTER A, PAHAWH HMONG MARK CIM TAUM, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 05B0 094D 3099 16D9D 0062;0061 3099 094D 16D9D 05B0 0062;0061 3099 094D 16D9D 05B0 0062;0061 3099 094D 16D9D 05B0 0062;0061 3099 094D 16D9D 05B0 0062; # (a◌ְ◌्◌゙◌𖶝b; a◌゙◌्◌𖶝◌ְb; a◌゙◌्◌𖶝◌ְb; a◌゙◌्◌𖶝◌ְb; a◌゙◌्◌𖶝◌ְb; ) LATIN SMALL LETTER A, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, CHISOI SIGN SISO, LATIN SMALL LETTER B +0061 16D9D 05B0 094D 3099 0062;0061 3099 16D9D 094D 05B0 0062;0061 3099 16D9D 094D 05B0 0062;0061 3099 16D9D 094D 05B0 0062;0061 3099 16D9D 094D 05B0 0062; # (a◌𖶝◌ְ◌्◌゙b; a◌゙◌𖶝◌्◌ְb; a◌゙◌𖶝◌्◌ְb; a◌゙◌𖶝◌्◌ְb; a◌゙◌𖶝◌्◌ְb; ) 
LATIN SMALL LETTER A, CHISOI SIGN SISO, HEBREW POINT SHEVA, DEVANAGARI SIGN VIRAMA, COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, LATIN SMALL LETTER B 0061 093C 16FF0 0334 16FF0 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062; # (a◌𖿰़◌̴𖿰b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, VIETNAMESE ALTERNATE READING MARK CA, LATIN SMALL LETTER B 0061 16FF0 093C 16FF0 0334 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062;0061 0334 16FF0 16FF0 093C 0062; # (a𖿰◌𖿰़◌̴b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; a◌̴𖿰𖿰◌़b; ) LATIN SMALL LETTER A, VIETNAMESE ALTERNATE READING MARK CA, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, LATIN SMALL LETTER B 0061 093C 16FF0 0334 16FF1 0062;0061 0334 16FF0 16FF1 093C 0062;0061 0334 16FF0 16FF1 093C 0062;0061 0334 16FF0 16FF1 093C 0062;0061 0334 16FF0 16FF1 093C 0062; # (a◌𖿰़◌̴𖿱b; a◌̴𖿰𖿱◌़b; a◌̴𖿰𖿱◌़b; a◌̴𖿰𖿱◌़b; a◌̴𖿰𖿱◌़b; ) LATIN SMALL LETTER A, DEVANAGARI SIGN NUKTA, VIETNAMESE ALTERNATE READING MARK CA, COMBINING TILDE OVERLAY, VIETNAMESE ALTERNATE READING MARK NHAY, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index f19bcc137..a9eb0c5ab 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2024-10-18, 17:34:51 GMT +# Date: 2024-11-15, 15:58:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -702,7 +702,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10EFA..10EFC ; Other_Alphabetic # Mn [3] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -809,6 +809,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA +11B60 ; Other_Alphabetic # Mn SHARADA VOWEL SIGN OE +11B61 ; Other_Alphabetic # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Other_Alphabetic # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Other_Alphabetic # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Other_Alphabetic # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Other_Alphabetic # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -842,6 +848,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1611E..16129 ; Other_Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK 1612A..1612C ; Other_Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 1612D..1612E ; 
Other_Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA +16D98 ; Other_Alphabetic # Mn CHISOI SIGN ANUSVARA 16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -858,7 +865,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1495 +# Total code points: 1506 # ================================================ @@ -871,6 +878,8 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER +16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; Ideographic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; Ideographic # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -885,7 +894,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106509 +# Total code points: 106514 # 
================================================ @@ -947,7 +956,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 0B3C ; Diacritic # Mn ORIYA SIGN NUKTA 0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA -0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE +0B53..0B55 ; Diacritic # Mn [3] ORIYA SIGN DOT ABOVE..ORIYA SIGN OVERLINE 0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA 0C3C ; Diacritic # Mn TELUGU SIGN NUKTA 0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA @@ -990,6 +999,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY 1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT +1ACF..1ADD ; Diacritic # Mn [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Diacritic # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B34 ; Diacritic # Mn BALINESE SIGN REREKAN 1B44 ; Diacritic # Mc BALINESE ADEG ADEG 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG @@ -1034,6 +1045,7 @@ A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F1 ; Diacritic # Lm MODIFIER LETTER CAPITAL S A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A806 ; Diacritic # Mn SYLOTI NAGRI SIGN HASANTA A82C ; Diacritic # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA @@ -1077,6 +1089,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10D24..10D27 ; 
Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK 10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EFA ; Diacritic # Mn ARABIC DOUBLE VERTICAL BAR BELOW 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1120,6 +1133,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11DD9 ; Diacritic # Lm TOLONG SIKI SIGN SELA 11F41 ; Diacritic # Mc KAWI SIGN KILLER 11F42 ; Diacritic # Mn KAWI CONJOINER 11F5A ; Diacritic # Mn KAWI SIGN NUKTA @@ -1128,6 +1142,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16D6B..16D6C ; Diacritic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D9D ; Diacritic # Mn CHISOI SIGN SISO 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY @@ -1150,7 +1165,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1178 +# Total code points: 1211 # ================================================ @@ -1160,7 +1175,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 07FA ; Extender # Lm NKO LAJANYALAN 
0A71 ; Extender # Mn GURMUKHI ADDAK 0AFB ; Extender # Mn GUJARATI SIGN SHADDA -0B55 ; Extender # Mn ORIYA SIGN OVERLINE +0B54..0B55 ; Extender # Mn [2] ORIYA SIGN DOUBLE DOT ABOVE..ORIYA SIGN OVERLINE 0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK 0EC6 ; Extender # Lm LAO KO LA 180A ; Extender # Po MONGOLIAN NIRUGU @@ -1190,14 +1205,16 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 113D3 ; Extender # Lo TULU-TIGALARI SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 11A98 ; Extender # Mn SOYOMBO GEMINATION MARK +11DD9 ; Extender # Lm TOLONG SIKI SIGN SELA 16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM 16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; Extender # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E5EF ; Extender # Mn OL ONAL SIGN IKIR 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 59 +# Total code points: 63 # ================================================ @@ -1220,7 +1237,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Other_Lowercase # Lm MODIFIER LETTER US -A7F2..A7F4 ; Other_Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Other_Lowercase # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT 
HOOK AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W @@ -1230,7 +1247,7 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 312 # ================================================ @@ -1809,9 +1826,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR 2B74..2B75 ; Pattern_Syntax # Cn [2] .. -2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B96 ; Pattern_Syntax # Cn -2B97..2BFF ; Pattern_Syntax # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Pattern_Syntax # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER 2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET 2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index a3b3ea090..03d7a8b30 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-10-16, 17:25:13 GMT +# Date: 2024-11-15, 15:58:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -180,6 +180,7 @@ blk; Bamum_Sup ; Bamum_Supplement blk; Bassa_Vah ; Bassa_Vah blk; Batak ; Batak blk; Bengali ; Bengali +blk; Beria_Erfe ; Beria_Erfe blk; Bhaiksuki ; Bhaiksuki blk; Block_Elements ; Block_Elements blk; Bopomofo ; Bopomofo @@ -197,6 +198,7 @@ blk; Cham ; Cham blk; Cherokee ; Cherokee blk; Cherokee_Sup ; Cherokee_Supplement blk; Chess_Symbols ; Chess_Symbols +blk; Chisoi ; Chisoi blk; Chorasmian ; Chorasmian blk; CJK ; CJK_Unified_Ideographs blk; CJK_Compat ; CJK_Compatibility @@ -420,9 +422,11 @@ blk; Runic ; Runic blk; Samaritan ; Samaritan blk; Saurashtra ; Saurashtra blk; Sharada ; Sharada +blk; Sharada_Sup ; Sharada_Supplement blk; Shavian ; Shavian blk; Shorthand_Format_Controls ; Shorthand_Format_Controls blk; Siddham ; Siddham +blk; Sidetic ; Sidetic blk; Sinhala ; Sinhala blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers blk; Small_Forms ; Small_Form_Variants @@ -472,6 +476,7 @@ blk; Tibetan ; Tibetan blk; Tifinagh ; Tifinagh blk; Tirhuta ; Tirhuta blk; Todhri ; Todhri +blk; Tolong_Siki ; Tolong_Siki blk; Toto ; Toto blk; Transport_And_Map ; Transport_And_Map_Symbols blk; Tulu_Tigalari ; Tulu_Tigalari @@ -1090,6 +1095,7 @@ jg ; Taw ; Taw jg ; Teh_Marbuta ; Teh_Marbuta jg ; Teh_Marbuta_Goal ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal jg ; Teth ; Teth +jg ; Thin_Noon ; Thin_Noon jg ; Thin_Yeh ; Thin_Yeh jg ; Vertical_Tail ; Vertical_Tail jg ; Waw ; Waw @@ -1332,6 +1338,7 @@ sc ; Cans ; Canadian_Aboriginal sc ; Cari ; Carian sc ; Cham ; Cham sc ; Cher ; Cherokee +sc ; Chis ; Chisoi sc ; Chrs ; Chorasmian sc ; Copt ; Coptic ; Qaac sc ; Cpmn ; Cypro_Minoan @@ -1430,6 +1437,7 @@ sc ; Phlp ; Psalter_Pahlavi sc ; Phnx ; Phoenician sc ; Plrd ; Miao sc ; Prti ; Inscriptional_Parthian +sc ; Qaba ; Beria_Erfe sc ; Rjng ; Rejang sc ; Rohg ; Hanifi_Rohingya sc ; Runr ; Runic @@ -1440,6 +1448,7 @@ sc ; Sgnw ; SignWriting sc ; Shaw ; Shavian sc ; Shrd ; Sharada sc ; Sidd ; 
Siddham +sc ; Sidt ; Sidetic sc ; Sind ; Khudawadi sc ; Sinh ; Sinhala sc ; Sogd ; Sogdian @@ -1466,6 +1475,7 @@ sc ; Tibt ; Tibetan sc ; Tirh ; Tirhuta sc ; Tnsa ; Tangsa sc ; Todr ; Todhri +sc ; Tols ; Tolong_Siki sc ; Toto ; Toto sc ; Tutg ; Tulu_Tigalari sc ; Ugar ; Ugaritic diff --git a/unicodetools/data/ucd/dev/ScriptExtensions.txt b/unicodetools/data/ucd/dev/ScriptExtensions.txt index 140901a87..f63e2cd5f 100644 --- a/unicodetools/data/ucd/dev/ScriptExtensions.txt +++ b/unicodetools/data/ucd/dev/ScriptExtensions.txt @@ -1,5 +1,5 @@ -# ScriptExtensions-16.0.0.txt -# Date: 2024-07-30, 19:38:00 GMT +# ScriptExtensions-17.0.0.txt +# Date: 2024-11-14, 19:49:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -81,8 +81,8 @@ 06D4 ; Arab Rohg # Po ARABIC FULL STOP 0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh #Mn DEVANAGARI STRESS SIGN UDATTA 0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh #Mn DEVANAGARI STRESS SIGN ANUDATTA -0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DANDA -0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Gukh Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DOUBLE DANDA +0964 ; Beng Chis Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DANDA +0965 ; Beng Chis Deva Dogr Gong Gonm Gran Gujr Gukh Guru Knda Limb Mahj Mlym Nand Onao Orya Sind Sinh Sylo Takr Taml Telu Tirh #Po DEVANAGARI DOUBLE DANDA 0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE 0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE diff --git 
a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 44508ae85..0b3d28238 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2024-10-18, 17:35:23 GMT +# Date: 2024-11-15, 15:58:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -306,8 +306,7 @@ 2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; Common # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER 2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET 2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET @@ -633,7 +632,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9053 +# Total code points: 9054 # ================================================ @@ -648,8 +647,8 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP 
-0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; Latin # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; Latin # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN @@ -676,11 +675,8 @@ A770 ; Latin # Lm MODIFIER LETTER US A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; Latin # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; Latin # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; Latin # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; Latin # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -702,7 +698,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; 
Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1487 +# Total code points: 1492 # ================================================ @@ -869,7 +865,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE 0870..0887 ; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; Arabic # Sk ARABIC RAISED ROUND DOT -0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Arabic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; Arabic # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -878,11 +874,13 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; Arabic # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; Arabic # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; Arabic # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; Arabic # Lo [54] ARABIC 
LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; Arabic # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; Arabic # Sc RIAL SIGN FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -890,7 +888,11 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; Arabic # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; Arabic # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; Arabic # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; Arabic # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; Arabic # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -926,7 +928,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH 
TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1373 +# Total code points: 1413 # ================================================ @@ -1014,8 +1016,9 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; Bengali # Po BENGALI ABBREVIATION SIGN 09FE ; Bengali # Mn BENGALI SANDHI MARK +09FF ; Bengali # Lo BENGALI LETTER SANSKRIT BA -# Total code points: 96 +# Total code points: 97 # ================================================ @@ -1092,7 +1095,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Oriya # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Oriya # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Oriya # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Oriya # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -1102,7 +1105,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0B71 ; Oriya # Lo ORIYA LETTER WA 0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS -# Total code points: 91 +# Total code points: 93 # ================================================ @@ -1155,7 +1158,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Telugu # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 
@@ -1163,7 +1166,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; Telugu # So TELUGU SIGN TUUMU -# Total code points: 100 +# Total code points: 101 # ================================================ @@ -1186,14 +1189,14 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Kannada # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0CF3 ; Kannada # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT -# Total code points: 91 +# Total code points: 92 # ================================================ @@ -1594,6 +1597,8 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 16FE2 ; Han # Po OLD CHINESE HOOK MARK 16FE3 ; Han # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; Han # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; Han # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Han # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 
@@ -1604,7 +1609,7 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 99030 +# Total code points: 99035 # ================================================ @@ -1647,7 +1652,8 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE 0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT 1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Inherited # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Inherited # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Inherited # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL @@ -1676,7 +1682,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON 1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 657 +# Total code points: 684 # ================================================ @@ -2347,8 +2353,14 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 111DB ; Sharada # Po SHARADA SIGN SIDDHAM 111DC ; Sharada # Lo SHARADA HEADSTROKE 111DD..111DF ; Sharada # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +11B60 ; Sharada # Mn SHARADA VOWEL SIGN 
OE +11B61 ; Sharada # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Sharada # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Sharada # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Sharada # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Sharada # Mc SHARADA VOWEL SIGN CANDRA O -# Total code points: 96 +# Total code points: 104 # ================================================ @@ -3125,4 +3137,36 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 80 +# ================================================ + +16D80..16D97 ; Chisoi # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; Chisoi # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; Chisoi # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; Chisoi # Mn CHISOI SIGN SISO +16DA0..16DA9 ; Chisoi # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE + +# Total code points: 40 + +# ================================================ + +10940..1095C ; Sidetic # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 + +# Total code points: 29 + +# ================================================ + +11DB0..11DD8 ; Tolong_Siki # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; Tolong_Siki # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; Tolong_Siki # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; Tolong_Siki # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE + +# Total code points: 54 + +# ================================================ + +16EA0..16EB8 ; Beria_Erfe # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; Beria_Erfe # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY + +# Total code points: 50 + # EOF diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index 59284fb58..401b8d8bc 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -12405,9 +12405,12 @@ U+18D1B kTGT_MergedSrc N5217-27 U+18D1B 
kRSTUnicode 36.7 U+18D1C kTGT_MergedSrc N5217-28 U+18D1C kRSTUnicode 141.9 +<<<<<<< HEAD U+18D1D kTGT_MergedSrc H2021-309801 U+18D1D kRSTUnicode 106.13 U+18D1E kTGT_MergedSrc H2021-834001 U+18D1E kRSTUnicode 579.14 +======= +>>>>>>> la-vache/main # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 02663f9a2..e69059b21 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -659,7 +659,7 @@ 0292;LATIN SMALL LETTER EZH;Ll;0;L;;;;;N;LATIN SMALL LETTER YOGH;;01B7;;01B7 0293;LATIN SMALL LETTER EZH WITH CURL;Ll;0;L;;;;;N;LATIN SMALL LETTER YOGH CURL;;;; 0294;LATIN LETTER GLOTTAL STOP;Lo;0;L;;;;;N;;;;; -0295;LATIN LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;LATIN LETTER REVERSED GLOTTAL STOP;;;; +0295;LATIN LETTER PHARYNGEAL VOICED FRICATIVE;Lo;0;L;;;;;N;LATIN LETTER REVERSED GLOTTAL STOP;;;; 0296;LATIN LETTER INVERTED GLOTTAL STOP;Ll;0;L;;;;;N;;;;; 0297;LATIN LETTER STRETCHED C;Ll;0;L;;;;;N;;;;; 0298;LATIN LETTER BILABIAL CLICK;Ll;0;L;;;;;N;LATIN LETTER BULLSEYE;;;; @@ -2121,6 +2121,7 @@ 088C;ARABIC LETTER TAH WITH THREE DOTS BELOW;Lo;0;AL;;;;;N;;;;; 088D;ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 088E;ARABIC VERTICAL TAIL;Lo;0;AL;;;;;N;;;;; +088F;ARABIC LETTER NOON WITH RING ABOVE;Lo;0;AL;;;;;N;;;;; 0890;ARABIC POUND MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0891;ARABIC PIASTRE MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0897;ARABIC PEPET;Mn;230;NSM;;;;;N;;;;; @@ -2452,6 +2453,7 @@ 09FC;BENGALI LETTER VEDIC ANUSVARA;Lo;0;L;;;;;N;;;;; 09FD;BENGALI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 09FE;BENGALI SANDHI MARK;Mn;230;NSM;;;;;N;;;;; +09FF;BENGALI LETTER SANSKRIT BA;Lo;0;L;;;;;N;;;;; 0A01;GURMUKHI SIGN ADAK BINDI;Mn;0;NSM;;;;;N;;;;; 0A02;GURMUKHI SIGN BINDI;Mn;0;NSM;;;;;N;;;;; 0A03;GURMUKHI SIGN VISARGA;Mc;0;L;;;;;N;;;;; @@ -2686,6 +2688,8 @@ 0B4B;ORIYA VOWEL SIGN O;Mc;0;L;0B47 0B3E;;;;N;;;;; 0B4C;ORIYA VOWEL SIGN AU;Mc;0;L;0B47 0B57;;;;N;;;;; 0B4D;ORIYA SIGN 
VIRAMA;Mn;9;NSM;;;;;N;;;;; +0B53;ORIYA SIGN DOT ABOVE;Mn;0;NSM;;;;;N;;;;; +0B54;ORIYA SIGN DOUBLE DOT ABOVE;Mn;0;NSM;;;;;N;;;;; 0B55;ORIYA SIGN OVERLINE;Mn;0;NSM;;;;;N;;;;; 0B56;ORIYA AI LENGTH MARK;Mn;0;NSM;;;;;N;;;;; 0B57;ORIYA AU LENGTH MARK;Mc;0;L;;;;;N;;;;; @@ -2862,6 +2866,7 @@ 0C58;TELUGU LETTER TSA;Lo;0;L;;;;;N;;;;; 0C59;TELUGU LETTER DZA;Lo;0;L;;;;;N;;;;; 0C5A;TELUGU LETTER RRRA;Lo;0;L;;;;;N;;;;; +0C5C;TELUGU ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0C5D;TELUGU LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0C60;TELUGU LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; 0C61;TELUGU LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; @@ -2958,6 +2963,7 @@ 0CCD;KANNADA SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; 0CD5;KANNADA LENGTH MARK;Mc;0;L;;;;;N;;;;; 0CD6;KANNADA AI LENGTH MARK;Mc;0;L;;;;;N;;;;; +0CDC;KANNADA ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0CDD;KANNADA LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0CDE;KANNADA LETTER FA;Lo;0;L;;;;;N;;;;; 0CE0;KANNADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; @@ -6137,6 +6143,33 @@ 1ACC;COMBINING LATIN SMALL LETTER INSULAR G;Mn;230;NSM;;;;;N;;;;; 1ACD;COMBINING LATIN SMALL LETTER INSULAR R;Mn;230;NSM;;;;;N;;;;; 1ACE;COMBINING LATIN SMALL LETTER INSULAR T;Mn;230;NSM;;;;;N;;;;; +1ACF;COMBINING DOUBLE CARON;Mn;230;NSM;;;;;N;;;;; +1AD0;COMBINING VERTICAL-LINE-ACUTE;Mn;230;NSM;;;;;N;;;;; +1AD1;COMBINING GRAVE-VERTICAL-LINE;Mn;230;NSM;;;;;N;;;;; +1AD2;COMBINING VERTICAL-LINE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD3;COMBINING ACUTE-VERTICAL-LINE;Mn;230;NSM;;;;;N;;;;; +1AD4;COMBINING VERTICAL-LINE-MACRON;Mn;230;NSM;;;;;N;;;;; +1AD5;COMBINING MACRON-VERTICAL-LINE;Mn;230;NSM;;;;;N;;;;; +1AD6;COMBINING VERTICAL-LINE-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD7;COMBINING VERTICAL-LINE-GRAVE-ACUTE;Mn;230;NSM;;;;;N;;;;; +1AD8;COMBINING MACRON-ACUTE-GRAVE;Mn;230;NSM;;;;;N;;;;; +1AD9;COMBINING SHARP SIGN;Mn;230;NSM;;;;;N;;;;; +1ADA;COMBINING FLAT SIGN;Mn;230;NSM;;;;;N;;;;; +1ADB;COMBINING DOWN TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1ADC;COMBINING DIAERESIS WITH RAISED LEFT DOT;Mn;230;NSM;;;;;N;;;;; +1ADD;COMBINING 
DOT-AND-RING BELOW;Mn;220;NSM;;;;;N;;;;; +1AE0;COMBINING LEFT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE1;COMBINING RIGHT TACK ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE2;COMBINING MINUS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE3;COMBINING INVERTED BRIDGE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE4;COMBINING SQUARE ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE5;COMBINING SEAGULL ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE6;COMBINING DOUBLE ARCH BELOW;Mn;220;NSM;;;;;N;;;;; +1AE7;COMBINING DOUBLE ARCH ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE8;COMBINING EQUALS SIGN ABOVE;Mn;230;NSM;;;;;N;;;;; +1AE9;COMBINING LEFT ANGLE CENTRED ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEA;COMBINING UPWARDS ARROW ABOVE;Mn;230;NSM;;;;;N;;;;; +1AEB;COMBINING DOUBLE RIGHTWARDS ARROW ABOVE;Mn;234;NSM;;;;;N;;;;; 1B00;BALINESE SIGN ULU RICEM;Mn;0;NSM;;;;;N;;;;; 1B01;BALINESE SIGN ULU CANDRA;Mn;0;NSM;;;;;N;;;;; 1B02;BALINESE SIGN CECEK;Mn;0;NSM;;;;;N;;;;; @@ -10239,6 +10272,7 @@ 2B93;NEWLINE RIGHT;So;0;ON;;;;;N;;;;; 2B94;FOUR CORNER ARROWS CIRCLING ANTICLOCKWISE;So;0;ON;;;;;N;;;;; 2B95;RIGHTWARDS BLACK ARROW;So;0;ON;;;;;N;;;;; +2B96;EQUALS SIGN WITH INFINITY ABOVE;So;0;ON;;;;;N;;;;; 2B97;SYMBOL FOR TYPE A ELECTRONICS;So;0;ON;;;;;N;;;;; 2B98;THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD;So;0;ON;;;;;N;;;;; 2B99;THREE-D RIGHT-LIGHTED UPWARDS EQUILATERAL ARROWHEAD;So;0;ON;;;;;N;;;;; @@ -14274,10 +14308,14 @@ A7CA;LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY;Ll;0;L;;;;;N;;;A7C9;;A7C9 A7CB;LATIN CAPITAL LETTER RAMS HORN;Lu;0;L;;;;;N;;;;0264; A7CC;LATIN CAPITAL LETTER S WITH DIAGONAL STROKE;Lu;0;L;;;;;N;;;;A7CD; A7CD;LATIN SMALL LETTER S WITH DIAGONAL STROKE;Ll;0;L;;;;;N;;;A7CC;;A7CC +A7CE;LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE;Lu;0;L;;;;;N;;;;A7CF; +A7CF;LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE;Ll;0;L;;;;;N;;;A7CE;;A7CE A7D0;LATIN CAPITAL LETTER CLOSED INSULAR G;Lu;0;L;;;;;N;;;;A7D1; A7D1;LATIN SMALL LETTER CLOSED INSULAR G;Ll;0;L;;;;;N;;;A7D0;;A7D0 -A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;;; -A7D5;LATIN SMALL LETTER DOUBLE 
WYNN;Ll;0;L;;;;;N;;;;; +A7D2;LATIN CAPITAL LETTER DOUBLE THORN;Lu;0;L;;;;;N;;;;A7D3; +A7D3;LATIN SMALL LETTER DOUBLE THORN;Ll;0;L;;;;;N;;;A7D2;;A7D2 +A7D4;LATIN CAPITAL LETTER DOUBLE WYNN;Lu;0;L;;;;;N;;;;A7D5; +A7D5;LATIN SMALL LETTER DOUBLE WYNN;Ll;0;L;;;;;N;;;A7D4;;A7D4 A7D6;LATIN CAPITAL LETTER MIDDLE SCOTS S;Lu;0;L;;;;;N;;;;A7D7; A7D7;LATIN SMALL LETTER MIDDLE SCOTS S;Ll;0;L;;;;;N;;;A7D6;;A7D6 A7D8;LATIN CAPITAL LETTER SIGMOID S;Lu;0;L;;;;;N;;;;A7D9; @@ -14285,6 +14323,7 @@ A7D9;LATIN SMALL LETTER SIGMOID S;Ll;0;L;;;;;N;;;A7D8;;A7D8 A7DA;LATIN CAPITAL LETTER LAMBDA;Lu;0;L;;;;;N;;;;A7DB; A7DB;LATIN SMALL LETTER LAMBDA;Ll;0;L;;;;;N;;;A7DA;;A7DA A7DC;LATIN CAPITAL LETTER LAMBDA WITH STROKE;Lu;0;L;;;;;N;;;;019B; +A7F1;MODIFIER LETTER CAPITAL S;Lm;0;L; 0053;;;;N;;;;; A7F2;MODIFIER LETTER CAPITAL C;Lm;0;L; 0043;;;;N;;;;; A7F3;MODIFIER LETTER CAPITAL F;Lm;0;L; 0046;;;;N;;;;; A7F4;MODIFIER LETTER CAPITAL Q;Lm;0;L; 0051;;;;N;;;;; @@ -15925,6 +15964,22 @@ FBBF;ARABIC SYMBOL RING;Sk;0;AL;;;;;N;;;;; FBC0;ARABIC SYMBOL SMALL TAH ABOVE;Sk;0;AL;;;;;N;;;;; FBC1;ARABIC SYMBOL SMALL TAH BELOW;Sk;0;AL;;;;;N;;;;; FBC2;ARABIC SYMBOL WASLA ABOVE;Sk;0;AL;;;;;N;;;;; +FBC3;ARABIC LIGATURE JALLA WA-ALAA;So;0;ON;;;;;N;;;;; +FBC4;ARABIC LIGATURE DAAMAT BARAKAATUHUM;So;0;ON;;;;;N;;;;; +FBC5;ARABIC LIGATURE RAHMATU ALLAAHI TAAALAA ALAYH;So;0;ON;;;;;N;;;;; +FBC6;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIM;So;0;ON;;;;;N;;;;; +FBC7;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIMAA;So;0;ON;;;;;N;;;;; +FBC8;ARABIC LIGATURE RAHIMAHUM ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBC9;ARABIC LIGATURE RAHIMAHUMAA ALLAAH;So;0;ON;;;;;N;;;;; +FBCA;ARABIC LIGATURE RAHIMAHUMAA ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCB;ARABIC LIGATURE RADI ALLAHU TAAALAA ANHUM;So;0;ON;;;;;N;;;;; +FBCC;ARABIC LIGATURE HAFIZAHU ALLAAH;So;0;ON;;;;;N;;;;; +FBCD;ARABIC LIGATURE HAFIZAHU ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCE;ARABIC LIGATURE HAFIZAHUM ALLAAHU TAAALAA;So;0;ON;;;;;N;;;;; +FBCF;ARABIC LIGATURE HAFIZAHUMAA ALLAAHU 
TAAALAA;So;0;ON;;;;;N;;;;; +FBD0;ARABIC LIGATURE SALLALLAAHU TAAALAA ALAYHI WA-SALLAM;So;0;ON;;;;;N;;;;; +FBD1;ARABIC LIGATURE AJJAL ALLAAHU FARAJAHU ASH-SHAREEF;So;0;ON;;;;;N;;;;; +FBD2;ARABIC LIGATURE ALAYHI AR-RAHMAH;So;0;ON;;;;;N;;;;; FBD3;ARABIC LETTER NG ISOLATED FORM;Lo;0;AL; 06AD;;;;N;;;;; FBD4;ARABIC LETTER NG FINAL FORM;Lo;0;AL; 06AD;;;;N;;;;; FBD5;ARABIC LETTER NG INITIAL FORM;Lo;0;AL; 06AD;;;;N;;;;; @@ -16370,6 +16425,8 @@ FD8C;ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM;Lo;0;AL; 0645 FD8D;ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062C 0645;;;;N;;;;; FD8E;ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM;Lo;0;AL; 0645 062E 062C;;;;N;;;;; FD8F;ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM;Lo;0;AL; 0645 062E 0645;;;;N;;;;; +FD90;ARABIC LIGATURE RAHMATU ALLAAHI ALAYH;So;0;ON;;;;;N;;;;; +FD91;ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA;So;0;ON;;;;;N;;;;; FD92;ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM;Lo;0;AL; 0645 062C 062E;;;;N;;;;; FD93;ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM;Lo;0;AL; 0647 0645 062C;;;;N;;;;; FD94;ARABIC LIGATURE HEH WITH MEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0647 0645 0645;;;;N;;;;; @@ -16424,6 +16481,13 @@ FDC4;ARABIC LIGATURE AIN WITH JEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0639 FDC5;ARABIC LIGATURE SAD WITH MEEM WITH MEEM INITIAL FORM;Lo;0;AL; 0635 0645 0645;;;;N;;;;; FDC6;ARABIC LIGATURE SEEN WITH KHAH WITH YEH FINAL FORM;Lo;0;AL; 0633 062E 064A;;;;N;;;;; FDC7;ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM;Lo;0;AL; 0646 062C 064A;;;;N;;;;; +FDC8;ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA;So;0;ON;;;;;N;;;;; +FDC9;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANH;So;0;ON;;;;;N;;;;; +FDCA;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHAA;So;0;ON;;;;;N;;;;; +FDCB;ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHUMAA;So;0;ON;;;;;N;;;;; +FDCC;ARABIC LIGATURE SALLALLAHU ALAYHI WA-ALAA AALIHEE WA-SALLAM;So;0;ON;;;;;N;;;;; +FDCD;ARABIC LIGATURE AJJAL ALLAAHU TAAALAA FARAJAHU 
ASH-SHAREEF;So;0;ON;;;;;N;;;;; +FDCE;ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH;So;0;ON;;;;;N;;;;; FDCF;ARABIC LIGATURE SALAAMUHU ALAYNAA;So;0;ON;;;;;N;;;;; FDF0;ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM;Lo;0;AL; 0635 0644 06D2;;;;N;;;;; FDF1;ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN ISOLATED FORM;Lo;0;AL; 0642 0644 06D2;;;;N;;;;; @@ -18708,6 +18772,35 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10938;LYDIAN LETTER NN;Lo;0;R;;;;;N;;;;; 10939;LYDIAN LETTER C;Lo;0;R;;;;;N;;;;; 1093F;LYDIAN TRIANGULAR MARK;Po;0;R;;;;;N;;;;; +10940;SIDETIC LETTER N01;Lo;0;R;;;;;N;;;;; +10941;SIDETIC LETTER N02;Lo;0;R;;;;;N;;;;; +10942;SIDETIC LETTER N03;Lo;0;R;;;;;N;;;;; +10943;SIDETIC LETTER N04;Lo;0;R;;;;;N;;;;; +10944;SIDETIC LETTER N05;Lo;0;R;;;;;N;;;;; +10945;SIDETIC LETTER N06;Lo;0;R;;;;;N;;;;; +10946;SIDETIC LETTER N07;Lo;0;R;;;;;N;;;;; +10947;SIDETIC LETTER N08;Lo;0;R;;;;;N;;;;; +10948;SIDETIC LETTER N09;Lo;0;R;;;;;N;;;;; +10949;SIDETIC LETTER N10;Lo;0;R;;;;;N;;;;; +1094A;SIDETIC LETTER N11;Lo;0;R;;;;;N;;;;; +1094B;SIDETIC LETTER N12;Lo;0;R;;;;;N;;;;; +1094C;SIDETIC LETTER N13;Lo;0;R;;;;;N;;;;; +1094D;SIDETIC LETTER N14;Lo;0;R;;;;;N;;;;; +1094E;SIDETIC LETTER N15;Lo;0;R;;;;;N;;;;; +1094F;SIDETIC LETTER N16;Lo;0;R;;;;;N;;;;; +10950;SIDETIC LETTER N17;Lo;0;R;;;;;N;;;;; +10951;SIDETIC LETTER N18;Lo;0;R;;;;;N;;;;; +10952;SIDETIC LETTER N19;Lo;0;R;;;;;N;;;;; +10953;SIDETIC LETTER N20;Lo;0;R;;;;;N;;;;; +10954;SIDETIC LETTER N21;Lo;0;R;;;;;N;;;;; +10955;SIDETIC LETTER N22;Lo;0;R;;;;;N;;;;; +10956;SIDETIC LETTER N23;Lo;0;R;;;;;N;;;;; +10957;SIDETIC LETTER N24;Lo;0;R;;;;;N;;;;; +10958;SIDETIC LETTER N25;Lo;0;R;;;;;N;;;;; +10959;SIDETIC LETTER N26;Lo;0;R;;;;;N;;;;; +1095A;SIDETIC LETTER N27;Lo;0;R;;;;;N;;;;; +1095B;SIDETIC LETTER N28;Lo;0;R;;;;;N;;;;; +1095C;SIDETIC LETTER N29;Lo;0;R;;;;;N;;;;; 10980;MEROITIC HIEROGLYPHIC LETTER A;Lo;0;R;;;;;N;;;;; 10981;MEROITIC HIEROGLYPHIC LETTER E;Lo;0;R;;;;;N;;;;; 10982;MEROITIC HIEROGLYPHIC LETTER I;Lo;0;R;;;;;N;;;;; @@ 
-19541,6 +19634,20 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EC5;ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW;Lm;0;AL;;;;;N;;;;; +10EC6;ARABIC LETTER THIN NOON;Lo;0;AL;;;;;N;;;;; +10EC7;ARABIC LETTER YEH WITH FOUR DOTS BELOW;Lo;0;AL;;;;;N;;;;; +10ED0;ARABIC BIBLICAL END OF VERSE;Po;0;ON;;;;;N;;;;; +10ED1;ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED2;ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED3;ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM;So;0;ON;;;;;N;;;;; +10ED4;ARABIC LIGATURE QADDASA ALLAAHU SIRRAH;So;0;ON;;;;;N;;;;; +10ED5;ARABIC LIGATURE QUDDISA SIRRAHUM;So;0;ON;;;;;N;;;;; +10ED6;ARABIC LIGATURE QUDDISA SIRRAHUMAA;So;0;ON;;;;;N;;;;; +10ED7;ARABIC LIGATURE QUDDISAT ASRAARUHUM;So;0;ON;;;;;N;;;;; +10ED8;ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH;So;0;ON;;;;;N;;;;; +10EFA;ARABIC DOUBLE VERTICAL BAR BELOW;Mn;220;NSM;;;;;N;;;;; +10EFB;ARABIC SMALL LOW NOON;Mn;220;NSM;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; @@ -21521,6 +21628,14 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11B07;DEVANAGARI SIGN WESTERN NINE-LIKE BHALE;Po;0;L;;;;;N;;;;; 11B08;DEVANAGARI SIGN REVERSED NINE-LIKE BHALE;Po;0;L;;;;;N;;;;; 11B09;DEVANAGARI SIGN MINDU;Po;0;L;;;;;N;;;;; +11B60;SHARADA VOWEL SIGN OE;Mn;0;NSM;;;;;N;;;;; +11B61;SHARADA VOWEL SIGN OOE;Mc;0;L;;;;;N;;;;; +11B62;SHARADA VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;; +11B63;SHARADA VOWEL SIGN UUE;Mn;0;NSM;;;;;N;;;;; +11B64;SHARADA VOWEL SIGN SHORT E;Mn;0;NSM;;;;;N;;;;; +11B65;SHARADA VOWEL SIGN SHORT O;Mc;0;L;;;;;N;;;;; +11B66;SHARADA VOWEL SIGN CANDRA E;Mn;0;NSM;;;;;N;;;;; +11B67;SHARADA VOWEL SIGN CANDRA O;Mc;0;L;;;;;N;;;;; 
11BC0;SUNUWAR LETTER DEVI;Lo;0;L;;;;;N;;;;; 11BC1;SUNUWAR LETTER TASLA;Lo;0;L;;;;;N;;;;; 11BC2;SUNUWAR LETTER EKO;Lo;0;L;;;;;N;;;;; @@ -21868,6 +21983,60 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11DA7;GUNJALA GONDI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 11DA8;GUNJALA GONDI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 11DA9;GUNJALA GONDI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11DB0;TOLONG SIKI LETTER I;Lo;0;L;;;;;N;;;;; +11DB1;TOLONG SIKI LETTER E;Lo;0;L;;;;;N;;;;; +11DB2;TOLONG SIKI LETTER U;Lo;0;L;;;;;N;;;;; +11DB3;TOLONG SIKI LETTER O;Lo;0;L;;;;;N;;;;; +11DB4;TOLONG SIKI LETTER A;Lo;0;L;;;;;N;;;;; +11DB5;TOLONG SIKI LETTER AA;Lo;0;L;;;;;N;;;;; +11DB6;TOLONG SIKI LETTER P;Lo;0;L;;;;;N;;;;; +11DB7;TOLONG SIKI LETTER PH;Lo;0;L;;;;;N;;;;; +11DB8;TOLONG SIKI LETTER B;Lo;0;L;;;;;N;;;;; +11DB9;TOLONG SIKI LETTER BH;Lo;0;L;;;;;N;;;;; +11DBA;TOLONG SIKI LETTER M;Lo;0;L;;;;;N;;;;; +11DBB;TOLONG SIKI LETTER T;Lo;0;L;;;;;N;;;;; +11DBC;TOLONG SIKI LETTER TH;Lo;0;L;;;;;N;;;;; +11DBD;TOLONG SIKI LETTER D;Lo;0;L;;;;;N;;;;; +11DBE;TOLONG SIKI LETTER DH;Lo;0;L;;;;;N;;;;; +11DBF;TOLONG SIKI LETTER N;Lo;0;L;;;;;N;;;;; +11DC0;TOLONG SIKI LETTER TT;Lo;0;L;;;;;N;;;;; +11DC1;TOLONG SIKI LETTER TTH;Lo;0;L;;;;;N;;;;; +11DC2;TOLONG SIKI LETTER DD;Lo;0;L;;;;;N;;;;; +11DC3;TOLONG SIKI LETTER DDH;Lo;0;L;;;;;N;;;;; +11DC4;TOLONG SIKI LETTER NN;Lo;0;L;;;;;N;;;;; +11DC5;TOLONG SIKI LETTER C;Lo;0;L;;;;;N;;;;; +11DC6;TOLONG SIKI LETTER CH;Lo;0;L;;;;;N;;;;; +11DC7;TOLONG SIKI LETTER J;Lo;0;L;;;;;N;;;;; +11DC8;TOLONG SIKI LETTER JH;Lo;0;L;;;;;N;;;;; +11DC9;TOLONG SIKI LETTER NY;Lo;0;L;;;;;N;;;;; +11DCA;TOLONG SIKI LETTER K;Lo;0;L;;;;;N;;;;; +11DCB;TOLONG SIKI LETTER KH;Lo;0;L;;;;;N;;;;; +11DCC;TOLONG SIKI LETTER G;Lo;0;L;;;;;N;;;;; +11DCD;TOLONG SIKI LETTER GH;Lo;0;L;;;;;N;;;;; +11DCE;TOLONG SIKI LETTER NG;Lo;0;L;;;;;N;;;;; +11DCF;TOLONG SIKI LETTER Y;Lo;0;L;;;;;N;;;;; +11DD0;TOLONG SIKI LETTER R;Lo;0;L;;;;;N;;;;; +11DD1;TOLONG SIKI LETTER L;Lo;0;L;;;;;N;;;;; +11DD2;TOLONG SIKI LETTER V;Lo;0;L;;;;;N;;;;; 
+11DD3;TOLONG SIKI LETTER NNY;Lo;0;L;;;;;N;;;;; +11DD4;TOLONG SIKI LETTER S;Lo;0;L;;;;;N;;;;; +11DD5;TOLONG SIKI LETTER H;Lo;0;L;;;;;N;;;;; +11DD6;TOLONG SIKI LETTER X;Lo;0;L;;;;;N;;;;; +11DD7;TOLONG SIKI LETTER RR;Lo;0;L;;;;;N;;;;; +11DD8;TOLONG SIKI LETTER RRH;Lo;0;L;;;;;N;;;;; +11DD9;TOLONG SIKI SIGN SELA;Lm;0;L;;;;;N;;;;; +11DDA;TOLONG SIKI SIGN HECAKA;Lo;0;L;;;;;N;;;;; +11DDB;TOLONG SIKI UNGGA;Lo;0;L;;;;;N;;;;; +11DE0;TOLONG SIKI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +11DE1;TOLONG SIKI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +11DE2;TOLONG SIKI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +11DE3;TOLONG SIKI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +11DE4;TOLONG SIKI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +11DE5;TOLONG SIKI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +11DE6;TOLONG SIKI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +11DE7;TOLONG SIKI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +11DE8;TOLONG SIKI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +11DE9;TOLONG SIKI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 11EE0;MAKASAR LETTER KA;Lo;0;L;;;;;N;;;;; 11EE1;MAKASAR LETTER GA;Lo;0;L;;;;;N;;;;; 11EE2;MAKASAR LETTER NGA;Lo;0;L;;;;;N;;;;; @@ -30033,6 +30202,46 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16D77;KIRAT RAI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 16D78;KIRAT RAI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 16D79;KIRAT RAI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +16D80;CHISOI LETTER A;Lo;0;L;;;;;N;;;;; +16D81;CHISOI LETTER BA;Lo;0;L;;;;;N;;;;; +16D82;CHISOI LETTER AI;Lo;0;L;;;;;N;;;;; +16D83;CHISOI LETTER AA;Lo;0;L;;;;;N;;;;; +16D84;CHISOI LETTER GA;Lo;0;L;;;;;N;;;;; +16D85;CHISOI LETTER TA;Lo;0;L;;;;;N;;;;; +16D86;CHISOI LETTER E;Lo;0;L;;;;;N;;;;; +16D87;CHISOI LETTER SA;Lo;0;L;;;;;N;;;;; +16D88;CHISOI LETTER NA;Lo;0;L;;;;;N;;;;; +16D89;CHISOI LETTER I;Lo;0;L;;;;;N;;;;; +16D8A;CHISOI LETTER KA;Lo;0;L;;;;;N;;;;; +16D8B;CHISOI LETTER RA;Lo;0;L;;;;;N;;;;; +16D8C;CHISOI LETTER MA;Lo;0;L;;;;;N;;;;; +16D8D;CHISOI LETTER HA;Lo;0;L;;;;;N;;;;; +16D8E;CHISOI LETTER RRA;Lo;0;L;;;;;N;;;;; +16D8F;CHISOI LETTER U;Lo;0;L;;;;;N;;;;; +16D90;CHISOI LETTER DA;Lo;0;L;;;;;N;;;;; +16D91;CHISOI 
LETTER LA;Lo;0;L;;;;;N;;;;; +16D92;CHISOI LETTER O;Lo;0;L;;;;;N;;;;; +16D93;CHISOI LETTER NYA;Lo;0;L;;;;;N;;;;; +16D94;CHISOI LETTER NGA;Lo;0;L;;;;;N;;;;; +16D95;CHISOI LETTER CA;Lo;0;L;;;;;N;;;;; +16D96;CHISOI LETTER JA;Lo;0;L;;;;;N;;;;; +16D97;CHISOI LETTER PA;Lo;0;L;;;;;N;;;;; +16D98;CHISOI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +16D99;CHISOI LETTER YA;Lo;0;L;;;;;N;;;;; +16D9A;CHISOI LETTER DDA;Lo;0;L;;;;;N;;;;; +16D9B;CHISOI LETTER TTA;Lo;0;L;;;;;N;;;;; +16D9C;CHISOI LETTER JARAHA;Lo;0;L;;;;;N;;;;; +16D9D;CHISOI SIGN SISO;Mn;9;NSM;;;;;N;;;;; +16DA0;CHISOI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +16DA1;CHISOI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +16DA2;CHISOI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +16DA3;CHISOI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +16DA4;CHISOI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +16DA5;CHISOI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +16DA6;CHISOI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +16DA7;CHISOI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +16DA8;CHISOI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +16DA9;CHISOI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 16E40;MEDEFAIDRIN CAPITAL LETTER M;Lu;0;L;;;;;N;;;;16E60; 16E41;MEDEFAIDRIN CAPITAL LETTER S;Lu;0;L;;;;;N;;;;16E61; 16E42;MEDEFAIDRIN CAPITAL LETTER V;Lu;0;L;;;;;N;;;;16E62; @@ -30124,6 +30333,56 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16E98;MEDEFAIDRIN FULL STOP;Po;0;L;;;;;N;;;;; 16E99;MEDEFAIDRIN SYMBOL AIVA;Po;0;L;;;;;N;;;;; 16E9A;MEDEFAIDRIN EXCLAMATION OH;Po;0;L;;;;;N;;;;; +16EA0;BERIA ERFE CAPITAL LETTER ARKAB;Lu;0;L;;;;;N;;;;16EBB; +16EA1;BERIA ERFE CAPITAL LETTER BASIGNA;Lu;0;L;;;;;N;;;;16EBC; +16EA2;BERIA ERFE CAPITAL LETTER DARBAI;Lu;0;L;;;;;N;;;;16EBD; +16EA3;BERIA ERFE CAPITAL LETTER EH;Lu;0;L;;;;;N;;;;16EBE; +16EA4;BERIA ERFE CAPITAL LETTER FITKO;Lu;0;L;;;;;N;;;;16EBF; +16EA5;BERIA ERFE CAPITAL LETTER GOWAY;Lu;0;L;;;;;N;;;;16EC0; +16EA6;BERIA ERFE CAPITAL LETTER HIRDEABO;Lu;0;L;;;;;N;;;;16EC1; +16EA7;BERIA ERFE CAPITAL LETTER I;Lu;0;L;;;;;N;;;;16EC2; +16EA8;BERIA ERFE CAPITAL LETTER DJAI;Lu;0;L;;;;;N;;;;16EC3; +16EA9;BERIA ERFE CAPITAL LETTER 
KOBO;Lu;0;L;;;;;N;;;;16EC4; +16EAA;BERIA ERFE CAPITAL LETTER LAKKO;Lu;0;L;;;;;N;;;;16EC5; +16EAB;BERIA ERFE CAPITAL LETTER MERI;Lu;0;L;;;;;N;;;;16EC6; +16EAC;BERIA ERFE CAPITAL LETTER NINI;Lu;0;L;;;;;N;;;;16EC7; +16EAD;BERIA ERFE CAPITAL LETTER GNA;Lu;0;L;;;;;N;;;;16EC8; +16EAE;BERIA ERFE CAPITAL LETTER NGAY;Lu;0;L;;;;;N;;;;16EC9; +16EAF;BERIA ERFE CAPITAL LETTER OI;Lu;0;L;;;;;N;;;;16ECA; +16EB0;BERIA ERFE CAPITAL LETTER PI;Lu;0;L;;;;;N;;;;16ECB; +16EB1;BERIA ERFE CAPITAL LETTER ERIGO;Lu;0;L;;;;;N;;;;16ECC; +16EB2;BERIA ERFE CAPITAL LETTER ERIGO TAMURA;Lu;0;L;;;;;N;;;;16ECD; +16EB3;BERIA ERFE CAPITAL LETTER SERI;Lu;0;L;;;;;N;;;;16ECE; +16EB4;BERIA ERFE CAPITAL LETTER SHEP;Lu;0;L;;;;;N;;;;16ECF; +16EB5;BERIA ERFE CAPITAL LETTER TATASOUE;Lu;0;L;;;;;N;;;;16ED0; +16EB6;BERIA ERFE CAPITAL LETTER UI;Lu;0;L;;;;;N;;;;16ED1; +16EB7;BERIA ERFE CAPITAL LETTER WASSE;Lu;0;L;;;;;N;;;;16ED2; +16EB8;BERIA ERFE CAPITAL LETTER AY;Lu;0;L;;;;;N;;;;16ED3; +16EBB;BERIA ERFE SMALL LETTER ARKAB;Ll;0;L;;;;;N;;;16EA0;;16EA0 +16EBC;BERIA ERFE SMALL LETTER BASIGNA;Ll;0;L;;;;;N;;;16EA1;;16EA1 +16EBD;BERIA ERFE SMALL LETTER DARBAI;Ll;0;L;;;;;N;;;16EA2;;16EA2 +16EBE;BERIA ERFE SMALL LETTER EH;Ll;0;L;;;;;N;;;16EA3;;16EA3 +16EBF;BERIA ERFE SMALL LETTER FITKO;Ll;0;L;;;;;N;;;16EA4;;16EA4 +16EC0;BERIA ERFE SMALL LETTER GOWAY;Ll;0;L;;;;;N;;;16EA5;;16EA5 +16EC1;BERIA ERFE SMALL LETTER HIRDEABO;Ll;0;L;;;;;N;;;16EA6;;16EA6 +16EC2;BERIA ERFE SMALL LETTER I;Ll;0;L;;;;;N;;;16EA7;;16EA7 +16EC3;BERIA ERFE SMALL LETTER DJAI;Ll;0;L;;;;;N;;;16EA8;;16EA8 +16EC4;BERIA ERFE SMALL LETTER KOBO;Ll;0;L;;;;;N;;;16EA9;;16EA9 +16EC5;BERIA ERFE SMALL LETTER LAKKO;Ll;0;L;;;;;N;;;16EAA;;16EAA +16EC6;BERIA ERFE SMALL LETTER MERI;Ll;0;L;;;;;N;;;16EAB;;16EAB +16EC7;BERIA ERFE SMALL LETTER NINI;Ll;0;L;;;;;N;;;16EAC;;16EAC +16EC8;BERIA ERFE SMALL LETTER GNA;Ll;0;L;;;;;N;;;16EAD;;16EAD +16EC9;BERIA ERFE SMALL LETTER NGAY;Ll;0;L;;;;;N;;;16EAE;;16EAE +16ECA;BERIA ERFE SMALL LETTER OI;Ll;0;L;;;;;N;;;16EAF;;16EAF +16ECB;BERIA ERFE 
SMALL LETTER PI;Ll;0;L;;;;;N;;;16EB0;;16EB0 +16ECC;BERIA ERFE SMALL LETTER ERIGO;Ll;0;L;;;;;N;;;16EB1;;16EB1 +16ECD;BERIA ERFE SMALL LETTER ERIGO TAMURA;Ll;0;L;;;;;N;;;16EB2;;16EB2 +16ECE;BERIA ERFE SMALL LETTER SERI;Ll;0;L;;;;;N;;;16EB3;;16EB3 +16ECF;BERIA ERFE SMALL LETTER SHEP;Ll;0;L;;;;;N;;;16EB4;;16EB4 +16ED0;BERIA ERFE SMALL LETTER TATASOUE;Ll;0;L;;;;;N;;;16EB5;;16EB5 +16ED1;BERIA ERFE SMALL LETTER UI;Ll;0;L;;;;;N;;;16EB6;;16EB6 +16ED2;BERIA ERFE SMALL LETTER WASSE;Ll;0;L;;;;;N;;;16EB7;;16EB7 +16ED3;BERIA ERFE SMALL LETTER AY;Ll;0;L;;;;;N;;;16EB8;;16EB8 16F00;MIAO LETTER PA;Lo;0;L;;;;;N;;;;; 16F01;MIAO LETTER BA;Lo;0;L;;;;;N;;;;; 16F02;MIAO LETTER YI PA;Lo;0;L;;;;;N;;;;; @@ -30280,6 +30539,11 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16FE4;KHITAN SMALL SCRIPT FILLER;Mn;0;NSM;;;;;N;;;;; 16FF0;VIETNAMESE ALTERNATE READING MARK CA;Mc;6;L;;;;;N;;;;; 16FF1;VIETNAMESE ALTERNATE READING MARK NHAY;Mc;6;L;;;;;N;;;;; +16FF2;CHINESE SMALL SIMPLIFIED ER;Lm;0;L;;;;;N;;;;; +16FF3;CHINESE SMALL TRADITIONAL ER;Lm;0;L;;;;;N;;;;; +16FF4;YANGQIN SIGN SLOW ONE BEAT;Nl;0;L;;;;1;N;;;;; +16FF5;YANGQIN SIGN SLOW THREE HALF BEATS;Nl;0;L;;;;3/2;N;;;;; +16FF6;YANGQIN SIGN SLOW TWO BEATS;Nl;0;L;;;;2;N;;;;; 17000;;Lo;0;L;;;;;N;;;;; 187FF;;Lo;0;L;;;;;N;;;;; 18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 365aba541..6e52d34f5 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2024-10-18, 17:35:26 GMT +# Date: 2024-11-15, 15:58:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -141,8 +141,8 @@ 01C0..01C3 ; R # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..024F ; R # L& [140] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER Y WITH STROKE 0250..0293 ; R # Ll [68] LATIN SMALL LETTER TURNED A..LATIN SMALL LETTER EZH WITH CURL -0294 ; R # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; R # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; R # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; R # Ll [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; R # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; R # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; R # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -267,7 +267,7 @@ 0860..086A ; R # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; R # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; R # Sk ARABIC RAISED ROUND DOT -0889..088E ; R # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; R # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; R # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; R # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; R # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -326,6 +326,7 @@ 09FC ; R # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; R # Po BENGALI ABBREVIATION SIGN 09FE ; R # Mn BENGALI SANDHI MARK +09FF ; R # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; R # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; R # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; R # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ 
-389,7 +390,7 @@ 0B47..0B48 ; R # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; R # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; R # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; R # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; R # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; R # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; R # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; R # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -437,7 +438,7 @@ 0C4A..0C4D ; R # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; R # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; R # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; R # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; R # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; R # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; R # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; R # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -463,7 +464,7 @@ 0CCA..0CCB ; R # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; R # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; R # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; R # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; R # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; R # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; R # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; R # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -741,7 +742,8 @@ 1AA8..1AAD ; R # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; R # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; R # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; R # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; R # Mn [31] COMBINING LATIN SMALL LETTER W 
BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; R # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; R # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; R # Mc BALINESE SIGN BISAH 1B05..1B33 ; R # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1093,7 +1095,7 @@ 2B4D..2B4F ; R # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW 2B50..2B59 ; U # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2B5A..2B73 ; R # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; R # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B76..2B96 ; R # So [33] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..EQUALS SIGN WITH INFINITY ABOVE 2B97 ; U # So SYMBOL FOR TYPE A ELECTRONICS 2B98..2BB7 ; R # So [32] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..RIBBON ARROW RIGHT DOWN 2BB8..2BD1 ; U # So [26] UPWARDS WHITE ARROW FROM BAR WITH HORIZONTAL BAR..UNCERTAINTY SIGN @@ -1357,11 +1359,8 @@ A788 ; R # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; R # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; R # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; R # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; R # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; R # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; R # Ll LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; R # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; R # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; R # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; R # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; R # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN 
SMALL LETTER REVERSED HALF H A7F7 ; R # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; R # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1522,13 +1521,15 @@ FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETT FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED FB50..FBB1 ; R # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; R # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; R # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; R # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; R # Pe ORNATE LEFT PARENTHESIS FD3F ; R # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; R # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; R # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; R # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; R # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; R # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; R # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; R # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; R # Sc RIAL SIGN FDFD..FDFF ; R # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1725,6 +1726,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1091F ; R # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1095C ; R # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..1099F ; U # Lo 
[32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 109A0..109B7 ; R # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF @@ -1785,7 +1787,11 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; R # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; R # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; R # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; R # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; R # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2060,6 +2066,12 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11AB0..11ABF ; U # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; R # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; R # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; R # Mn SHARADA VOWEL SIGN OE +11B61 ; R # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; R # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; R # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; R # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; R # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; R # Lo [33] SUNUWAR LETTER 
DEVI..SUNUWAR LETTER KLOKO 11BE1 ; R # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; R # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -2104,6 +2116,10 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11D97 ; R # Mn GUNJALA GONDI VIRAMA 11D98 ; R # Lo GUNJALA GONDI OM 11DA0..11DA9 ; R # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; R # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; R # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; R # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; R # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; R # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; R # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; R # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -2174,9 +2190,16 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16D6B..16D6C ; R # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; R # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; R # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; R # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; R # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; R # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; R # Mn CHISOI SIGN SISO +16DA0..16DA9 ; R # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; R # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; R # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; R # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; R # Lu [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; R # Ll [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; R # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; R # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; R # Lo MIAO LETTER NASALIZATION @@ -2189,7 +2212,9 @@ FFFC..FFFD ; U # So 
[2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FE4 ; U # Mn KHITAN SMALL SCRIPT FILLER 16FE5..16FEF ; U # Cn [11] .. 16FF0..16FF1 ; U # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -16FF2..16FFF ; U # Cn [14] .. +16FF2..16FF3 ; U # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; U # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS +16FF7..16FFF ; U # Cn [9] .. 17000..187FF ; U # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index a863397dd..48351bad9 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ -# GraphemeBreakProperty-16.0.0.txt -# Date: 2024-05-31, 18:09:38 GMT +# GraphemeBreakProperty-17.0.0.txt +# Date: 2024-11-14, 19:48:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -145,7 +145,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0B3F ; Extend # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -243,7 +243,8 @@ E01F0..E0FFF ; Control # Cn [3600] .. 
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG @@ -339,7 +340,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -430,6 +431,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Extend # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; Extend # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI 
VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA @@ -458,6 +462,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; Extend # Mn CHISOI SIGN SISO 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER @@ -495,7 +501,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2198 +# Total code points: 2236 # ================================================ @@ -646,6 +652,9 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA 11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA +11B61 ; SpacingMark # Mc SHARADA VOWEL SIGN OOE +11B65 ; SpacingMark # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; SpacingMark # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA 11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA 11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA @@ -661,7 +670,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -# Total code points: 378 +# Total code points: 381 # 
================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html index 405d0078c..619182f0b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.html @@ -7,7 +7,7 @@

Grapheme_Cluster_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:11:44 GMT

+

Date: 2024-10-30, 21:25:11 GMT

This page illustrates the application of the Grapheme_Cluster_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicated a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

After the heavy blue line in the table are additional rows, either with different sample characters or for sequences. Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of LVT and T shows ×, with the rule 8.0. Checking below the table, rule 8.0 is “( LVT | T) × T”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -39,7 +39,7 @@

Table

Other÷÷÷÷×÷÷×÷÷÷÷÷×÷÷÷÷××××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule GB9a is given the number 9.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules, see UAX #29.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule GB9a is given the number 9.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.

For the original rules, see UAX #29.

@@ -294,6 +294,14 @@

Sample Strings

◌्     + +
0.2sot ÷
0.3÷ eot
36 +     +◌ૻ   +◌્   +   +◌ૻ   +

diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt index d10c174b6..4e55634d7 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakTest.txt @@ -1,5 +1,5 @@ -# GraphemeBreakTest-16.0.0.txt -# Date: 2024-05-02, 15:02:48 GMT +# GraphemeBreakTest-17.0.0.txt +# Date: 2024-10-30, 21:25:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1115,7 +1115,8 @@ ÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0AB8 × 0AFB × 0ACD × 0AB8 × 0AFB ÷ # ÷ [0.2] GUJARATI LETTER SA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] GUJARATI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] GUJARATI LETTER SA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] # -# Lines: 1093 +# Lines: 1094 # # EOF diff --git 
a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html index 4cfb8f6d9..745622907 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html @@ -7,7 +7,7 @@

Line_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:11:46 GMT

+

Date: 2024-10-30, 21:25:12 GMT

This page illustrates the application of the Line_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of H3 and JT shows ×, with the rule 26.03. Checking below the table, rule 26.03 is “JT | H3 × JT”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -67,9 +67,9 @@

Table

BA_NotEastAsian_NonEastAsianBA_Hyphen××××÷÷÷÷×÷×÷÷÷÷÷××××××÷÷×××÷××÷×÷×÷÷÷÷÷÷÷÷×÷××÷÷÷÷×××÷×××××××××× CP_NotEastAsian_CP30×××××÷÷÷×÷×÷÷÷÷÷××××××÷÷××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××÷×××××××××× OP_NotEastAsian_OP30×××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××× -CM1_NotEastAsian_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× +CM1_NotEastAsian_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× ZWJ_O_ZWJ_NotEastAsian_CM×××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××××× -CM1_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× +CM1_CM×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× AL_NotEastAsian_AL×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× AI_NotEastAsian_AL×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× XX_NotEastAsian_AL×××××÷÷÷×÷×÷××÷÷××××××××××××××××÷××÷÷÷÷÷÷÷×÷××÷÷÷÷×××××××××××××× @@ -79,7 +79,7 @@

Table

CJ_NS÷××××÷÷÷×÷×÷÷÷÷÷××××××÷÷×××÷××××÷×÷÷÷÷÷÷÷÷×÷××÷÷÷÷×××÷×××÷÷÷÷÷××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ×”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule LB21a is given the number 21.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #14, and thus reflected in a transformation of the rules usually not visible here. Where it does show up, an extra variable like CM+ may appear, and the rule may be recast. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. Where a rule has multiple parts (lines), each one is numbered using hundredths, such as 21.01) × BA, 21.02) × HY, ... In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules, see UAX #14.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ×”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule LB21a is given the number 21.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. Where a rule has multiple parts (lines), each one is numbered using hundredths, such as 21.01) × BA, 21.02) × HY, ...

For the original rules, see UAX #14.

@@ -93,13 +93,12 @@

Rules

- + + - - - + @@ -229,7 +228,7 @@

Sample Strings

diff --git a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt index 472c419c5..cc03bb2fb 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt @@ -1,5 +1,5 @@ -# LineBreakTest-16.0.0.txt -# Date: 2024-07-05, 00:45:20 GMT +# LineBreakTest-17.0.0.txt +# Date: 2024-10-11, 18:57:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -40,7 +40,7 @@ × 23E9 × 0308 × 0020 × FE15 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 23E9 × 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 23E9 × 0020 ÷ 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 23E9 × 0308 × 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 23E9 × 0308 × 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 23E9 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 23E9 ÷ AC00 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 23E9 × 0020 ÷ AC00 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -144,11 
+144,11 @@ × 23E9 × 0308 × 0020 × 0085 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 23E9 × 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 23E9 × 0020 ÷ 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 23E9 × 0308 × 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 23E9 × 0308 × 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 23E9 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 23E9 × 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 23E9 × 0020 ÷ 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 23E9 × 0308 × 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 23E9 × 0308 × 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 23E9 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 23E9 ÷ 00B4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 23E9 × 0020 ÷ 00B4 ÷ # × [0.3] BLACK RIGHT-POINTING DOUBLE TRIANGLE (AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -404,7 +404,7 @@ × 3000 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3000 × 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3000 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 3000 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 3000 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3000 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3000 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 3000 × 0020 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC SPACE (BA) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -552,7 +552,7 @@ × 232A × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL 
EXCLAMATION MARK (EX) ÷ [0.3] × 232A × 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 232A × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 232A × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 232A × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 232A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 232A ÷ AC00 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 232A × 0020 ÷ AC00 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -656,11 +656,11 @@ × 232A × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 232A × 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 232A × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 232A × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 232A × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 232A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 232A × 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 232A × 0020 ÷ 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 232A × 0308 × 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 232A × 0308 × 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 232A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 232A ÷ 00B4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 232A × 0020 ÷ 00B4 ÷ # × [0.3] RIGHT-POINTING ANGLE BRACKET (CL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -808,7 +808,7 @@ × FE15 × 0308 × 0020 × FE15 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FE15 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE15 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FE15 × 0308 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL 
EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FE15 × 0308 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE15 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE15 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FE15 × 0020 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -912,11 +912,11 @@ × FE15 × 0308 × 0020 × 0085 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FE15 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE15 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FE15 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FE15 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE15 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE15 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR 
VERTICAL EXCLAMATION MARK (EX) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE15 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× FE15 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× FE15 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE15 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE15 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × FE15 × 0020 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -1320,7 +1320,7 @@ × AC00 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × AC00 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC00 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× AC00 × 0308 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× AC00 × 0308 × 
16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC00 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC00 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × AC00 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -1424,11 +1424,11 @@ × AC00 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × AC00 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC00 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× AC00 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× AC00 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC00 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC00 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC00 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× AC00 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× AC00 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC00 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC00 ÷ 00B4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × AC00 × 0020 ÷ 00B4 ÷ # × [0.3] HANGUL SYLLABLE GA (H2) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -1576,7 +1576,7 @@ × AC01 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × AC01 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC01 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× AC01 × 0308 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× AC01 × 0308 × 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC01 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × AC01 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × AC01 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -1680,11 +1680,11 @@ × AC01 × 0308 × 0020 × 0085 ÷ # × [0.3] 
HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × AC01 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC01 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× AC01 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× AC01 × 0308 × 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC01 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × AC01 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC01 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× AC01 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× AC01 × 0308 × 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC01 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × AC01 ÷ 00B4 ÷ # × [0.3] HANGUL SYLLABLE GAG (H3) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × AC01 × 0020 ÷ 00B4 ÷ # × [0.3] HANGUL 
SYLLABLE GAG (H3) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -1832,7 +1832,7 @@ × 231A × 0308 × 0020 × FE15 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 231A × 16FE4 ÷ # × [0.3] WATCH (ID) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 231A × 0020 ÷ 16FE4 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 231A × 0308 × 16FE4 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 231A × 0308 × 16FE4 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 231A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 231A ÷ AC00 ÷ # × [0.3] WATCH (ID) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 231A × 0020 ÷ AC00 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -1936,11 +1936,11 @@ × 231A × 0308 × 0020 × 0085 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 231A × 00A0 ÷ # × [0.3] WATCH (ID) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 231A × 0020 ÷ 00A0 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 231A × 0308 × 00A0 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 231A × 0308 × 00A0 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 231A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 231A × 00AB ÷ # × [0.3] WATCH (ID) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 231A × 0020 ÷ 00AB ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 231A × 0308 × 00AB ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 231A × 0308 × 00AB ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 231A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] WATCH (ID) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 231A ÷ 00B4 ÷ # × [0.3] WATCH (ID) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 231A × 0020 ÷ 00B4 ÷ # × [0.3] WATCH (ID) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2088,7 +2088,7 @@ × FE19 × 0308 × 0020 × FE15 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FE19 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE19 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FE19 × 0308 × 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FE19 × 0308 × 16FE4 ÷ # × 
[0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE19 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE19 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FE19 × 0020 ÷ AC00 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -2192,11 +2192,11 @@ × FE19 × 0308 × 0020 × 0085 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FE19 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE19 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FE19 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FE19 × 0308 × 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE19 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE19 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] 
× FE19 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× FE19 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× FE19 × 0308 × 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE19 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE19 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × FE19 × 0020 ÷ 00B4 ÷ # × [0.3] PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS (IN) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2344,7 +2344,7 @@ × 1100 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1100 × 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1100 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1100 × 0308 × 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1100 × 0308 × 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1100 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1100 × AC00 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [26.01] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1100 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -2448,11 +2448,11 @@ × 1100 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1100 × 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1100 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1100 × 0308 × 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1100 × 0308 × 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1100 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1100 × 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1100 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 1100 × 0308 × 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 1100 × 0308 × 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1100 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1100 ÷ 00B4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 1100 × 0020 ÷ 00B4 ÷ # × [0.3] HANGUL CHOSEONG KIYEOK (JL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2600,7 +2600,7 @@ × 3005 × 0308 × 0020 × FE15 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 3005 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3005 × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 3005 × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 3005 × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3005 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3005 ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 3005 × 0020 ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ 
[0.3] @@ -2704,11 +2704,11 @@ × 3005 × 0308 × 0020 × 0085 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 3005 × 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3005 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 3005 × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 3005 × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3005 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3005 × 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3005 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 3005 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 3005 × 0308 × 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3005 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ 
[0.3] × 3005 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 3005 × 0020 ÷ 00B4 ÷ # × [0.3] IDEOGRAPHIC ITERATION MARK (NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -2856,7 +2856,7 @@ × 2329 × 0308 × 0020 × FE15 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2329 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2329 × 0020 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 2329 × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2329 × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2329 × 0308 × 0020 × 16FE4 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2329 × AC00 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2329 × 0020 × AC00 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -2960,7 +2960,7 @@ × 2329 × 0308 × 0020 × 0085 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2329 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2329 × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE 
(GL_NotEastAsian) ÷ [0.3] -× 2329 × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2329 × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2329 × 0308 × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2329 × 00AB ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2329 × 0020 × 00AB ÷ # × [0.3] LEFT-POINTING ANGLE BRACKET (OP) × [7.01] SPACE (SP_NotEastAsian) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -3112,7 +3112,7 @@ × FE6A × 0308 × 0020 × FE15 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FE6A × 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE6A × 0020 ÷ 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FE6A × 0308 × 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FE6A × 0308 × 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE6A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FE6A ÷ AC00 ÷ # × [0.3] SMALL PERCENT SIGN (PO) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FE6A × 
0020 ÷ AC00 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -3216,11 +3216,11 @@ × FE6A × 0308 × 0020 × 0085 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FE6A × 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE6A × 0020 ÷ 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FE6A × 0308 × 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FE6A × 0308 × 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE6A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FE6A × 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE6A × 0020 ÷ 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× FE6A × 0308 × 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× FE6A × 0308 × 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE6A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FE6A ÷ 00B4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × FE6A × 0020 ÷ 00B4 ÷ # × [0.3] SMALL PERCENT SIGN (PO) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -3368,7 +3368,7 @@ × 20A9 × 0308 × 0020 × FE15 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 20A9 × 16FE4 ÷ # × [0.3] WON SIGN (PR) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 20A9 × 0020 ÷ 16FE4 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 20A9 × 0308 × 16FE4 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 20A9 × 0308 × 16FE4 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 20A9 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 20A9 × AC00 ÷ # × [0.3] WON SIGN (PR) × [27.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 20A9 × 0020 ÷ AC00 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -3472,11 +3472,11 @@ × 20A9 × 0308 × 0020 × 0085 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 20A9 × 00A0 ÷ # × [0.3] WON SIGN (PR) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 20A9 × 0020 ÷ 00A0 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 20A9 × 0308 × 00A0 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 20A9 × 0308 × 00A0 ÷ # 
× [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 20A9 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 20A9 × 00AB ÷ # × [0.3] WON SIGN (PR) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 20A9 × 0020 ÷ 00AB ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 20A9 × 0308 × 00AB ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 20A9 × 0308 × 00AB ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 20A9 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] WON SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 20A9 ÷ 00B4 ÷ # × [0.3] WON SIGN (PR) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 20A9 × 0020 ÷ 00B4 ÷ # × [0.3] WON SIGN (PR) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -3624,7 +3624,7 @@ × 270A × 0308 × 0020 × FE15 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 270A × 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 270A × 0020 ÷ 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 270A × 0308 × 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] 
KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 270A × 0308 × 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 270A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 270A ÷ AC00 ÷ # × [0.3] RAISED FIST (EB) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 270A × 0020 ÷ AC00 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -3728,11 +3728,11 @@ × 270A × 0308 × 0020 × 0085 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 270A × 00A0 ÷ # × [0.3] RAISED FIST (EB) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 270A × 0020 ÷ 00A0 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 270A × 0308 × 00A0 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 270A × 0308 × 00A0 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 270A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 270A × 00AB ÷ # × [0.3] RAISED FIST (EB) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 270A × 0020 ÷ 00AB ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 270A × 0308 × 00AB ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ 
[0.3] +× 270A × 0308 × 00AB ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 270A × 0308 × 0020 ÷ 00AB ÷ # × [0.3] RAISED FIST (EB) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 270A ÷ 00B4 ÷ # × [0.3] RAISED FIST (EB) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 270A × 0020 ÷ 00B4 ÷ # × [0.3] RAISED FIST (EB) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -3880,7 +3880,7 @@ × 1F3FB × 0308 × 0020 × FE15 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1F3FB × 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F3FB × 0020 ÷ 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1F3FB × 0308 × 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1F3FB × 0308 × 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F3FB × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F3FB ÷ AC00 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1F3FB × 0020 ÷ AC00 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA 
(H2) ÷ [0.3] @@ -3984,11 +3984,11 @@ × 1F3FB × 0308 × 0020 × 0085 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1F3FB × 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F3FB × 0020 ÷ 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1F3FB × 0308 × 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1F3FB × 0308 × 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F3FB × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F3FB × 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F3FB × 0020 ÷ 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 1F3FB × 0308 × 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 1F3FB × 0308 × 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F3FB × 0308 × 0020 ÷ 00AB ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × 
[7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F3FB ÷ 00B4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 1F3FB × 0020 ÷ 00B4 ÷ # × [0.3] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (EM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -4136,7 +4136,7 @@ × 000A ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 000A ÷ 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000A ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 000A ÷ 0308 × 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 000A ÷ 0308 × 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000A ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000A ÷ AC00 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 000A ÷ 0020 ÷ AC00 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -4240,7 +4240,7 @@ × 000A ÷ 0308 × 0020 × 0085 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 000A ÷ 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000A ÷ 0020 ÷ 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 000A ÷ 
0308 × 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 000A ÷ 0308 × 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000A ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000A ÷ 00AB ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 000A ÷ 0020 ÷ 00AB ÷ # × [0.3] (LF_NotEastAsian) ÷ [5.03] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -4392,7 +4392,7 @@ × 000B ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 000B ÷ 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000B ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 000B ÷ 0308 × 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 000B ÷ 0308 × 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000B ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000B ÷ AC00 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 000B ÷ 0020 ÷ AC00 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -4496,7 +4496,7 @@ × 000B ÷ 0308 × 0020 
× 0085 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 000B ÷ 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000B ÷ 0020 ÷ 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 000B ÷ 0308 × 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 000B ÷ 0308 × 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000B ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000B ÷ 00AB ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 000B ÷ 0020 ÷ 00AB ÷ # × [0.3] (BK_NotEastAsian) ÷ [4.0] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -4648,7 +4648,7 @@ × 000D ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 000D ÷ 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000D ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 000D ÷ 0308 × 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 000D ÷ 0308 × 16FE4 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000D ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] 
(CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 000D ÷ AC00 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 000D ÷ 0020 ÷ AC00 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -4752,7 +4752,7 @@ × 000D ÷ 0308 × 0020 × 0085 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 000D ÷ 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000D ÷ 0020 ÷ 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 000D ÷ 0308 × 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 000D ÷ 0308 × 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000D ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 000D ÷ 00AB ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 000D ÷ 0020 ÷ 00AB ÷ # × [0.3] (CR_NotEastAsian) ÷ [5.02] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -4904,7 +4904,7 @@ × 0020 ÷ 0308 × 0020 × FE15 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0020 ÷ 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0020 × 0020 ÷ 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] 
SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0020 ÷ 0308 × 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0020 ÷ 0308 × 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0020 ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0020 ÷ AC00 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0020 × 0020 ÷ AC00 ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5008,7 +5008,7 @@ × 0020 ÷ 0308 × 0020 × 0085 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0020 ÷ 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0020 × 0020 ÷ 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0020 ÷ 0308 × 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0020 ÷ 0308 × 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0020 ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0020 ÷ 00AB ÷ # × [0.3] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0020 × 0020 ÷ 00AB ÷ # × [0.3] SPACE (SP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) 
÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5160,7 +5160,7 @@ × 0021 × 0308 × 0020 × FE15 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0021 × 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0021 × 0020 ÷ 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0021 × 0308 × 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0021 × 0308 × 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0021 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0021 ÷ AC00 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0021 × 0020 ÷ AC00 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5264,7 +5264,7 @@ × 0021 × 0308 × 0020 × 0085 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0021 × 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0021 × 0020 ÷ 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0021 × 0308 × 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] 
NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0021 × 0308 × 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0021 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0021 × 00AB ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0021 × 0020 ÷ 00AB ÷ # × [0.3] EXCLAMATION MARK (EX_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5416,7 +5416,7 @@ × 0022 × 0308 × 0020 × FE15 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0022 × 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0022 × 0020 ÷ 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0022 × 0308 × 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0022 × 0308 × 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0022 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0022 × AC00 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [19.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0022 × 
0020 ÷ AC00 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5520,7 +5520,7 @@ × 0022 × 0308 × 0020 × 0085 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0022 × 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0022 × 0020 ÷ 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0022 × 0308 × 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0022 × 0308 × 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0022 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0022 × 00AB ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [19.02] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0022 × 0020 ÷ 00AB ÷ # × [0.3] QUOTATION MARK (QU_QUmPi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5672,7 +5672,7 @@ × 0024 × 0308 × 0020 × FE15 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0024 × 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0024 × 0020 ÷ 16FE4 ÷ # × [0.3] DOLLAR SIGN 
(PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0024 × 0308 × 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0024 × 0308 × 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0024 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0024 × AC00 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [27.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0024 × 0020 ÷ AC00 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -5776,7 +5776,7 @@ × 0024 × 0308 × 0020 × 0085 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0024 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0024 × 0020 ÷ 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0024 × 0308 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0024 × 0308 × 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0024 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0024 × 00AB ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ 
[0.3] × 0024 × 0020 ÷ 00AB ÷ # × [0.3] DOLLAR SIGN (PR_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -5928,7 +5928,7 @@ × 0025 × 0308 × 0020 × FE15 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0025 × 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0025 × 0020 ÷ 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0025 × 0308 × 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0025 × 0308 × 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0025 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0025 ÷ AC00 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0025 × 0020 ÷ AC00 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -6032,7 +6032,7 @@ × 0025 × 0308 × 0020 × 0085 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0025 × 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0025 × 0020 ÷ 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0025 × 0308 × 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] 
COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0025 × 0308 × 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0025 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0025 × 00AB ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0025 × 0020 ÷ 00AB ÷ # × [0.3] PERCENT SIGN (PO_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -6184,7 +6184,7 @@ × 002C × 0308 × 0020 × FE15 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 002C × 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002C × 0020 ÷ 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 002C × 0308 × 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 002C × 0308 × 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002C × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002C ÷ AC00 ÷ # × [0.3] COMMA (IS_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 002C × 0020 ÷ AC00 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA 
(H2) ÷ [0.3] @@ -6288,7 +6288,7 @@ × 002C × 0308 × 0020 × 0085 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 002C × 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002C × 0020 ÷ 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 002C × 0308 × 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 002C × 0308 × 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002C × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] COMMA (IS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002C × 00AB ÷ # × [0.3] COMMA (IS_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 002C × 0020 ÷ 00AB ÷ # × [0.3] COMMA (IS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -6696,7 +6696,7 @@ × 002F × 0308 × 0020 × FE15 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 002F × 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002F × 0020 ÷ 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 002F × 0308 × 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 002F × 0308 × 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × 
[9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002F × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 002F ÷ AC00 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 002F × 0020 ÷ AC00 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -6800,7 +6800,7 @@ × 002F × 0308 × 0020 × 0085 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 002F × 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002F × 0020 ÷ 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 002F × 0308 × 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 002F × 0308 × 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002F × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 002F × 00AB ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 002F × 0020 ÷ 00AB ÷ # × [0.3] SOLIDUS (SY_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -6952,7 +6952,7 @@ × 0030 × 0308 × 0020 × FE15 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] 
PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0030 × 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0030 × 0020 ÷ 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0030 × 0308 × 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0030 × 0308 × 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0030 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0030 ÷ AC00 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0030 × 0020 ÷ AC00 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -7056,7 +7056,7 @@ × 0030 × 0308 × 0020 × 0085 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0030 × 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0030 × 0020 ÷ 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0030 × 0308 × 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0030 × 0308 × 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0030 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0030 × 00AB ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0030 × 0020 ÷ 00AB ÷ # × [0.3] DIGIT ZERO (NU_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -7208,7 +7208,7 @@ × 007D × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 007D × 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 007D × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 007D × 0308 × 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 007D × 0308 × 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 007D × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 007D ÷ AC00 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 007D × 0020 ÷ AC00 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -7312,7 +7312,7 @@ × 007D × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 007D × 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET 
(CL_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 007D × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 007D × 0308 × 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 007D × 0308 × 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 007D × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 007D × 00AB ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 007D × 0020 ÷ 00AB ÷ # × [0.3] RIGHT CURLY BRACKET (CL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -7464,7 +7464,7 @@ × 0085 ÷ 0308 × 0020 × FE15 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0085 ÷ 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0085 ÷ 0020 ÷ 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0085 ÷ 0308 × 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0085 ÷ 0308 × 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0085 ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0085 ÷ AC00 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0085 ÷ 0020 ÷ AC00 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -7568,7 +7568,7 @@ × 0085 ÷ 0308 × 0020 × 0085 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0085 ÷ 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0085 ÷ 0020 ÷ 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0085 ÷ 0308 × 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0085 ÷ 0308 × 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0085 ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0085 ÷ 00AB ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0085 ÷ 0020 ÷ 00AB ÷ # × [0.3] (NL_NotEastAsian) ÷ [5.04] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -7976,7 +7976,7 @@ × 00AB × 0308 × 0020 × FE15 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00AB × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00AB × 0020 × 
16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00AB × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00AB × 0308 × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00AB × 0308 × 0020 × 16FE4 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [15.11] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00AB × AC00 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [15.11] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00AB × 0020 × AC00 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -8080,7 +8080,7 @@ × 00AB × 0308 × 0020 × 0085 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00AB × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00AB × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00AB × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00AB × 0308 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE 
ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00AB × 0308 × 0020 × 00A0 ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [15.11] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00AB × 00AB ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [15.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00AB × 0020 × 00AB ÷ # × [0.3] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) × [15.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -8232,7 +8232,7 @@ × 00B4 × 0308 × 0020 × FE15 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00B4 × 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00B4 × 0020 ÷ 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00B4 × 0308 × 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00B4 × 0308 × 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00B4 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00B4 × AC00 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [21.04] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00B4 × 0020 ÷ AC00 ÷ # × [0.3] ACUTE ACCENT 
(BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -8336,7 +8336,7 @@ × 00B4 × 0308 × 0020 × 0085 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00B4 × 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00B4 × 0020 ÷ 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00B4 × 0308 × 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00B4 × 0308 × 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00B4 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00B4 × 00AB ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00B4 × 0020 ÷ 00AB ÷ # × [0.3] ACUTE ACCENT (BB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -8488,7 +8488,7 @@ × 00BB × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00BB × 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00BB × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) 
÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00BB × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00BB × 0308 × 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00BB × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00BB × AC00 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [19.13] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00BB × 0020 ÷ AC00 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -8592,7 +8592,7 @@ × 00BB × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00BB × 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00BB × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00BB × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00BB × 0308 × 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] 
NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00BB × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00BB × 00AB ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00BB × 0020 ÷ 00AB ÷ # × [0.3] RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pf_QUmPi_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -8744,7 +8744,7 @@ × 05D0 × 0308 × 0020 × FE15 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 05D0 × 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 05D0 × 0020 ÷ 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 05D0 × 0308 × 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 05D0 × 0308 × 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 05D0 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 05D0 ÷ AC00 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 05D0 × 0020 ÷ AC00 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL 
SYLLABLE GA (H2) ÷ [0.3] @@ -8848,7 +8848,7 @@ × 05D0 × 0308 × 0020 × 0085 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 05D0 × 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 05D0 × 0020 ÷ 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 05D0 × 0308 × 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 05D0 × 0308 × 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 05D0 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 05D0 × 00AB ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 05D0 × 0020 ÷ 00AB ÷ # × [0.3] HEBREW LETTER ALEF (HL_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9000,7 +9000,7 @@ × 1160 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1160 × 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1160 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1160 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER 
(JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1160 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1160 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1160 ÷ AC00 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1160 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9104,7 +9104,7 @@ × 1160 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1160 × 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1160 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1160 × 0308 × 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1160 × 0308 × 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1160 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1160 × 00AB ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1160 × 
0020 ÷ 00AB ÷ # × [0.3] HANGUL JUNGSEONG FILLER (JV_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9256,7 +9256,7 @@ × 11A8 × 0308 × 0020 × FE15 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 11A8 × 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11A8 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 11A8 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 11A8 × 0308 × 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11A8 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11A8 ÷ AC00 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 11A8 × 0020 ÷ AC00 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9360,7 +9360,7 @@ × 11A8 × 0308 × 0020 × 0085 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 11A8 × 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11A8 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ 
[18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 11A8 × 0308 × 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 11A8 × 0308 × 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11A8 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11A8 × 00AB ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 11A8 × 0020 ÷ 00AB ÷ # × [0.3] HANGUL JONGSEONG KIYEOK (JT_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9512,7 +9512,7 @@ × 1B05 × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B05 × 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B05 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B05 × 0308 × 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B05 × 0308 × 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B05 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B05 ÷ AC00 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B05 × 0020 ÷ AC00 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9616,7 +9616,7 @@ × 1B05 × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B05 × 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B05 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B05 × 0308 × 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B05 × 0308 × 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B05 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B05 × 00AB ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B05 × 0020 ÷ 00AB ÷ # × [0.3] BALINESE LETTER AKARA (AK_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -9768,7 +9768,7 @@ × 1B44 × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B44 × 16FE4 ÷ # × 
[0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B44 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B44 × 0308 × 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B44 × 0308 × 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B44 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B44 ÷ AC00 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B44 × 0020 ÷ AC00 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -9872,7 +9872,7 @@ × 1B44 × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B44 × 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B44 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B44 × 0308 × 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B44 × 0308 × 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B44 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × 
[7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B44 × 00AB ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B44 × 0020 ÷ 00AB ÷ # × [0.3] BALINESE ADEG ADEG (VI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10024,7 +10024,7 @@ × 1B50 × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B50 × 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B50 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B50 × 0308 × 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B50 × 0308 × 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B50 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B50 ÷ AC00 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B50 × 0020 ÷ AC00 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10128,7 +10128,7 @@ × 1B50 × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B50 × 00A0 ÷ # × [0.3] 
BALINESE DIGIT ZERO (AS_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B50 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B50 × 0308 × 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B50 × 0308 × 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B50 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B50 × 00AB ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B50 × 0020 ÷ 00AB ÷ # × [0.3] BALINESE DIGIT ZERO (AS_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10280,7 +10280,7 @@ × 1B5C × 0308 × 0020 × FE15 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1B5C × 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B5C × 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1B5C × 0308 × 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1B5C × 0308 × 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B5C × 0308 
× 0020 ÷ 16FE4 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1B5C ÷ AC00 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1B5C × 0020 ÷ AC00 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10384,7 +10384,7 @@ × 1B5C × 0308 × 0020 × 0085 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1B5C × 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B5C × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1B5C × 0308 × 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1B5C × 0308 × 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B5C × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1B5C × 00AB ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1B5C × 0020 ÷ 00AB ÷ # × [0.3] BALINESE WINDU (ID_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10536,7 +10536,7 @@ × 1BF2 × 0308 × 0020 × FE15 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR 
VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1BF2 × 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1BF2 × 0020 ÷ 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1BF2 × 0308 × 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1BF2 × 0308 × 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1BF2 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1BF2 ÷ AC00 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 1BF2 × 0020 ÷ AC00 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10640,7 +10640,7 @@ × 1BF2 × 0308 × 0020 × 0085 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1BF2 × 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1BF2 × 0020 ÷ 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1BF2 × 0308 × 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1BF2 × 0308 × 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1BF2 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [9.0] COMBINING DIAERESIS 
(CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1BF2 × 00AB ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1BF2 × 0020 ÷ 00AB ÷ # × [0.3] BATAK PANGOLAT (VF_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -10792,7 +10792,7 @@ × 200B ÷ 0308 × 0020 × FE15 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 200B ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200B × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 200B ÷ 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 200B ÷ 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200B ÷ 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200B ÷ AC00 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 200B × 0020 ÷ AC00 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -10896,7 +10896,7 @@ × 200B ÷ 0308 × 0020 × 0085 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 200B ÷ 00A0 ÷ # × [0.3] ZERO WIDTH 
SPACE (ZW_NotEastAsian) ÷ [8.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200B × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 200B ÷ 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 200B ÷ 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200B ÷ 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200B ÷ 00AB ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) ÷ [8.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 200B × 0020 ÷ 00AB ÷ # × [0.3] ZERO WIDTH SPACE (ZW_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [8.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -11048,7 +11048,7 @@ × 2014 × 0308 × 0020 × FE15 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2014 × 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2014 × 0020 ÷ 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 2014 × 0308 × 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2014 × 0308 × 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2014 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING 
DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2014 ÷ AC00 ÷ # × [0.3] EM DASH (B2_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2014 × 0020 ÷ AC00 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -11152,7 +11152,7 @@ × 2014 × 0308 × 0020 × 0085 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2014 × 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2014 × 0020 ÷ 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 2014 × 0308 × 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2014 × 0308 × 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2014 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2014 × 00AB ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2014 × 0020 ÷ 00AB ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -11304,7 +11304,7 @@ × 2024 × 0308 × 0020 × FE15 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2024 × 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2024 
× 0020 ÷ 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 2024 × 0308 × 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2024 × 0308 × 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2024 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2024 ÷ AC00 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2024 × 0020 ÷ AC00 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -11408,7 +11408,7 @@ × 2024 × 0308 × 0020 × 0085 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2024 × 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2024 × 0020 ÷ 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 2024 × 0308 × 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2024 × 0308 × 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2024 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2024 × 00AB ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × 
[19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2024 × 0020 ÷ 00AB ÷ # × [0.3] ONE DOT LEADER (IN_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -11816,7 +11816,7 @@ × 261D × 0308 × 0020 × FE15 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 261D × 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 261D × 0020 ÷ 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 261D × 0308 × 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 261D × 0308 × 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 261D × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 261D ÷ AC00 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 261D × 0020 ÷ AC00 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -11920,7 +11920,7 @@ × 261D × 0308 × 0020 × 0085 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 261D × 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 261D × 0020 ÷ 00A0 
÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 261D × 0308 × 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 261D × 0308 × 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 261D × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 261D × 00AB ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 261D × 0020 ÷ 00AB ÷ # × [0.3] WHITE UP POINTING INDEX (EB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -12072,7 +12072,7 @@ × FFFC × 0308 × 0020 × FE15 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × FFFC × 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FFFC × 0020 ÷ 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× FFFC × 0308 × 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× FFFC × 0308 × 16FE4 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FFFC × 0308 × 0020 ÷ 16FE4 
÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × FFFC ÷ AC00 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) ÷ [20.02] HANGUL SYLLABLE GA (H2) ÷ [0.3] × FFFC × 0020 ÷ AC00 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -12176,7 +12176,7 @@ × FFFC × 0308 × 0020 × 0085 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × FFFC × 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FFFC × 0020 ÷ 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× FFFC × 0308 × 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× FFFC × 0308 × 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FFFC × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × FFFC × 00AB ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × FFFC × 0020 ÷ 00AB ÷ # × [0.3] OBJECT REPLACEMENT CHARACTER (CB_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -12328,7 +12328,7 @@ × 11003 × 0308 × 0020 × FE15 ÷ # × [0.3] BRAHMI SIGN 
JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 11003 × 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11003 × 0020 ÷ 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 11003 × 0308 × 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 11003 × 0308 × 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11003 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 11003 ÷ AC00 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 11003 × 0020 ÷ AC00 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -12432,7 +12432,7 @@ × 11003 × 0308 × 0020 × 0085 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 11003 × 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11003 × 0020 ÷ 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 11003 × 0308 × 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 11003 × 0308 × 00A0 ÷ # 
× [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11003 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 11003 × 00AB ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 11003 × 0020 ÷ 00AB ÷ # × [0.3] BRAHMI SIGN JIHVAMULIYA (AP_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -12584,7 +12584,7 @@ × 1F1E6 × 0308 × 0020 × FE15 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 1F1E6 × 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F1E6 × 0020 ÷ 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 1F1E6 × 0308 × 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 1F1E6 × 0308 × 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F1E6 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 1F1E6 ÷ AC00 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) ÷ [999.0] HANGUL 
SYLLABLE GA (H2) ÷ [0.3] × 1F1E6 × 0020 ÷ AC00 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -12688,7 +12688,7 @@ × 1F1E6 × 0308 × 0020 × 0085 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 1F1E6 × 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F1E6 × 0020 ÷ 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 1F1E6 × 0308 × 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 1F1E6 × 0308 × 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F1E6 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 1F1E6 × 00AB ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 1F1E6 × 0020 ÷ 00AB ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER A (RI_NotEastAsian) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -13352,7 +13352,7 @@ × 0029 × 0308 × 0020 × FE15 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0029 × 16FE4 ÷ # × [0.3] RIGHT 
PARENTHESIS (CP_NotEastAsian_CP30) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0029 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0029 × 0308 × 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0029 × 0308 × 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0029 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0029 ÷ AC00 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0029 × 0020 ÷ AC00 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -13456,7 +13456,7 @@ × 0029 × 0308 × 0020 × 0085 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0029 × 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0029 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0029 × 0308 × 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0029 × 0308 × 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0029 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [9.0] 
COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0029 × 00AB ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0029 × 0020 ÷ 00AB ÷ # × [0.3] RIGHT PARENTHESIS (CP_NotEastAsian_CP30) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -13608,7 +13608,7 @@ × 0028 × 0308 × 0020 × FE15 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0028 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0028 × 0020 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0028 × 0308 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0028 × 0308 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0028 × 0308 × 0020 × 16FE4 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0028 × AC00 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0028 × 0020 × AC00 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -13712,7 +13712,7 @@ × 0028 × 0308 × 0020 × 0085 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE 
(SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0028 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0028 × 0020 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0028 × 0308 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0028 × 0308 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0028 × 0308 × 0020 × 00A0 ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [14.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0028 × 00AB ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0028 × 0020 × 00AB ÷ # × [0.3] LEFT PARENTHESIS (OP_NotEastAsian_OP30) × [7.01] SPACE (SP_NotEastAsian) × [14.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -13862,9 +13862,9 @@ × 0001 × 0020 × FE15 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0001 × 0308 × FE15 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0001 × 0308 × 0020 × FE15 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] -× 0001 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0001 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) 
÷ [0.3] × 0001 × 0020 ÷ 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0001 × 0308 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0001 × 0308 × 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0001 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0001 ÷ AC00 ÷ # × [0.3] (CM1_NotEastAsian_CM) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0001 × 0020 ÷ AC00 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -13966,9 +13966,9 @@ × 0001 × 0020 × 0085 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0001 × 0308 × 0085 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0001 × 0308 × 0020 × 0085 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] -× 0001 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0001 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0001 × 0020 ÷ 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0001 × 0308 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0001 × 0308 × 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0001 × 0308 × 
0020 ÷ 00A0 ÷ # × [0.3] (CM1_NotEastAsian_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0001 × 00AB ÷ # × [0.3] (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0001 × 0020 ÷ 00AB ÷ # × [0.3] (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14120,7 +14120,7 @@ × 200D × 0308 × 0020 × FE15 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 200D × 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200D × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 200D × 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 200D × 0308 × 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200D × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 200D × AC00 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 200D × 0020 ÷ AC00 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14224,7 +14224,7 @@ × 200D × 0308 × 0020 × 0085 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × 
[8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 200D × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 200D × 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 200D × 0308 × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 00AB ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 200D × 0020 ÷ 00AB ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14374,9 +14374,9 @@ × 302A × 0020 × FE15 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 302A × 0308 × FE15 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 302A × 0308 × 0020 × FE15 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] -× 302A × 16FE4 ÷ # × [0.3] IDEOGRAPHIC 
LEVEL TONE MARK (CM1_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 302A × 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 302A × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 302A × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 302A × 0308 × 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 302A × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 302A ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 302A × 0020 ÷ AC00 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14478,9 +14478,9 @@ × 302A × 0020 × 0085 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 302A × 0308 × 0085 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 302A × 0308 × 0020 × 0085 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] -× 302A × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 302A × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 302A × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK 
SPACE (GL_NotEastAsian) ÷ [0.3] -× 302A × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.3] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 302A × 0308 × 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 302A × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 302A × 00AB ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 302A × 0020 ÷ 00AB ÷ # × [0.3] IDEOGRAPHIC LEVEL TONE MARK (CM1_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14632,7 +14632,7 @@ × 0023 × 0308 × 0020 × FE15 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0023 × 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0023 × 0020 ÷ 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0023 × 0308 × 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0023 × 0308 × 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0023 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0023 ÷ AC00 ÷ # × 
[0.3] NUMBER SIGN (AL_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0023 × 0020 ÷ AC00 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14736,7 +14736,7 @@ × 0023 × 0308 × 0020 × 0085 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0023 × 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0023 × 0020 ÷ 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0023 × 0308 × 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0023 × 0308 × 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0023 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0023 × 00AB ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0023 × 0020 ÷ 00AB ÷ # × [0.3] NUMBER SIGN (AL_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -14888,7 +14888,7 @@ × 00A7 × 0308 × 0020 × FE15 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 00A7 × 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00A7 × 0020 ÷ 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) 
× [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 00A7 × 0308 × 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 00A7 × 0308 × 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00A7 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 00A7 ÷ AC00 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 00A7 × 0020 ÷ AC00 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -14992,7 +14992,7 @@ × 00A7 × 0308 × 0020 × 0085 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 00A7 × 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00A7 × 0020 ÷ 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 00A7 × 0308 × 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 00A7 × 0308 × 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00A7 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 00A7 × 00AB ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 00A7 × 0020 ÷ 00AB ÷ # × [0.3] SECTION SIGN (AI_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -15144,7 +15144,7 @@ × 50005 × 0308 × 0020 × FE15 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 50005 × 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 50005 × 0020 ÷ 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 50005 × 0308 × 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 50005 × 0308 × 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 50005 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 50005 ÷ AC00 ÷ # × [0.3] (XX_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 50005 × 0020 ÷ AC00 ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -15248,7 +15248,7 @@ × 50005 × 0308 × 0020 × 0085 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 50005 × 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 50005 × 0020 ÷ 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 50005 × 0308 × 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE 
(GL_NotEastAsian) ÷ [0.3] +× 50005 × 0308 × 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 50005 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] (XX_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 50005 × 00AB ÷ # × [0.3] (XX_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 50005 × 0020 ÷ 00AB ÷ # × [0.3] (XX_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -15400,7 +15400,7 @@ × 0E01 × 0308 × 0020 × FE15 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 0E01 × 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0E01 × 0020 ÷ 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 0E01 × 0308 × 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 0E01 × 0308 × 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0E01 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 0E01 ÷ AC00 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 0E01 × 0020 ÷ AC00 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] 
SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -15504,7 +15504,7 @@ × 0E01 × 0308 × 0020 × 0085 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 0E01 × 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0E01 × 0020 ÷ 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 0E01 × 0308 × 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 0E01 × 0308 × 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0E01 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 0E01 × 00AB ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 0E01 × 0020 ÷ 00AB ÷ # × [0.3] THAI CHARACTER KO KAI (SA_NotEastAsian_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -15656,7 +15656,7 @@ × 2757 × 0308 × 0020 × FE15 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 2757 × 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2757 × 0020 ÷ 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT 
FILLER (GL) ÷ [0.3] -× 2757 × 0308 × 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 2757 × 0308 × 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2757 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 2757 ÷ AC00 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 2757 × 0020 ÷ AC00 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -15760,11 +15760,11 @@ × 2757 × 0308 × 0020 × 0085 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 2757 × 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2757 × 0020 ÷ 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 2757 × 0308 × 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 2757 × 0308 × 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2757 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 2757 × 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 
(QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2757 × 0020 ÷ 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 2757 × 0308 × 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 2757 × 0308 × 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2757 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 2757 ÷ 00B4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 2757 × 0020 ÷ 00B4 ÷ # × [0.3] HEAVY EXCLAMATION MARK SYMBOL (AI_AL) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -15912,7 +15912,7 @@ × 17D6 × 0308 × 0020 × FE15 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 17D6 × 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 17D6 × 0020 ÷ 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 17D6 × 0308 × 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 17D6 × 0308 × 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] 
COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 17D6 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 17D6 ÷ AC00 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 17D6 × 0020 ÷ AC00 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -16016,7 +16016,7 @@ × 17D6 × 0308 × 0020 × 0085 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 17D6 × 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 17D6 × 0020 ÷ 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 17D6 × 0308 × 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 17D6 × 0308 × 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 17D6 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 17D6 × 00AB ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 17D6 × 0020 ÷ 00AB ÷ # × [0.3] KHMER SIGN CAMNUC PII KUUH (NS_NotEastAsian_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE 
QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] @@ -16168,7 +16168,7 @@ × 3041 × 0308 × 0020 × FE15 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [13.01] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK (EX) ÷ [0.3] × 3041 × 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3041 × 0020 ÷ 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] -× 3041 × 0308 × 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] +× 3041 × 0308 × 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3041 × 0308 × 0020 ÷ 16FE4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] KHITAN SMALL SCRIPT FILLER (GL) ÷ [0.3] × 3041 ÷ AC00 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) ÷ [999.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] × 3041 × 0020 ÷ AC00 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] HANGUL SYLLABLE GA (H2) ÷ [0.3] @@ -16272,11 +16272,11 @@ × 3041 × 0308 × 0020 × 0085 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) × [6.0] (NL_NotEastAsian) ÷ [0.3] × 3041 × 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3041 × 0020 ÷ 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] -× 3041 × 0308 × 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 3041 × 0308 × 00A0 ÷ # × 
[0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3041 × 0308 × 0020 ÷ 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 3041 × 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3041 × 0020 ÷ 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] -× 3041 × 0308 × 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.1] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] +× 3041 × 0308 × 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [19.11] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3041 × 0308 × 0020 ÷ 00AB ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (QU_QU_Pi_QUmPf_NotEastAsian) ÷ [0.3] × 3041 ÷ 00B4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) ÷ [999.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] × 3041 × 0020 ÷ 00B4 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [7.01] SPACE (SP_NotEastAsian) ÷ [18.0] ACUTE ACCENT (BB_NotEastAsian) ÷ [0.3] @@ -16416,7 +16416,7 @@ × 200D × 261D ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] WHITE UP POINTING INDEX (EB_NotEastAsian) ÷ [0.3] × 3041 × 2060 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [11.01] WORD JOINER (WJ_NotEastAsian) ÷ [0.3] × 2060 × 3041 ÷ # × [0.3] WORD JOINER (WJ_NotEastAsian) × [11.02] HIRAGANA LETTER SMALL A (CJ_NS) ÷ [0.3] -× 3041 × 0308 × 00A0 ÷ # × [0.3] HIRAGANA 
LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.2] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] +× 3041 × 0308 × 00A0 ÷ # × [0.3] HIRAGANA LETTER SMALL A (CJ_NS) × [9.0] COMBINING DIAERESIS (CM1_NotEastAsian_CM) × [12.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 00A0 ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] NO-BREAK SPACE (GL_NotEastAsian) ÷ [0.3] × 200D × 002F ÷ # × [0.3] ZERO WIDTH JOINER (ZWJ_O_ZWJ_NotEastAsian_CM) × [8.1] SOLIDUS (SY_NotEastAsian) ÷ [0.3] × 2014 × 2014 ÷ # × [0.3] EM DASH (B2_NotEastAsian) × [17.0] EM DASH (B2_NotEastAsian) ÷ [0.3] diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index b2dbf58a3..b42d7d2a2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-18, 17:35:24 GMT +# Date: 2024-11-15, 15:58:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -111,7 +111,7 @@ 0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -247,7 +247,8 @@ 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -373,7 +374,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 
11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -508,6 +509,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; Extend # Mc SOYOMBO SIGN VISARGA 11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Extend # Mn SHARADA VOWEL SIGN OE +11B61 ; Extend # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Extend # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Extend # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Extend # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -549,6 +556,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Extend # Mn CHISOI SIGN ANUSVARA +16D9D ; Extend # Mn CHISOI SIGN SISO 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -586,7 +595,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2601 +# Total code points: 2642 # ================================================ @@ -779,7 +788,7 @@ E0001 ; Format # Cf LANGUAGE TAG 024B ; Lower # L& LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Lower # L& LATIN SMALL LETTER R WITH STROKE 
024F..0293 ; Lower # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Lower # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Lower # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; Lower # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Lower # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Lower # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP @@ -1254,13 +1263,14 @@ A7C3 ; Lower # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lower # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lower # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Lower # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Lower # L& LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Lower # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lower # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lower # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lower # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lower # L& LATIN SMALL LETTER SIGMOID S A7DB ; Lower # L& LATIN SMALL LETTER LAMBDA -A7F2..A7F4 ; Lower # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lower # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F6 ; Lower # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M @@ -1286,6 +1296,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10D70..10D85 ; Lower # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lower # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Lower # L& [25] 
BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D41A..1D433 ; Lower # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z 1D44E..1D454 ; Lower # L& [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G 1D456..1D467 ; Lower # L& [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z @@ -1320,7 +1331,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2548 # ================================================ @@ -1929,7 +1940,10 @@ A7C2 ; Upper # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Upper # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Upper # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Upper # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Upper # L& LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Upper # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Upper # L& LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Upper # L& LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Upper # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Upper # L& LATIN CAPITAL LETTER SIGMOID S A7DA ; Upper # L& LATIN CAPITAL LETTER LAMBDA @@ -1946,6 +1960,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 10D50..10D65 ; Upper # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Upper # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Upper # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL 
BOLD CAPITAL Z 1D434..1D44D ; Upper # L& [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z 1D468..1D481 ; Upper # L& [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z @@ -1982,13 +1997,13 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1963 +# Total code points: 1991 # ================================================ 01BB ; OLetter # Lo LATIN LETTER TWO WITH STROKE 01C0..01C3 ; OLetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -0294 ; OLetter # Lo LATIN LETTER GLOTTAL STOP +0294..0295 ; OLetter # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE 02B9..02BF ; OLetter # Lm [7] MODIFIER LETTER PRIME..MODIFIER LETTER LEFT HALF RING 02C6..02D1 ; OLetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02EC ; OLetter # Lm MODIFIER LETTER VOICING @@ -2022,7 +2037,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; OLetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; OLetter # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; OLetter # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; OLetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; OLetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -2043,6 +2058,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL 
LETTER A..FULLWIDTH LAT 09DF..09E1 ; OLetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; OLetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; OLetter # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; OLetter # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; OLetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; OLetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; OLetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -2090,7 +2106,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C2A..0C39 ; OLetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; OLetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; OLetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2099,7 +2115,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0CAA..0CB3 ; OLetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; OLetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; OLetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; OLetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; OLetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2366,6 +2382,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; OLetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; OLetter # 
Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; OLetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; OLetter # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; OLetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; OLetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; OLetter # Lo KHAROSHTHI LETTER A @@ -2389,6 +2406,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; OLetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; OLetter # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2480,6 +2499,9 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D67..11D68 ; OLetter # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; OLetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; OLetter # Lo GUNJALA GONDI OM +11DB0..11DD8 ; OLetter # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; OLetter # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; OLetter # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; OLetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; OLetter # Lo KAWI SIGN REPHA 11F04..11F10 ; OLetter # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -2505,11 +2527,15 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D40..16D42 
; OLetter # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; OLetter # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; OLetter # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; OLetter # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; OLetter # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16F00..16F4A ; OLetter # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; OLetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; OLetter # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; OLetter # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -2585,7 +2611,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136940 +# Total code points: 137054 # ================================================ @@ -2654,12 +2680,14 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Numeric # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 
16130..16139 ; Numeric # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Numeric # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Numeric # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Numeric # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -2669,7 +2697,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 775 +# Total code points: 795 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html index a698e956c..a851a3035 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakTest.html @@ -7,7 +7,7 @@

Sentence_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:12:16 GMT

+

Date: 2024-10-14, 12:07:04 GMT

This page illustrates the application of the Sentence_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of ATerm and Close shows ×, with the rule 9.0. Checking below the table, rule 9.0 is “SATerm Close* × ( Close | Sp | ParaSep )”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -30,7 +30,7 @@

Table

0.2sot ×
0.3÷ eot
7.02× ZW
8.0ZW SP* ÷
8.1ZWJ_O ×
9.0[^ SP BK CR LF NL ZW] × CM
9.0(?<X>[^BK CR LF NL SP ZW]) ( CM | ZWJ )* {X}
10.0( CM | ZWJ ) A
11.01× WJ
11.02WJ ×
12.0GL ×
12.1[^ SP BA HY CM] × GL
12.2[^ BA HY CM] CM+ × GL
12.3^ CM+ × GL
12.1[^ SP BA HY] × GL
13.01× EX
13.02× CL
13.03× CP
11      -◌̈   +◌̈      
Extend_FE×××××××××××××××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule SB8a is given the number 8.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules and the macro values they use, see UAX #29.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule SB8a is given the number 8.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.

For the original rules and the macro values they use, see UAX #29.

diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e557c3d0d..668852b57 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:36 GMT +# WordBreakProperty-17.0.0.txt +# Date: 2024-11-14, 22:51:52 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -147,7 +147,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; Extend # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Extend # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; Extend # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Extend # Mn TAMIL SIGN ANUSVARA @@ -283,7 +283,8 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; Extend # Mc BALINESE SIGN BISAH 1B34 ; Extend # Mn BALINESE SIGN REREKAN @@ -409,7 
+410,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Extend # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -544,6 +545,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; Extend # Mc SOYOMBO SIGN VISARGA 11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Extend # Mn SHARADA VOWEL SIGN OE +11B61 ; Extend # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; Extend # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; Extend # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; Extend # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; Extend # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; Extend # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -585,6 +592,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Extend # Mn CHISOI SIGN ANUSVARA 
+16D9D ; Extend # Mn CHISOI SIGN SISO 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -623,7 +632,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2605 +# Total code points: 2646 # ================================================ @@ -687,8 +696,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; ALetter # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; ALetter # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; ALetter # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -745,7 +754,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ALetter # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; ALetter # Lo [6] ARABIC 
LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; ALetter # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; ALetter # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ALetter # Lm ARABIC SMALL FARSI YEH 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -766,6 +775,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; ALetter # Lo BENGALI LETTER VEDIC ANUSVARA +09FF ; ALetter # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -813,7 +823,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0C2A..0C39 ; ALetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ALetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ALetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -822,7 +832,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ALetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ALetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -1002,11 +1012,8 @@ A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; ALetter # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; ALetter # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; ALetter # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; ALetter # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; ALetter # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; ALetter # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; ALetter # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1115,6 +1122,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 108F4..108F5 ; ALetter # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; ALetter # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; ALetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; ALetter # 
Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ALetter # Lo KHAROSHTHI LETTER A @@ -1142,6 +1150,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ALetter # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; ALetter # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1232,6 +1242,9 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11D67..11D68 ; ALetter # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; ALetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; ALetter # Lo GUNJALA GONDI OM +11DB0..11DD8 ; ALetter # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; ALetter # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; ALetter # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; ALetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; ALetter # Lo KAWI SIGN REPHA 11F04..11F10 ; ALetter # Lo [13] KAWI LETTER A..KAWI LETTER O @@ -1257,7 +1270,11 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16D40..16D42 ; ALetter # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D43..16D6A ; ALetter # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; ALetter # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D80..16D97 ; ALetter # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; 
ALetter # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; ALetter # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EA0..16EB8 ; ALetter # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; ALetter # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; ALetter # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ALetter # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -1355,7 +1372,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33954 # ================================================ @@ -1467,12 +1484,14 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Numeric # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Numeric # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Numeric # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Numeric # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Numeric # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Numeric # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Numeric # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Numeric # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Numeric # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] 
MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -1482,7 +1501,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 774 +# Total code points: 794 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html index 52a647c4a..6c25af5cf 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakTest.html @@ -7,7 +7,7 @@

Word_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-09-12, 14:12:18 GMT

+

Date: 2024-10-14, 12:07:11 GMT

This page illustrates the application of the Word_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

After the heavy blue line in the table are additional rows, either with different sample characters or for sequences, such as “ALetter MidLetter”. Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of ExtendNumLet and ALetter shows ×, with the rule 13.2. Checking below the table, rule 13.2 is “ExtendNumLet × (AHLetter | Numeric | Katakana)”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -45,7 +45,7 @@

Table

0.2sot ÷
0.3÷ eot
Numeric MidNumLet Format_FE÷÷÷÷÷÷÷÷÷×÷÷÷÷÷÷÷×××

Rules

-

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule WB13a is given the number 13.1.
  4. Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  5. In some cases, the numbering and form of a rule is changed due to “treat as” rules.

For the original rules and the macro values they use, see UAX #29.

+

This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:

  1. The rules are cast into a form that is more like regular expressions.
  2. The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.
  3. The rules are given decimal numbers using tenths, and are written without prefix. For example, rule WB13a is given the number 13.1.
  4. Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.

For the original rules and the macro values they use, see UAX #29.

diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index d4f69462e..671fe74ae 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-18, 17:34:07 GMT +# Date: 2024-11-15, 15:57:39 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -67,6 +67,7 @@ # 108E0..108FF Hatran # 10900..1091F Phoenician # 10920..1093F Lydian +# 10940..1095C Sidetic # 10980..1099F Meroitic_Hieroglyphs # 109A0..109FF Meroitic_Cursive # 10A00..10A5F Kharoshthi @@ -138,8 +139,8 @@ 01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; L # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; L # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; L # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; L # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP 02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON @@ -198,6 +199,7 @@ 09FA ; L # So BENGALI ISSHAR 09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; L # Po BENGALI ABBREVIATION SIGN +09FF ; L # Lo BENGALI LETTER SANSKRIT BA 0A03 ; L # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; 
L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI @@ -273,7 +275,7 @@ 0C3D ; L # Lo TELUGU SIGN AVAGRAHA 0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; L # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C77 ; L # Po TELUGU SIGN SIDDHAM @@ -294,7 +296,7 @@ 0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; L # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -662,11 +664,8 @@ A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; L # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; L # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; L # Lm [4] 
MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1011,6 +1010,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B61 ; L # Mc SHARADA VOWEL SIGN OOE +11B65 ; L # Mc SHARADA VOWEL SIGN SHORT O +11B67 ; L # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; L # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -1041,6 +1043,10 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11D96 ; L # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; L # Lo GUNJALA GONDI OM 11DA0..11DA9 ; L # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; L # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; L # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; L # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; L # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O 11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION @@ -1093,9 +1099,14 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16D6B..16D6C ; L # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; L # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; L # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; L # Lo [24] CHISOI 
LETTER A..CHISOI LETTER PA +16D99..16D9C ; L # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16DA0..16DA9 ; L # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; L # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; L # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; L # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; L # Lo MIAO LETTER NASALIZATION 16F51..16F87 ; L # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI @@ -1103,6 +1114,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; L # Lm OLD CHINESE ITERATION MARK 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; L # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -1214,8 +1227,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815319 code points not listed here. -# Total code points: 1095513 +# The above property value applies to 815124 code points not listed here. 
+# Total code points: 1095476 # ================================================ @@ -1269,6 +1282,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1095C ; R # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1327,7 +1341,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# The above property value applies to 2087 code points not listed here. +# The above property value applies to 2058 code points not listed here. 
# Total code points: 3631 # ================================================ @@ -1731,8 +1745,7 @@ FF1A ; CS # Po FULLWIDTH COLON 2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; ON # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; ON # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; ON # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; ON # No COPTIC FRACTION ONE HALF @@ -1846,10 +1859,12 @@ A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FBC3..FBD2 ; ON # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD3E ; ON # Pe ORNATE LEFT PARENTHESIS FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FD90..FD91 ; ON # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCF ; ON # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE 
LENTICULAR BRACKET @@ -1936,6 +1951,8 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1091F ; ON # Po PHOENICIAN WORD SEPARATOR 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; ON # Pd GARAY HYPHEN +10ED0 ; ON # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; ON # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -1997,7 +2014,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6751 +# Total code points: 6786 # ================================================ @@ -2107,7 +2124,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 0B3F ; NSM # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; NSM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; NSM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; NSM # Mn TAMIL SIGN ANUSVARA 0BC0 ; NSM # Mn TAMIL VOWEL SIGN II @@ -2189,7 +2206,8 @@ FFFFE..FFFFF ; BN # Cn [2] .. 
1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; NSM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; NSM # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; NSM # Mn BALINESE SIGN REREKAN 1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2274,7 +2292,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; NSM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2350,6 +2368,9 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11A59..11A5B ; NSM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; NSM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; NSM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; NSM # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; NSM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; NSM # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; NSM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL 
SIGN VOCALIC L 11C38..11C3D ; NSM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C92..11CA7 ; NSM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA @@ -2376,6 +2397,8 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1612D..1612F ; NSM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; NSM # Mn CHISOI SIGN ANUSVARA +16D9D ; NSM # Mn CHISOI SIGN SISO 16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; NSM # Mn KHITAN SMALL SCRIPT FILLER @@ -2408,7 +2431,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2028 +# Total code points: 2066 # ================================================ @@ -2442,7 +2465,7 @@ E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM @@ -2456,6 +2479,8 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED 
FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2501,8 +2526,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 293 code points not listed here. -# Total code points: 1767 +# The above property value applies to 253 code points not listed here. +# Total code points: 1731 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3884406ef..17176ce34 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-18, 17:34:11 GMT +# Date: 2024-11-15, 15:57:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -83,8 +83,8 @@ 01BC..01BF ; 0 # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; 0 # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; 0 # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; 0 # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; 0 # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; 0 # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; 0 # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; 0 # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; 0 # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6..02D1 ; 0 # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON @@ -182,7 +182,7 @@ 0860..086A ; 0 # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; 0 # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; 0 # Sk ARABIC RAISED ROUND DOT -0889..088E ; 0 # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; 0 # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; 0 # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 08A0..08C8 ; 0 # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; 0 # Lm ARABIC SMALL FARSI YEH @@ -232,6 +232,7 @@ 09FB ; 0 # Sc BENGALI GANDA MARK 09FC ; 0 # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; 0 # Po BENGALI ABBREVIATION SIGN +09FF ; 0 # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; 0 # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; 0 # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; 0 # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -290,7 +291,7 @@ 0B41..0B44 ; 0 # Mn [4] ORIYA 
VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B47..0B48 ; 0 # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; 0 # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU -0B55..0B56 ; 0 # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; 0 # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; 0 # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; 0 # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; 0 # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -335,7 +336,7 @@ 0C46..0C48 ; 0 # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4C ; 0 # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C58..0C5A ; 0 # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; 0 # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; 0 # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; 0 # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; 0 # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; 0 # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -360,7 +361,7 @@ 0CCA..0CCB ; 0 # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; 0 # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; 0 # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; 0 # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; 0 # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; 0 # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; 0 # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; 0 # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -911,8 +912,7 @@ 2B45..2B46 ; 0 # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B47..2B4C ; 0 # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B73 ; 0 # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; 0 # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; 0 # 
So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; 0 # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; 0 # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; 0 # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; 0 # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1093,11 +1093,8 @@ A788 ; 0 # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; 0 # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; 0 # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; 0 # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; 0 # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; 0 # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; 0 # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; 0 # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; 0 # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; 0 # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; 0 # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; 0 # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1234,13 +1231,15 @@ FB40..FB41 ; 0 # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH FB43..FB44 ; 0 # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; 0 # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; 0 # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; 0 # So [16] ARABIC LIGATURE 
JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; 0 # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; 0 # Pe ORNATE LEFT PARENTHESIS FD3F ; 0 # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; 0 # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; 0 # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; 0 # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; 0 # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; 0 # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; 0 # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; 0 # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; 0 # Sc RIAL SIGN FDFD..FDFF ; 0 # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1421,6 +1420,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1091F ; 0 # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; 0 # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; 0 # Po LYDIAN TRIANGULAR MARK +10940..1095C ; 0 # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; 0 # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; 0 # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; 0 # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1475,6 +1475,10 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF 
WITH TWO DOTS VERTICALLY BELOW +10EC5 ; 0 # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; 0 # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; 0 # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; 0 # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -1720,6 +1724,12 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11A9E..11AA2 ; 0 # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; 0 # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; 0 # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; 0 # Mn SHARADA VOWEL SIGN OE +11B61 ; 0 # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; 0 # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; 0 # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; 0 # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; 0 # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; 0 # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; 0 # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; 0 # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -1763,6 +1773,10 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11D96 ; 0 # Mc GUNJALA GONDI SIGN VISARGA 11D98 ; 0 # Lo GUNJALA GONDI OM 11DA0..11DA9 ; 0 # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; 0 # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; 0 # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; 0 # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; 0 # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; 0 # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; 0 # Mn [2] MAKASAR VOWEL SIGN 
I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; 0 # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -1826,9 +1840,15 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16D6B..16D6C ; 0 # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; 0 # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; 0 # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; 0 # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; 0 # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; 0 # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16DA0..16DA9 ; 0 # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; 0 # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; 0 # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; 0 # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; 0 # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; 0 # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; 0 # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; 0 # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; 0 # Lo MIAO LETTER NASALIZATION @@ -1839,6 +1859,8 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE2 ; 0 # Po OLD CHINESE HOOK MARK 16FE3 ; 0 # Lm OLD CHINESE ITERATION MARK 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER +16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF..18D1E ; 0 # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; 0 # Lo [2] TANGUT COMPONENT-769..TANGUT COMPONENT-770 @@ -2060,8 +2082,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. 
-# The above property value applies to 821549 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821285 code points not listed here. +# Total code points: 1113148 # ================================================ @@ -2198,8 +2220,9 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 11F41 ; 9 # Mc KAWI SIGN KILLER 11F42 ; 9 # Mn KAWI CONJOINER 1612F ; 9 # Mn GURUNG KHEMA SIGN THOLHOMA +16D9D ; 9 # Mn CHISOI SIGN SISO -# Total code points: 69 +# Total code points: 70 # ================================================ @@ -2595,6 +2618,8 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1ABF..1AC0 ; 220 # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW 1AC3..1AC4 ; 220 # Mn [2] COMBINING LEFT PARENTHESIS BELOW LEFT..COMBINING RIGHT PARENTHESIS BELOW RIGHT 1ACA ; 220 # Mn COMBINING DOUBLE PLUS SIGN BELOW +1ADD ; 220 # Mn COMBINING DOT-AND-RING BELOW +1AE6 ; 220 # Mn COMBINING DOUBLE ARCH BELOW 1B6C ; 220 # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CD5..1CD9 ; 220 # Mn [5] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER 1CDC..1CDF ; 220 # Mn [4] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE THREE DOTS BELOW @@ -2615,6 +2640,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 10A0D ; 220 # Mn KHAROSHTHI SIGN DOUBLE RING BELOW 10A3A ; 220 # Mn KHAROSHTHI SIGN DOT BELOW 10AE6 ; 220 # Mn MANICHAEAN ABBREVIATION MARK BELOW +10EFA..10EFB ; 220 # Mn [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON 10EFD..10EFF ; 220 # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F47 ; 220 # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW 10F4B ; 220 # Mn SOGDIAN COMBINING CURVE BELOW @@ -2627,7 +2653,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING 
NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 182 +# Total code points: 186 # ================================================ @@ -2736,7 +2762,9 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1ABB..1ABC ; 230 # Mn [2] COMBINING PARENTHESES ABOVE..COMBINING DOUBLE PARENTHESES ABOVE 1AC1..1AC2 ; 230 # Mn [2] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING RIGHT PARENTHESIS ABOVE RIGHT 1AC5..1AC9 ; 230 # Mn [5] COMBINING SQUARE BRACKETS ABOVE..COMBINING DOUBLE PLUS SIGN ABOVE -1ACB..1ACE ; 230 # Mn [4] COMBINING TRIPLE ACUTE ACCENT..COMBINING LATIN SMALL LETTER INSULAR T +1ACB..1ADC ; 230 # Mn [18] COMBINING TRIPLE ACUTE ACCENT..COMBINING DIAERESIS WITH RAISED LEFT DOT +1AE0..1AE5 ; 230 # Mn [6] COMBINING LEFT TACK ABOVE..COMBINING SEAGULL ABOVE +1AE7..1AEA ; 230 # Mn [4] COMBINING DOUBLE ARCH ABOVE..COMBINING UPWARDS ARROW ABOVE 1B6B ; 230 # Mn BALINESE MUSICAL SYMBOL COMBINING TEGEH 1B6D..1B73 ; 230 # Mn [7] BALINESE MUSICAL SYMBOL COMBINING KEMPUL..BALINESE MUSICAL SYMBOL COMBINING GONG 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -2803,7 +2831,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 541 # ================================================ @@ -2835,9 +2863,10 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 035D..035E ; 234 # Mn [2] COMBINING DOUBLE BREVE..COMBINING DOUBLE MACRON 0360..0361 ; 234 # Mn [2] COMBINING DOUBLE TILDE..COMBINING DOUBLE INVERTED BREVE +1AEB ; 234 # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; 234 # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE -# Total code points: 5 +# Total code points: 6 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt 
b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a825479ac..1b8d1ae99 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ -# DerivedDecompositionType-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedDecompositionType-17.0.0.txt +# Date: 2024-11-13, 22:18:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -873,7 +873,7 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 3196..319F ; Super # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK A69C..A69D ; Super # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN A770 ; Super # Lm MODIFIER LETTER US -A7F2..A7F4 ; Super # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Super # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Super # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AB5C..AB5F ; Super # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W @@ -884,7 +884,7 @@ AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 1E06B..1E06D ; Super # Lm [3] MODIFIER LETTER CYRILLIC SMALL ES WITH DESCENDER..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1F16A..1F16C ; Super # So [3] RAISED MC SIGN..RAISED MR SIGN -# Total code points: 249 +# Total code points: 250 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index de59209f0..2216c60d6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ 
b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-18, 17:34:16 GMT +# Date: 2024-11-15, 15:57:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -91,8 +91,8 @@ 01DD..0250 ; N # L& [116] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER TURNED A 0252..0260 ; N # L& [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK 0262..0293 ; N # L& [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL -0294 ; N # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; N # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; N # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; N # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD 02C5 ; N # Sk MODIFIER LETTER DOWN ARROWHEAD @@ -223,7 +223,7 @@ 0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0888 ; N # Sk ARABIC RAISED ROUND DOT -0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF @@ -280,6 +280,7 @@ 09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; N # Po BENGALI 
ABBREVIATION SIGN 09FE ; N # Mn BENGALI SANDHI MARK +09FF ; N # Lo BENGALI LETTER SANSKRIT BA 0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI 0A03 ; N # Mc GURMUKHI SIGN VISARGA 0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU @@ -343,7 +344,7 @@ 0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; N # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; N # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; N # Mc ORIYA AU LENGTH MARK 0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL @@ -391,7 +392,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -417,7 +418,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -691,7 +692,8 @@ 1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG 1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED 
CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; N # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; N # Mc BALINESE SIGN BISAH 1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA @@ -1069,8 +1071,7 @@ 2B4D..2B4F ; N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW 2B51..2B54 ; N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON 2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; N # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; N # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; N # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1182,11 +1183,8 @@ A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; N # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; N # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; N # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL 
LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; N # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; N # Lm [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1330,13 +1328,15 @@ FB40..FB41 ; N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FBB1 ; N # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; N # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD3E ; N # Pe ORNATE LEFT PARENTHESIS FD3F ; N # Ps ORNATE RIGHT PARENTHESIS FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; N # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; N # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFC ; N # Sc RIAL SIGN FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN 
AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL @@ -1427,6 +1427,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10940..1095C ; N # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; N # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1486,7 +1487,11 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EC5 ; N # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; N # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; N # Po ARABIC BIBLICAL END OF VERSE +10ED1..10ED8 ; N # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA..10EFF ; N # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1754,6 +1759,12 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11A9E..11AA2 ; N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; N # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11B60 ; N # Mn SHARADA VOWEL SIGN 
OE +11B61 ; N # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; N # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; N # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; N # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; N # Mc SHARADA VOWEL SIGN CANDRA O 11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11BE1 ; N # Po SUNUWAR SIGN PVO 11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE @@ -1798,6 +1809,10 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11D97 ; N # Mn GUNJALA GONDI VIRAMA 11D98 ; N # Lo GUNJALA GONDI OM 11DA0..11DA9 ; N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DB0..11DD8 ; N # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; N # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; N # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA +11DE0..11DE9 ; N # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11EE0..11EF2 ; N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA 11EF3..11EF4 ; N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U 11EF5..11EF6 ; N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O @@ -1865,15 +1880,24 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 16D6B..16D6C ; N # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D..16D6F ; N # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16D70..16D79 ; N # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16D80..16D97 ; N # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D98 ; N # Mn CHISOI SIGN ANUSVARA +16D99..16D9C ; N # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA +16D9D ; N # Mn CHISOI SIGN SISO +16DA0..16DA9 ; N # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; N # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; N # L& [25] BERIA ERFE 
SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; N # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F50 ; N # Lo MIAO LETTER NASALIZATION 16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FF2..16FF3 ; N # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER +16FF4..16FF6 ; N # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL @@ -2103,7 +2127,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761067 code points not listed here. +# The above property value applies to 760803 code points not listed here. # Total code points: 792388 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 9823282a2..caa0df6ae 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-18, 17:34:17 GMT +# Date: 2024-11-15, 15:57:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -36,7 +36,6 @@ 085C..085D ; Cn # [2] .. 085F ; Cn # 086B..086F ; Cn # [5] .. -088F ; Cn # 0892..0896 ; Cn # [5] .. 0984 ; Cn # 098D..098E ; Cn # [2] .. 
@@ -51,7 +50,7 @@ 09D8..09DB ; Cn # [4] .. 09DE ; Cn # 09E4..09E5 ; Cn # [2] .. -09FF..0A00 ; Cn # [2] .. +0A00 ; Cn # 0A04 ; Cn # 0A0B..0A0E ; Cn # [4] .. 0A11..0A12 ; Cn # [2] .. @@ -91,7 +90,7 @@ 0B3A..0B3B ; Cn # [2] .. 0B45..0B46 ; Cn # [2] .. 0B49..0B4A ; Cn # [2] .. -0B4E..0B54 ; Cn # [7] .. +0B4E..0B52 ; Cn # [5] .. 0B58..0B5B ; Cn # [4] .. 0B5E ; Cn # 0B64..0B65 ; Cn # [2] .. @@ -120,7 +119,7 @@ 0C49 ; Cn # 0C4E..0C54 ; Cn # [7] .. 0C57 ; Cn # -0C5B..0C5C ; Cn # [2] .. +0C5B ; Cn # 0C5E..0C5F ; Cn # [2] .. 0C64..0C65 ; Cn # [2] .. 0C70..0C76 ; Cn # [7] .. @@ -132,7 +131,7 @@ 0CC5 ; Cn # 0CC9 ; Cn # 0CCE..0CD4 ; Cn # [7] .. -0CD7..0CDC ; Cn # [6] .. +0CD7..0CDB ; Cn # [5] .. 0CDF ; Cn # 0CE4..0CE5 ; Cn # [2] .. 0CF0 ; Cn # @@ -228,7 +227,8 @@ 1A8A..1A8F ; Cn # [6] .. 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. -1ACF..1AFF ; Cn # [49] .. +1ADE..1ADF ; Cn # [2] .. +1AEC..1AFF ; Cn # [20] .. 1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. @@ -263,7 +263,6 @@ 242A..243F ; Cn # [22] .. 244B..245F ; Cn # [21] .. 2B74..2B75 ; Cn # [2] .. -2B96 ; Cn # 2CF4..2CF8 ; Cn # [5] .. 2D26 ; Cn # 2D28..2D2C ; Cn # [5] .. @@ -294,10 +293,7 @@ A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7CE..A7CF ; Cn # [2] .. -A7D2 ; Cn # -A7D4 ; Cn # -A7DD..A7F1 ; Cn # [21] .. +A7DD..A7F0 ; Cn # [20] .. A82D..A82F ; Cn # [3] .. A83A..A83F ; Cn # [6] .. A878..A87F ; Cn # [8] .. @@ -333,9 +329,6 @@ FB3D ; Cn # FB3F ; Cn # FB42 ; Cn # FB45 ; Cn # -FBC3..FBD2 ; Cn # [16] .. -FD90..FD91 ; Cn # [2] .. -FDC8..FDCE ; Cn # [7] .. FDD0..FDEF ; Cn # [32] .. FE1A..FE1F ; Cn # [6] .. FE53 ; Cn # @@ -407,7 +400,7 @@ FFFE..FFFF ; Cn # [2] .. 108F6..108FA ; Cn # [5] .. 1091C..1091E ; Cn # [3] .. 1093A..1093E ; Cn # [5] .. -10940..1097F ; Cn # [64] .. +1095D..1097F ; Cn # [35] .. 109B8..109BB ; Cn # [4] .. 109D0..109D1 ; Cn # [2] .. 10A04 ; Cn # @@ -439,7 +432,8 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 
10EB2..10EC1 ; Cn # [16] .. -10EC5..10EFB ; Cn # [55] .. +10EC8..10ECF ; Cn # [8] .. +10ED9..10EF9 ; Cn # [33] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -522,7 +516,8 @@ FFFE..FFFF ; Cn # [2] .. 11A48..11A4F ; Cn # [8] .. 11AA3..11AAF ; Cn # [13] .. 11AF9..11AFF ; Cn # [7] .. -11B0A..11BBF ; Cn # [182] .. +11B0A..11B5F ; Cn # [86] .. +11B68..11BBF ; Cn # [88] .. 11BE2..11BEF ; Cn # [14] .. 11BFA..11BFF ; Cn # [6] .. 11C09 ; Cn # @@ -544,7 +539,9 @@ FFFE..FFFF ; Cn # [2] .. 11D8F ; Cn # 11D92 ; Cn # 11D99..11D9F ; Cn # [7] .. -11DAA..11EDF ; Cn # [310] .. +11DAA..11DAF ; Cn # [6] .. +11DDC..11DDF ; Cn # [4] .. +11DEA..11EDF ; Cn # [246] .. 11EF9..11EFF ; Cn # [7] .. 11F11 ; Cn # 11F3B..11F3D ; Cn # [3] .. @@ -572,13 +569,17 @@ FFFE..FFFF ; Cn # [2] .. 16B62 ; Cn # 16B78..16B7C ; Cn # [5] .. 16B90..16D3F ; Cn # [432] .. -16D7A..16E3F ; Cn # [198] .. -16E9B..16EFF ; Cn # [101] .. +16D7A..16D7F ; Cn # [6] .. +16D9E..16D9F ; Cn # [2] .. +16DAA..16E3F ; Cn # [150] .. +16E9B..16E9F ; Cn # [5] .. +16EB9..16EBA ; Cn # [2] .. +16ED4..16EFF ; Cn # [44] .. 16F4B..16F4E ; Cn # [4] .. 16F88..16F8E ; Cn # [7] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. -16FF2..16FFF ; Cn # [14] .. +16FF7..16FFF ; Cn # [9] .. 18CD6..18CFE ; Cn # [41] .. 18D1F..18D7F ; Cn # [97] .. 18D82..1AFEF ; Cn # [8814] .. @@ -747,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. 
-# Total code points: 819501 +# Total code points: 819237 # ================================================ @@ -1355,7 +1356,10 @@ A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7CE ; Lu # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D2 ; Lu # LATIN CAPITAL LETTER DOUBLE THORN +A7D4 ; Lu # LATIN CAPITAL LETTER DOUBLE WYNN A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA @@ -1372,6 +1376,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 10D50..10D65 ; Lu # [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Lu # [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Lu # [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +16EA0..16EB8 ; Lu # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY 1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z 1D434..1D44D ; Lu # [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z 1D468..1D481 ; Lu # [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z @@ -1405,7 +1410,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1858 +# Total code points: 1886 # ================================================ @@ -1556,7 +1561,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 024B ; Ll # LATIN SMALL LETTER Q WITH HOOK TAIL 024D ; Ll # LATIN SMALL LETTER R WITH 
STROKE 024F..0293 ; Ll # [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL -0295..02AF ; Ll # [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0296..02AF ; Ll # [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 0371 ; Ll # GREEK SMALL LETTER HETA 0373 ; Ll # GREEK SMALL LETTER ARCHAIC SAMPI 0377 ; Ll # GREEK SMALL LETTER PAMPHYLIAN DIGAMMA @@ -2017,6 +2022,7 @@ A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CD ; Ll # LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CF ; Ll # LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D1 ; Ll # LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Ll # LATIN SMALL LETTER DOUBLE THORN A7D5 ; Ll # LATIN SMALL LETTER DOUBLE WYNN @@ -2041,6 +2047,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 10D70..10D85 ; Ll # [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Ll # [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Ll # [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16EBB..16ED3 ; Ll # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 1D41A..1D433 ; Ll # [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z 1D44E..1D454 ; Ll # [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G 1D456..1D467 ; Ll # [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z @@ -2074,7 +2081,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2258 +# Total code points: 2283 # ================================================ @@ -2143,7 +2150,7 @@ A69C..A69D ; Lm # [2] MODIFIER 
LETTER CYRILLIC HARD SIGN..MODIFIER LETTER C A717..A71F ; Lm # [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A770 ; Lm # MODIFIER LETTER US A788 ; Lm # MODIFIER LETTER LOW CIRCUMFLEX ACCENT -A7F2..A7F4 ; Lm # [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F1..A7F4 ; Lm # [4] MODIFIER LETTER CAPITAL S..MODIFIER LETTER CAPITAL Q A7F8..A7F9 ; Lm # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A9CF ; Lm # JAVANESE PANGRANGKEP A9E6 ; Lm # MYANMAR MODIFIER LETTER SHAN REDUPLICATION @@ -2159,12 +2166,15 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 107B2..107BA ; Lm # [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10D4E ; Lm # GARAY VOWEL LENGTH MARK 10D6F ; Lm # GARAY REDUPLICATION MARK +10EC5 ; Lm # ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +11DD9 ; Lm # TOLONG SIKI SIGN SELA 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16D40..16D42 ; Lm # [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA 16D6B..16D6C ; Lm # [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; Lm # [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Lm # OLD CHINESE ITERATION MARK +16FF2..16FF3 ; Lm # [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Lm # [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -2173,7 +2183,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 409 # ================================================ @@ -2183,7 +2193,7 @@ FF9E..FF9F ; Lm 
# [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 00BA ; Lo # MASCULINE ORDINAL INDICATOR 01BB ; Lo # LATIN LETTER TWO WITH STROKE 01C0..01C3 ; Lo # [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK -0294 ; Lo # LATIN LETTER GLOTTAL STOP +0294..0295 ; Lo # [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE 05D0..05EA ; Lo # [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05EF..05F2 ; Lo # [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD 0620..063F ; Lo # [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE @@ -2203,7 +2213,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0860..086A ; Lo # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Lo # [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT -0889..088E ; Lo # [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; Lo # [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; Lo # [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; Lo # DEVANAGARI SIGN AVAGRAHA @@ -2222,6 +2232,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 09DF..09E1 ; Lo # [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 09F0..09F1 ; Lo # [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 09FC ; Lo # BENGALI LETTER VEDIC ANUSVARA +09FF ; Lo # BENGALI LETTER SANSKRIT BA 0A05..0A0A ; Lo # [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; Lo # [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; Lo # [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -2269,7 +2280,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0C2A..0C39 ; Lo # [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; Lo # TELUGU 
SIGN AVAGRAHA 0C58..0C5A ; Lo # [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Lo # TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Lo # [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Lo # [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; Lo # KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; Lo # [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2278,7 +2289,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0CAA..0CB3 ; Lo # [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; Lo # [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; Lo # KANNADA SIGN AVAGRAHA -0CDD..0CDE ; Lo # [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Lo # [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2504,6 +2515,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 108F4..108F5 ; Lo # [2] HATRAN LETTER SHIN..HATRAN LETTER TAW 10900..10915 ; Lo # [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Lo # [26] LYDIAN LETTER A..LYDIAN LETTER C +10940..1095C ; Lo # [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; Lo # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BE..109BF ; Lo # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Lo # KHAROSHTHI LETTER A @@ -2525,6 +2537,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC6..10EC7 ; Lo # [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH 
FOUR DOTS BELOW 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2616,6 +2629,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 11D67..11D68 ; Lo # [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; Lo # [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; Lo # GUNJALA GONDI OM +11DB0..11DD8 ; Lo # [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DDA..11DDB ; Lo # [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11EE0..11EF2 ; Lo # [19] MAKASAR LETTER KA..MAKASAR ANGKA 11F02 ; Lo # KAWI SIGN REPHA 11F04..11F10 ; Lo # [13] KAWI LETTER A..KAWI LETTER O @@ -2637,6 +2652,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16B63..16B77 ; Lo # [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; Lo # [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ 16D43..16D6A ; Lo # [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D80..16D97 ; Lo # [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; Lo # [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 @@ -2708,7 +2725,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136509 +# Total code points: 136616 # ================================================ @@ -2773,7 +2790,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0B3F ; Mn # ORIYA VOWEL SIGN I 0B41..0B44 ; Mn # [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC 
RR 0B4D ; Mn # ORIYA SIGN VIRAMA -0B55..0B56 ; Mn # [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; Mn # [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; Mn # [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; Mn # TAMIL SIGN ANUSVARA 0BC0 ; Mn # TAMIL VOWEL SIGN II @@ -2856,7 +2873,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; Mn # [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; Mn # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; Mn # BALINESE SIGN REREKAN 1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -2938,7 +2956,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; Mn # [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -3013,6 +3031,9 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11A59..11A5B ; Mn # [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; 
Mn # [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; Mn # [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; Mn # SHARADA VOWEL SIGN OE +11B62..11B64 ; Mn # [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; Mn # SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; Mn # [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; Mn # [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; Mn # BHAIKSUKI SIGN VIRAMA @@ -3040,6 +3061,8 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1612D..1612F ; Mn # [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; Mn # CHISOI SIGN ANUSVARA +16D9D ; Mn # CHISOI SIGN SISO 16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; Mn # KHITAN SMALL SCRIPT FILLER @@ -3072,7 +3095,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2020 +# Total code points: 2058 # ================================================ @@ -3262,6 +3285,9 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11A39 ; Mc # ZANABAZAR SQUARE SIGN VISARGA 11A57..11A58 ; Mc # [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU 11A97 ; Mc # SOYOMBO SIGN VISARGA +11B61 ; Mc # SHARADA VOWEL SIGN OOE +11B65 ; Mc # SHARADA VOWEL SIGN SHORT O +11B67 ; Mc # SHARADA VOWEL SIGN CANDRA O 11C2F ; Mc # BHAIKSUKI VOWEL SIGN AA 11C3E ; Mc # BHAIKSUKI SIGN VISARGA 11CA9 ; Mc # MARCHEN SUBJOINED LETTER YA @@ -3281,7 +3307,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING 
AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 468 +# Total code points: 471 # ================================================ @@ -3344,12 +3370,14 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Nd # [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Nd # [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Nd # [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Nd # [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Nd # [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Nd # [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Nd # [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Nd # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; Nd # [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -3359,7 +3387,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 760 +# Total code points: 780 # ================================================ @@ -3377,8 +3405,9 @@ A6E6..A6EF ; Nl # [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM 1034A ; Nl # GOTHIC LETTER NINE HUNDRED 103D1..103D5 ; Nl # [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 12400..1246E ; Nl # [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +16FF4..16FF6 ; Nl # [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -# Total code points: 236 +# Total code points: 239 # 
================================================ @@ -3900,6 +3929,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 10AF0..10AF6 ; Po # [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER 10B39..10B3F ; Po # [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10B99..10B9C ; Po # [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10ED0 ; Po # ARABIC BIBLICAL END OF VERSE 10F55..10F59 ; Po # [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 10F86..10F89 ; Po # [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS 11047..1104D ; Po # [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS @@ -3951,7 +3981,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 1E5FF ; Po # OL ONAL ABBREVIATION SIGN 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 640 +# Total code points: 641 # ================================================ @@ -4174,8 +4204,7 @@ FFE3 ; Sk # FULLWIDTH MACRON 2B00..2B2F ; So # [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE 2B45..2B46 ; So # [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW 2B4D..2B73 ; So # [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; So # [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; So # [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; So # [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2CE5..2CEA ; So # [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2E50..2E51 ; So # [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR 2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP @@ -4203,8 +4232,10 @@ A828..A82B ; So # [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK- A836..A837 ; So # [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK A839 ; So # 
NORTH INDIC QUANTITY MARK AA77..AA79 ; So # [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +FBC3..FBD2 ; So # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FD40..FD4F ; So # [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH -FDCF ; So # ARABIC LIGATURE SALAAMUHU ALAYNAA +FD90..FD91 ; So # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA +FDC8..FDCF ; So # [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDFD..FDFF ; So # [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FFE4 ; So # FULLWIDTH BROKEN BAR FFE8 ; So # HALFWIDTH FORMS LIGHT VERTICAL @@ -4218,6 +4249,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 101D0..101FC ; So # [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND 10877..10878 ; So # [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON 10AC8 ; So # MANICHAEAN SIGN UD +10ED1..10ED8 ; So # [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 1173F ; So # AHOM SYMBOL VI 11FD5..11FDC ; So # [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI 11FE1..11FF1 ; So # [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA @@ -4283,7 +4315,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 7376 +# Total code points: 7410 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index 17778a8a0..f24475268 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ 
b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ -# DerivedJoiningGroup-16.0.0.txt -# Date: 2024-07-30, 21:15:55 GMT +# DerivedJoiningGroup-17.0.0.txt +# Date: 2024-11-14, 15:27:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -231,8 +231,9 @@ 06B9..06BC ; Noon # Lo [4] ARABIC LETTER NOON WITH DOT BELOW..ARABIC LETTER NOON WITH RING 0767..0769 ; Noon # Lo [3] ARABIC LETTER NOON WITH TWO DOTS BELOW..ARABIC LETTER NOON WITH SMALL V 0889 ; Noon # Lo ARABIC LETTER NOON WITH INVERTED SMALL V +088F ; Noon # Lo ARABIC LETTER NOON WITH RING ABOVE -# Total code points: 9 +# Total code points: 10 # ================================================ @@ -384,8 +385,9 @@ 0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW 08A8..08A9 ; Yeh # Lo [2] ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE 08BA ; Yeh # Lo ARABIC LETTER YEH WITH TWO DOTS BELOW AND SMALL NOON ABOVE +10EC7 ; Yeh # Lo ARABIC LETTER YEH WITH FOUR DOTS BELOW -# Total code points: 10 +# Total code points: 11 # ================================================ @@ -750,4 +752,10 @@ # Total code points: 1 +# ================================================ + +10EC6 ; Thin_Noon # Lo ARABIC LETTER THIN NOON + +# Total code points: 1 + # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 3841a92cc..d505d6ad3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ -# DerivedJoiningType-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedJoiningType-17.0.0.txt +# Date: 2024-11-14, 19:48:28 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,6 +70,7 @@ 0868 ; D # Lo SYRIAC LETTER MALAYALAM LLA 0886 ; D # Lo ARABIC LETTER THIN YEH 0889..088D ; D # Lo [5] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW +088F ; D # Lo ARABIC LETTER NOON WITH RING ABOVE 08A0..08A9 ; D # Lo [10] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE 08AF..08B0 ; D # Lo [2] ARABIC LETTER SAD WITH THREE DOTS BELOW..ARABIC LETTER GAF WITH INVERTED STROKE 08B3..08B8 ; D # Lo [6] ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER TEH WITH SMALL TEH ABOVE @@ -96,6 +97,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O 10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA 10EC3..10EC4 ; D # Lo [2] ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC6..10EC7 ; D # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW 10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL 10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH 10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY @@ -111,7 +113,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FCA ; D # No CHORASMIAN NUMBER TWENTY 1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 612 +# Total code points: 615 # ================================================ @@ -265,7 +267,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 0B3F ; T # Mn ORIYA VOWEL SIGN I 0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR 0B4D ; T # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; T # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK 
+0B53..0B56 ; T # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; T # Mn TAMIL SIGN ANUSVARA 0BC0 ; T # Mn TAMIL VOWEL SIGN II @@ -349,7 +351,8 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; T # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEB ; T # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B34 ; T # Mn BALINESE SIGN REREKAN 1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA @@ -441,7 +444,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10EFA..10EFF ; T # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; T # Mn BRAHMI SIGN ANUSVARA @@ -516,6 +519,9 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 11A59..11A5B ; T # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK 11A8A..11A96 ; T # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A98..11A99 ; T # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 
+11B60 ; T # Mn SHARADA VOWEL SIGN OE +11B62..11B64 ; T # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B66 ; T # Mn SHARADA VOWEL SIGN CANDRA E 11C30..11C36 ; T # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; T # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA 11C3F ; T # Mn BHAIKSUKI SIGN VIRAMA @@ -544,6 +550,8 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 1612D..1612F ; T # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; T # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; T # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; T # Mn CHISOI SIGN ANUSVARA +16D9D ; T # Mn CHISOI SIGN SISO 16F4F ; T # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; T # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; T # Mn KHITAN SMALL SCRIPT FILLER @@ -581,6 +589,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2185 +# Total code points: 2223 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 4c6a5df09..4c63f1980 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-18, 17:34:21 GMT +# Date: 2024-11-15, 15:57:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757621 code points not listed here. 
-# Total code points: 895089 +# The above property value applies to 757357 code points not listed here. +# Total code points: 894825 # ================================================ @@ -312,6 +312,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 0F12 ; GL # Po TIBETAN MARK RGYA GRAM SHAD 0FD9..0FDA ; GL # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS 180E ; GL # Cf MONGOLIAN VOWEL SEPARATOR +1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1DCD ; GL # Mn COMBINING DOUBLE CIRCUMFLEX ABOVE 1DFC ; GL # Mn COMBINING DOUBLE INVERTED BREVE BELOW 2007 ; GL # Zs FIGURE SPACE @@ -329,7 +330,7 @@ FE2D..FE2E ; GL # Mn [2] COMBINING CONJOINING MACRON BELOW..COMBINING CYRIL 13439..1343B ; GL # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER -# Total code points: 41 +# Total code points: 42 # ================================================ @@ -356,9 +357,10 @@ FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KA 16FE0..16FE1 ; NS # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE2 ; NS # Po OLD CHINESE HOOK MARK 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK +16FF2..16FF3 ; NS # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 1F679..1F67B ; NS # So [3] HEAVY INTERROBANG ORNAMENT..HEAVY SANS-SERIF INTERROBANG ORNAMENT -# Total code points: 35 +# Total code points: 37 # ================================================ @@ -543,10 +545,12 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 11C50..11C59 ; NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; NU # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 16A60..16A69 ; NU # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; NU # Nd [10] TANGSA DIGIT ZERO..TANGSA 
DIGIT NINE 16B50..16B59 ; NU # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; NU # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; NU # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; NU # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; NU # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; NU # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -556,7 +560,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1E950..1E959 ; NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 695 +# Total code points: 715 # ================================================ @@ -586,8 +590,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 01BC..01BF ; AL # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 01C0..01C3 ; AL # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 01C4..0293 ; AL # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL -0294 ; AL # Lo LATIN LETTER GLOTTAL STOP -0295..02AF ; AL # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0294..0295 ; AL # Lo [2] LATIN LETTER GLOTTAL STOP..LATIN LETTER PHARYNGEAL VOICED FRICATIVE +0296..02AF ; AL # L& [26] LATIN LETTER INVERTED GLOTTAL STOP..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 02B0..02C1 ; AL # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 02C2..02C5 ; AL # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C6 ; AL # Lm MODIFIER LETTER CIRCUMFLEX ACCENT @@ -665,7 +669,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED 
FATHA..ARABIC BASELINE ROUND DOT 0888 ; AL # Sk ARABIC RAISED ROUND DOT -0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH 0904..0939 ; AL # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -690,6 +694,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 09FA ; AL # So BENGALI ISSHAR 09FC ; AL # Lo BENGALI LETTER VEDIC ANUSVARA 09FD ; AL # Po BENGALI ABBREVIATION SIGN +09FF ; AL # Lo BENGALI LETTER SANSKRIT BA 0A05..0A0A ; AL # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 0A0F..0A10 ; AL # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 0A13..0A28 ; AL # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA @@ -744,7 +749,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0C2A..0C39 ; AL # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; AL # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C78..0C7E ; AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; AL # So TELUGU SIGN TUUMU @@ -755,7 +760,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0CAA..0CB3 ; AL # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; AL # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; AL # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 
0CF1..0CF2 ; AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; AL # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -1078,8 +1083,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2B47..2B4C ; AL # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR 2B4D..2B54 ; AL # So [8] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..WHITE RIGHT-POINTING PENTAGON 2B5A..2B73 ; AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR -2B76..2B95 ; AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW -2B97..2BFF ; AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2B76..2BFF ; AL # So [138] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..HELLSCHREIBER PAUSE SYMBOL 2C00..2C7B ; AL # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E 2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; AL # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI @@ -1140,11 +1144,8 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE -A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D3 ; AL # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE -A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A790..A7DC ; AL # L& [77] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1..A7F4 ; AL # Lm [4] MODIFIER LETTER 
CAPITAL S..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -1187,11 +1188,13 @@ FB13..FB17 ; AL # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LI FB29 ; AL # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # So [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD40..FD4F ; AL # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; AL # So [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM -FDCF ; AL # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDC8..FDCF ; AL # So [8] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU FDFD..FDFF ; AL # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM @@ -1271,6 +1274,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10916..1091B ; AL # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po 
LYDIAN TRIANGULAR MARK +10940..1095C ; AL # Lo [29] SIDETIC LETTER N01..SIDETIC LETTER N29 10980..109B7 ; AL # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA 109BC..109BD ; AL # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF 109BE..109BF ; AL # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN @@ -1315,6 +1319,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; AL # Lm ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6..10EC7 ; AL # Lo [2] ARABIC LETTER THIN NOON..ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED1..10ED8 ; AL # So [8] ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1404,6 +1411,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 11D67..11D68 ; AL # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D89 ; AL # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA 11D98 ; AL # Lo GUNJALA GONDI OM +11DB0..11DD8 ; AL # Lo [41] TOLONG SIKI LETTER I..TOLONG SIKI LETTER RRH +11DD9 ; AL # Lm TOLONG SIKI SIGN SELA +11DDA..11DDB ; AL # Lo [2] TOLONG SIKI SIGN HECAKA..TOLONG SIKI UNGGA 11FB0 ; AL # Lo LISU LETTER YHA 11FC0..11FD4 ; AL # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH 11FD5..11FDC ; AL # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -1438,9 +1448,13 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK 
SQUARE..HALFWIDTH WHITE CIRCLE 16D43..16D6A ; AL # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16D6B..16D6C ; AL # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16D6D ; AL # Po KIRAT RAI SIGN YUPI +16D80..16D97 ; AL # Lo [24] CHISOI LETTER A..CHISOI LETTER PA +16D99..16D9C ; AL # Lo [4] CHISOI LETTER YA..CHISOI LETTER JARAHA 16E40..16E7F ; AL # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E99..16E9A ; AL # Po [2] MEDEFAIDRIN SYMBOL AIVA..MEDEFAIDRIN EXCLAMATION OH +16EA0..16EB8 ; AL # L& [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY +16EBB..16ED3 ; AL # L& [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16F00..16F4A ; AL # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; AL # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -1613,7 +1627,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26876 # ================================================ @@ -1762,6 +1776,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA 113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL +16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18D81 ; ID # Lo [2] TANGUT COMPONENT-769..TANGUT 
COMPONENT-770 @@ -1853,7 +1868,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF # The above property value applies to 61865 code points not listed here. -# Total code points: 172453 +# Total code points: 172456 # ================================================ @@ -1966,7 +1981,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0B47..0B48 ; CM # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 0B4B..0B4C ; CM # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 0B4D ; CM # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; CM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B53..0B56 ; CM # Mn [4] ORIYA SIGN DOT ABOVE..ORIYA AI LENGTH MARK 0B57 ; CM # Mc ORIYA AU LENGTH MARK 0B62..0B63 ; CM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL 0B82 ; CM # Mn TAMIL SIGN ANUSVARA @@ -2054,7 +2069,8 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1A7F ; CM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW 1ABE ; CM # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; CM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW +1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE 1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG 1B04 ; CM # Mc BALINESE SIGN BISAH 1B34 ; CM # Mn BALINESE SIGN REREKAN @@ -2182,7 +2198,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC 
SMALL LOW WORD MADDA +10EFA..10EFF ; CM # Mn [6] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; CM # Mc BRAHMI SIGN CANDRABINDU @@ -2308,6 +2324,12 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11A8A..11A96 ; CM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA 11A97 ; CM # Mc SOYOMBO SIGN VISARGA 11A98..11A99 ; CM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11B60 ; CM # Mn SHARADA VOWEL SIGN OE +11B61 ; CM # Mc SHARADA VOWEL SIGN OOE +11B62..11B64 ; CM # Mn [3] SHARADA VOWEL SIGN UE..SHARADA VOWEL SIGN SHORT E +11B65 ; CM # Mc SHARADA VOWEL SIGN SHORT O +11B66 ; CM # Mn SHARADA VOWEL SIGN CANDRA E +11B67 ; CM # Mc SHARADA VOWEL SIGN CANDRA O 11C2F ; CM # Mc BHAIKSUKI VOWEL SIGN AA 11C30..11C36 ; CM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C3D ; CM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA @@ -2348,6 +2370,8 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 1612D..1612F ; CM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; CM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; CM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D98 ; CM # Mn CHISOI SIGN ANUSVARA +16D9D ; CM # Mn CHISOI SIGN SISO 16F4F ; CM # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; CM # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; CM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -2387,7 +2411,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2470 +# Total code points: 2510 # ================================================ @@ 
-2504,6 +2528,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 10D6E ; BA # Pd GARAY HYPHEN 10EAD ; BA # Pd YEZIDI HYPHENATION MARK +10ED0 ; BA # Po ARABIC BIBLICAL END OF VERSE 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA 11140..11143 ; BA # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK @@ -2540,7 +2565,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 1BC9F ; BA # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; BA # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 272 +# Total code points: 273 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 8ca3e6846..4b1c2cfd9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2024-10-18, 17:34:22 GMT +# Date: 2024-11-15, 15:57:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2096,6 +2096,7 @@ 088C ; ARABIC LETTER TAH WITH THREE DOTS BELOW 088D ; ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW 088E ; ARABIC VERTICAL TAIL +088F ; ARABIC LETTER NOON WITH RING ABOVE 0890 ; ARABIC POUND MARK ABOVE 0891 ; ARABIC PIASTRE MARK ABOVE 0897 ; ARABIC PEPET @@ -2427,6 +2428,7 @@ 09FC ; BENGALI LETTER VEDIC ANUSVARA 09FD ; BENGALI ABBREVIATION SIGN 09FE ; BENGALI SANDHI MARK +09FF ; BENGALI LETTER SANSKRIT BA 0A01 ; GURMUKHI SIGN ADAK BINDI 0A02 ; GURMUKHI SIGN BINDI 0A03 ; GURMUKHI SIGN VISARGA @@ -2661,6 +2663,8 @@ 0B4B ; ORIYA VOWEL SIGN O 0B4C ; ORIYA VOWEL SIGN AU 0B4D ; ORIYA SIGN VIRAMA +0B53 ; ORIYA SIGN DOT ABOVE +0B54 ; ORIYA SIGN DOUBLE DOT ABOVE 0B55 ; ORIYA SIGN OVERLINE 0B56 ; ORIYA AI LENGTH MARK 0B57 ; ORIYA AU LENGTH MARK @@ -2837,6 +2841,7 @@ 0C58 ; TELUGU LETTER TSA 0C59 ; TELUGU LETTER DZA 0C5A ; TELUGU LETTER RRRA +0C5C ; TELUGU ARCHAIC SHRII 0C5D ; TELUGU LETTER NAKAARA POLLU 0C60 ; TELUGU LETTER VOCALIC RR 0C61 ; TELUGU LETTER VOCALIC LL @@ -2933,6 +2938,7 @@ 0CCD ; KANNADA SIGN VIRAMA 0CD5 ; KANNADA LENGTH MARK 0CD6 ; KANNADA AI LENGTH MARK +0CDC ; KANNADA ARCHAIC SHRII 0CDD ; KANNADA LETTER NAKAARA POLLU 0CDE ; KANNADA LETTER FA 0CE0 ; KANNADA LETTER VOCALIC RR @@ -6112,6 +6118,33 @@ 1ACC ; COMBINING LATIN SMALL LETTER INSULAR G 1ACD ; COMBINING LATIN SMALL LETTER INSULAR R 1ACE ; COMBINING LATIN SMALL LETTER INSULAR T +1ACF ; COMBINING DOUBLE CARON +1AD0 ; COMBINING VERTICAL-LINE-ACUTE +1AD1 ; COMBINING GRAVE-VERTICAL-LINE +1AD2 ; COMBINING VERTICAL-LINE-GRAVE +1AD3 ; COMBINING ACUTE-VERTICAL-LINE +1AD4 ; COMBINING VERTICAL-LINE-MACRON +1AD5 ; COMBINING MACRON-VERTICAL-LINE +1AD6 ; COMBINING VERTICAL-LINE-ACUTE-GRAVE +1AD7 ; COMBINING VERTICAL-LINE-GRAVE-ACUTE +1AD8 ; COMBINING MACRON-ACUTE-GRAVE +1AD9 ; COMBINING SHARP SIGN +1ADA ; COMBINING FLAT SIGN +1ADB ; COMBINING DOWN TACK ABOVE +1ADC ; COMBINING DIAERESIS WITH RAISED LEFT DOT +1ADD 
; COMBINING DOT-AND-RING BELOW +1AE0 ; COMBINING LEFT TACK ABOVE +1AE1 ; COMBINING RIGHT TACK ABOVE +1AE2 ; COMBINING MINUS SIGN ABOVE +1AE3 ; COMBINING INVERTED BRIDGE ABOVE +1AE4 ; COMBINING SQUARE ABOVE +1AE5 ; COMBINING SEAGULL ABOVE +1AE6 ; COMBINING DOUBLE ARCH BELOW +1AE7 ; COMBINING DOUBLE ARCH ABOVE +1AE8 ; COMBINING EQUALS SIGN ABOVE +1AE9 ; COMBINING LEFT ANGLE CENTRED ABOVE +1AEA ; COMBINING UPWARDS ARROW ABOVE +1AEB ; COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 1B00 ; BALINESE SIGN ULU RICEM 1B01 ; BALINESE SIGN ULU CANDRA 1B02 ; BALINESE SIGN CECEK @@ -10214,6 +10247,7 @@ 2B93 ; NEWLINE RIGHT 2B94 ; FOUR CORNER ARROWS CIRCLING ANTICLOCKWISE 2B95 ; RIGHTWARDS BLACK ARROW +2B96 ; EQUALS SIGN WITH INFINITY ABOVE 2B97 ; SYMBOL FOR TYPE A ELECTRONICS 2B98 ; THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD 2B99 ; THREE-D RIGHT-LIGHTED UPWARDS EQUILATERAL ARROWHEAD @@ -14247,9 +14281,13 @@ A7CA ; LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY A7CB ; LATIN CAPITAL LETTER RAMS HORN A7CC ; LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7CD ; LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7CE ; LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE +A7CF ; LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D0 ; LATIN CAPITAL LETTER CLOSED INSULAR G A7D1 ; LATIN SMALL LETTER CLOSED INSULAR G +A7D2 ; LATIN CAPITAL LETTER DOUBLE THORN A7D3 ; LATIN SMALL LETTER DOUBLE THORN +A7D4 ; LATIN CAPITAL LETTER DOUBLE WYNN A7D5 ; LATIN SMALL LETTER DOUBLE WYNN A7D6 ; LATIN CAPITAL LETTER MIDDLE SCOTS S A7D7 ; LATIN SMALL LETTER MIDDLE SCOTS S @@ -14258,6 +14296,7 @@ A7D9 ; LATIN SMALL LETTER SIGMOID S A7DA ; LATIN CAPITAL LETTER LAMBDA A7DB ; LATIN SMALL LETTER LAMBDA A7DC ; LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F1 ; MODIFIER LETTER CAPITAL S A7F2 ; MODIFIER LETTER CAPITAL C A7F3 ; MODIFIER LETTER CAPITAL F A7F4 ; MODIFIER LETTER CAPITAL Q @@ -26590,6 +26629,22 @@ FBBF ; ARABIC SYMBOL RING FBC0 ; ARABIC SYMBOL SMALL TAH ABOVE FBC1 ; ARABIC SYMBOL SMALL TAH BELOW FBC2 ; ARABIC SYMBOL 
WASLA ABOVE +FBC3 ; ARABIC LIGATURE JALLA WA-ALAA +FBC4 ; ARABIC LIGATURE DAAMAT BARAKAATUHUM +FBC5 ; ARABIC LIGATURE RAHMATU ALLAAHI TAAALAA ALAYH +FBC6 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIM +FBC7 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHIMAA +FBC8 ; ARABIC LIGATURE RAHIMAHUM ALLAAHU TAAALAA +FBC9 ; ARABIC LIGATURE RAHIMAHUMAA ALLAAH +FBCA ; ARABIC LIGATURE RAHIMAHUMAA ALLAAHU TAAALAA +FBCB ; ARABIC LIGATURE RADI ALLAHU TAAALAA ANHUM +FBCC ; ARABIC LIGATURE HAFIZAHU ALLAAH +FBCD ; ARABIC LIGATURE HAFIZAHU ALLAAHU TAAALAA +FBCE ; ARABIC LIGATURE HAFIZAHUM ALLAAHU TAAALAA +FBCF ; ARABIC LIGATURE HAFIZAHUMAA ALLAAHU TAAALAA +FBD0 ; ARABIC LIGATURE SALLALLAAHU TAAALAA ALAYHI WA-SALLAM +FBD1 ; ARABIC LIGATURE AJJAL ALLAAHU FARAJAHU ASH-SHAREEF +FBD2 ; ARABIC LIGATURE ALAYHI AR-RAHMAH FBD3 ; ARABIC LETTER NG ISOLATED FORM FBD4 ; ARABIC LETTER NG FINAL FORM FBD5 ; ARABIC LETTER NG INITIAL FORM @@ -27035,6 +27090,8 @@ FD8C ; ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM FD8D ; ARABIC LIGATURE MEEM WITH JEEM WITH MEEM INITIAL FORM FD8E ; ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM FD8F ; ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYH +FD91 ; ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA FD92 ; ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM FD93 ; ARABIC LIGATURE HEH WITH MEEM WITH JEEM INITIAL FORM FD94 ; ARABIC LIGATURE HEH WITH MEEM WITH MEEM INITIAL FORM @@ -27089,6 +27146,13 @@ FDC4 ; ARABIC LIGATURE AIN WITH JEEM WITH MEEM INITIAL FORM FDC5 ; ARABIC LIGATURE SAD WITH MEEM WITH MEEM INITIAL FORM FDC6 ; ARABIC LIGATURE SEEN WITH KHAH WITH YEH FINAL FORM FDC7 ; ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDC8 ; ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA +FDC9 ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANH +FDCA ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHAA +FDCB ; ARABIC LIGATURE RADI ALLAAHU TAAALAA ANHUMAA +FDCC ; ARABIC LIGATURE SALLALLAHU ALAYHI WA-ALAA AALIHEE WA-SALLAM +FDCD ; ARABIC LIGATURE 
AJJAL ALLAAHU TAAALAA FARAJAHU ASH-SHAREEF +FDCE ; ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH FDCF ; ARABIC LIGATURE SALAAMUHU ALAYNAA FDF0 ; ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM FDF1 ; ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN ISOLATED FORM @@ -29373,6 +29437,35 @@ FFFD ; REPLACEMENT CHARACTER 10938 ; LYDIAN LETTER NN 10939 ; LYDIAN LETTER C 1093F ; LYDIAN TRIANGULAR MARK +10940 ; SIDETIC LETTER N01 +10941 ; SIDETIC LETTER N02 +10942 ; SIDETIC LETTER N03 +10943 ; SIDETIC LETTER N04 +10944 ; SIDETIC LETTER N05 +10945 ; SIDETIC LETTER N06 +10946 ; SIDETIC LETTER N07 +10947 ; SIDETIC LETTER N08 +10948 ; SIDETIC LETTER N09 +10949 ; SIDETIC LETTER N10 +1094A ; SIDETIC LETTER N11 +1094B ; SIDETIC LETTER N12 +1094C ; SIDETIC LETTER N13 +1094D ; SIDETIC LETTER N14 +1094E ; SIDETIC LETTER N15 +1094F ; SIDETIC LETTER N16 +10950 ; SIDETIC LETTER N17 +10951 ; SIDETIC LETTER N18 +10952 ; SIDETIC LETTER N19 +10953 ; SIDETIC LETTER N20 +10954 ; SIDETIC LETTER N21 +10955 ; SIDETIC LETTER N22 +10956 ; SIDETIC LETTER N23 +10957 ; SIDETIC LETTER N24 +10958 ; SIDETIC LETTER N25 +10959 ; SIDETIC LETTER N26 +1095A ; SIDETIC LETTER N27 +1095B ; SIDETIC LETTER N28 +1095C ; SIDETIC LETTER N29 10980 ; MEROITIC HIEROGLYPHIC LETTER A 10981 ; MEROITIC HIEROGLYPHIC LETTER E 10982 ; MEROITIC HIEROGLYPHIC LETTER I @@ -30206,6 +30299,20 @@ FFFD ; REPLACEMENT CHARACTER 10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC5 ; ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW +10EC6 ; ARABIC LETTER THIN NOON +10EC7 ; ARABIC LETTER YEH WITH FOUR DOTS BELOW +10ED0 ; ARABIC BIBLICAL END OF VERSE +10ED1 ; ARABIC LIGATURE ALAYHAA AS-SALAATU WAS-SALAAM +10ED2 ; ARABIC LIGATURE ALAYHIM AS-SALAATU WAS-SALAAM +10ED3 ; ARABIC LIGATURE ALAYHIMAA AS-SALAATU WAS-SALAAM +10ED4 ; ARABIC LIGATURE QADDASA ALLAAHU SIRRAH +10ED5 ; ARABIC LIGATURE QUDDISA SIRRAHUM +10ED6 ; 
ARABIC LIGATURE QUDDISA SIRRAHUMAA +10ED7 ; ARABIC LIGATURE QUDDISAT ASRAARUHUM +10ED8 ; ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH +10EFA ; ARABIC DOUBLE VERTICAL BAR BELOW +10EFB ; ARABIC SMALL LOW NOON 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR @@ -32186,6 +32293,14 @@ FFFD ; REPLACEMENT CHARACTER 11B07 ; DEVANAGARI SIGN WESTERN NINE-LIKE BHALE 11B08 ; DEVANAGARI SIGN REVERSED NINE-LIKE BHALE 11B09 ; DEVANAGARI SIGN MINDU +11B60 ; SHARADA VOWEL SIGN OE +11B61 ; SHARADA VOWEL SIGN OOE +11B62 ; SHARADA VOWEL SIGN UE +11B63 ; SHARADA VOWEL SIGN UUE +11B64 ; SHARADA VOWEL SIGN SHORT E +11B65 ; SHARADA VOWEL SIGN SHORT O +11B66 ; SHARADA VOWEL SIGN CANDRA E +11B67 ; SHARADA VOWEL SIGN CANDRA O 11BC0 ; SUNUWAR LETTER DEVI 11BC1 ; SUNUWAR LETTER TASLA 11BC2 ; SUNUWAR LETTER EKO @@ -32533,6 +32648,60 @@ FFFD ; REPLACEMENT CHARACTER 11DA7 ; GUNJALA GONDI DIGIT SEVEN 11DA8 ; GUNJALA GONDI DIGIT EIGHT 11DA9 ; GUNJALA GONDI DIGIT NINE +11DB0 ; TOLONG SIKI LETTER I +11DB1 ; TOLONG SIKI LETTER E +11DB2 ; TOLONG SIKI LETTER U +11DB3 ; TOLONG SIKI LETTER O +11DB4 ; TOLONG SIKI LETTER A +11DB5 ; TOLONG SIKI LETTER AA +11DB6 ; TOLONG SIKI LETTER P +11DB7 ; TOLONG SIKI LETTER PH +11DB8 ; TOLONG SIKI LETTER B +11DB9 ; TOLONG SIKI LETTER BH +11DBA ; TOLONG SIKI LETTER M +11DBB ; TOLONG SIKI LETTER T +11DBC ; TOLONG SIKI LETTER TH +11DBD ; TOLONG SIKI LETTER D +11DBE ; TOLONG SIKI LETTER DH +11DBF ; TOLONG SIKI LETTER N +11DC0 ; TOLONG SIKI LETTER TT +11DC1 ; TOLONG SIKI LETTER TTH +11DC2 ; TOLONG SIKI LETTER DD +11DC3 ; TOLONG SIKI LETTER DDH +11DC4 ; TOLONG SIKI LETTER NN +11DC5 ; TOLONG SIKI LETTER C +11DC6 ; TOLONG SIKI LETTER CH +11DC7 ; TOLONG SIKI LETTER J +11DC8 ; TOLONG SIKI LETTER JH +11DC9 ; TOLONG SIKI LETTER NY +11DCA ; TOLONG SIKI LETTER K +11DCB ; TOLONG SIKI LETTER KH +11DCC ; TOLONG SIKI LETTER G +11DCD ; TOLONG SIKI LETTER GH +11DCE ; TOLONG SIKI LETTER NG +11DCF ; TOLONG SIKI LETTER Y +11DD0 ; TOLONG 
SIKI LETTER R +11DD1 ; TOLONG SIKI LETTER L +11DD2 ; TOLONG SIKI LETTER V +11DD3 ; TOLONG SIKI LETTER NNY +11DD4 ; TOLONG SIKI LETTER S +11DD5 ; TOLONG SIKI LETTER H +11DD6 ; TOLONG SIKI LETTER X +11DD7 ; TOLONG SIKI LETTER RR +11DD8 ; TOLONG SIKI LETTER RRH +11DD9 ; TOLONG SIKI SIGN SELA +11DDA ; TOLONG SIKI SIGN HECAKA +11DDB ; TOLONG SIKI UNGGA +11DE0 ; TOLONG SIKI DIGIT ZERO +11DE1 ; TOLONG SIKI DIGIT ONE +11DE2 ; TOLONG SIKI DIGIT TWO +11DE3 ; TOLONG SIKI DIGIT THREE +11DE4 ; TOLONG SIKI DIGIT FOUR +11DE5 ; TOLONG SIKI DIGIT FIVE +11DE6 ; TOLONG SIKI DIGIT SIX +11DE7 ; TOLONG SIKI DIGIT SEVEN +11DE8 ; TOLONG SIKI DIGIT EIGHT +11DE9 ; TOLONG SIKI DIGIT NINE 11EE0 ; MAKASAR LETTER KA 11EE1 ; MAKASAR LETTER GA 11EE2 ; MAKASAR LETTER NGA @@ -36704,6 +36873,46 @@ FFFD ; REPLACEMENT CHARACTER 16D77 ; KIRAT RAI DIGIT SEVEN 16D78 ; KIRAT RAI DIGIT EIGHT 16D79 ; KIRAT RAI DIGIT NINE +16D80 ; CHISOI LETTER A +16D81 ; CHISOI LETTER BA +16D82 ; CHISOI LETTER AI +16D83 ; CHISOI LETTER AA +16D84 ; CHISOI LETTER GA +16D85 ; CHISOI LETTER TA +16D86 ; CHISOI LETTER E +16D87 ; CHISOI LETTER SA +16D88 ; CHISOI LETTER NA +16D89 ; CHISOI LETTER I +16D8A ; CHISOI LETTER KA +16D8B ; CHISOI LETTER RA +16D8C ; CHISOI LETTER MA +16D8D ; CHISOI LETTER HA +16D8E ; CHISOI LETTER RRA +16D8F ; CHISOI LETTER U +16D90 ; CHISOI LETTER DA +16D91 ; CHISOI LETTER LA +16D92 ; CHISOI LETTER O +16D93 ; CHISOI LETTER NYA +16D94 ; CHISOI LETTER NGA +16D95 ; CHISOI LETTER CA +16D96 ; CHISOI LETTER JA +16D97 ; CHISOI LETTER PA +16D98 ; CHISOI SIGN ANUSVARA +16D99 ; CHISOI LETTER YA +16D9A ; CHISOI LETTER DDA +16D9B ; CHISOI LETTER TTA +16D9C ; CHISOI LETTER JARAHA +16D9D ; CHISOI SIGN SISO +16DA0 ; CHISOI DIGIT ZERO +16DA1 ; CHISOI DIGIT ONE +16DA2 ; CHISOI DIGIT TWO +16DA3 ; CHISOI DIGIT THREE +16DA4 ; CHISOI DIGIT FOUR +16DA5 ; CHISOI DIGIT FIVE +16DA6 ; CHISOI DIGIT SIX +16DA7 ; CHISOI DIGIT SEVEN +16DA8 ; CHISOI DIGIT EIGHT +16DA9 ; CHISOI DIGIT NINE 16E40 ; MEDEFAIDRIN CAPITAL LETTER M 16E41 ; 
MEDEFAIDRIN CAPITAL LETTER S 16E42 ; MEDEFAIDRIN CAPITAL LETTER V @@ -36795,6 +37004,56 @@ FFFD ; REPLACEMENT CHARACTER 16E98 ; MEDEFAIDRIN FULL STOP 16E99 ; MEDEFAIDRIN SYMBOL AIVA 16E9A ; MEDEFAIDRIN EXCLAMATION OH +16EA0 ; BERIA ERFE CAPITAL LETTER ARKAB +16EA1 ; BERIA ERFE CAPITAL LETTER BASIGNA +16EA2 ; BERIA ERFE CAPITAL LETTER DARBAI +16EA3 ; BERIA ERFE CAPITAL LETTER EH +16EA4 ; BERIA ERFE CAPITAL LETTER FITKO +16EA5 ; BERIA ERFE CAPITAL LETTER GOWAY +16EA6 ; BERIA ERFE CAPITAL LETTER HIRDEABO +16EA7 ; BERIA ERFE CAPITAL LETTER I +16EA8 ; BERIA ERFE CAPITAL LETTER DJAI +16EA9 ; BERIA ERFE CAPITAL LETTER KOBO +16EAA ; BERIA ERFE CAPITAL LETTER LAKKO +16EAB ; BERIA ERFE CAPITAL LETTER MERI +16EAC ; BERIA ERFE CAPITAL LETTER NINI +16EAD ; BERIA ERFE CAPITAL LETTER GNA +16EAE ; BERIA ERFE CAPITAL LETTER NGAY +16EAF ; BERIA ERFE CAPITAL LETTER OI +16EB0 ; BERIA ERFE CAPITAL LETTER PI +16EB1 ; BERIA ERFE CAPITAL LETTER ERIGO +16EB2 ; BERIA ERFE CAPITAL LETTER ERIGO TAMURA +16EB3 ; BERIA ERFE CAPITAL LETTER SERI +16EB4 ; BERIA ERFE CAPITAL LETTER SHEP +16EB5 ; BERIA ERFE CAPITAL LETTER TATASOUE +16EB6 ; BERIA ERFE CAPITAL LETTER UI +16EB7 ; BERIA ERFE CAPITAL LETTER WASSE +16EB8 ; BERIA ERFE CAPITAL LETTER AY +16EBB ; BERIA ERFE SMALL LETTER ARKAB +16EBC ; BERIA ERFE SMALL LETTER BASIGNA +16EBD ; BERIA ERFE SMALL LETTER DARBAI +16EBE ; BERIA ERFE SMALL LETTER EH +16EBF ; BERIA ERFE SMALL LETTER FITKO +16EC0 ; BERIA ERFE SMALL LETTER GOWAY +16EC1 ; BERIA ERFE SMALL LETTER HIRDEABO +16EC2 ; BERIA ERFE SMALL LETTER I +16EC3 ; BERIA ERFE SMALL LETTER DJAI +16EC4 ; BERIA ERFE SMALL LETTER KOBO +16EC5 ; BERIA ERFE SMALL LETTER LAKKO +16EC6 ; BERIA ERFE SMALL LETTER MERI +16EC7 ; BERIA ERFE SMALL LETTER NINI +16EC8 ; BERIA ERFE SMALL LETTER GNA +16EC9 ; BERIA ERFE SMALL LETTER NGAY +16ECA ; BERIA ERFE SMALL LETTER OI +16ECB ; BERIA ERFE SMALL LETTER PI +16ECC ; BERIA ERFE SMALL LETTER ERIGO +16ECD ; BERIA ERFE SMALL LETTER ERIGO TAMURA +16ECE ; BERIA ERFE SMALL LETTER 
SERI +16ECF ; BERIA ERFE SMALL LETTER SHEP +16ED0 ; BERIA ERFE SMALL LETTER TATASOUE +16ED1 ; BERIA ERFE SMALL LETTER UI +16ED2 ; BERIA ERFE SMALL LETTER WASSE +16ED3 ; BERIA ERFE SMALL LETTER AY 16F00 ; MIAO LETTER PA 16F01 ; MIAO LETTER BA 16F02 ; MIAO LETTER YI PA @@ -36951,6 +37210,11 @@ FFFD ; REPLACEMENT CHARACTER 16FE4 ; KHITAN SMALL SCRIPT FILLER 16FF0 ; VIETNAMESE ALTERNATE READING MARK CA 16FF1 ; VIETNAMESE ALTERNATE READING MARK NHAY +16FF2 ; CHINESE SMALL SIMPLIFIED ER +16FF3 ; CHINESE SMALL TRADITIONAL ER +16FF4 ; YANGQIN SIGN SLOW ONE BEAT +16FF5 ; YANGQIN SIGN SLOW THREE HALF BEATS +16FF6 ; YANGQIN SIGN SLOW TWO BEATS 17000..187FF ; TANGUT IDEOGRAPH-* 18800 ; TANGUT COMPONENT-001 18801 ; TANGUT COMPONENT-002 @@ -45369,6 +45633,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 155030 +# Total code points: 155294 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 8e48d0b85..c66993789 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ -# DerivedNumericType-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# DerivedNumericType-17.0.0.txt +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -165,6 +165,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 12400..1246E ; Numeric # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 16B5B..16B61 ; Numeric # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS 16E80..16E96 ; Numeric # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16FF4..16FF6 ; Numeric # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 1D2C0..1D2D3 ; Numeric # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Numeric # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D378 ; Numeric # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE @@ -192,7 +193,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D 2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 1114 +# Total code points: 1117 # ================================================ @@ -278,12 +279,14 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11DE0..11DE9 ; Decimal # Nd [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE 11F50..11F59 ; Decimal # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE 16130..16139 ; Decimal # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Decimal # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16D70..16D79 ; Decimal # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16DA0..16DA9 ; Decimal # Nd [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE 1CCF0..1CCF9 ; 
Decimal # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE @@ -293,6 +296,6 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 760 +# Total code points: 780 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index ae1f99c0f..93c6b84e3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ -# DerivedNumericValues-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# DerivedNumericValues-17.0.0.txt +# Date: 2024-11-15, 15:06:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -110,12 +110,14 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 11C50 ; 0.0 ; ; 0 # Nd BHAIKSUKI DIGIT ZERO 11D50 ; 0.0 ; ; 0 # Nd MASARAM GONDI DIGIT ZERO 11DA0 ; 0.0 ; ; 0 # Nd GUNJALA GONDI DIGIT ZERO +11DE0 ; 0.0 ; ; 0 # Nd TOLONG SIKI DIGIT ZERO 11F50 ; 0.0 ; ; 0 # Nd KAWI DIGIT ZERO 16130 ; 0.0 ; ; 0 # Nd GURUNG KHEMA DIGIT ZERO 16A60 ; 0.0 ; ; 0 # Nd MRO DIGIT ZERO 16AC0 ; 0.0 ; ; 0 # Nd TANGSA DIGIT ZERO 16B50 ; 0.0 ; ; 0 # Nd PAHAWH HMONG DIGIT ZERO 16D70 ; 0.0 ; ; 0 # Nd KIRAT RAI DIGIT ZERO +16DA0 ; 0.0 ; ; 0 # Nd CHISOI DIGIT ZERO 16E80 ; 0.0 ; ; 0 # No MEDEFAIDRIN DIGIT ZERO 1CCF0 ; 0.0 ; ; 0 # Nd OUTLINED DIGIT ZERO 1D2C0 ; 0.0 ; ; 0 # No KAKTOVIK NUMERAL ZERO @@ -134,7 +136,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO -# Total code points: 96 +# Total code points: 98 # ================================================ @@ -530,6 +532,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 11C5A ; 1.0 ; ; 1 # No BHAIKSUKI NUMBER ONE 11D51 ; 1.0 ; ; 1 # Nd MASARAM GONDI DIGIT ONE 11DA1 ; 1.0 ; ; 1 # Nd GUNJALA GONDI DIGIT ONE +11DE1 ; 1.0 ; ; 1 # Nd TOLONG SIKI DIGIT ONE 11F51 ; 1.0 ; ; 1 # Nd KAWI DIGIT ONE 12415 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESH2 1241E ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESHU @@ -542,8 +545,10 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 16AC1 ; 1.0 ; ; 1 # Nd TANGSA DIGIT ONE 16B51 ; 1.0 ; ; 1 # Nd PAHAWH HMONG DIGIT ONE 16D71 ; 1.0 ; ; 1 # Nd KIRAT RAI DIGIT ONE +16DA1 ; 1.0 ; ; 1 # Nd CHISOI DIGIT ONE 16E81 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE 16E94 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE ALTERNATE FORM +16FF4 ; 1.0 ; ; 1 # Nl YANGQIN SIGN SLOW ONE BEAT 1CCF1 ; 1.0 ; ; 1 # Nd OUTLINED DIGIT ONE 1D2C1 ; 1.0 ; ; 1 # No KAKTOVIK NUMERAL ONE 1D2E1 ; 1.0 ; ; 1 # No MAYAN NUMERAL 
ONE @@ -569,13 +574,14 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 152 +# Total code points: 155 # ================================================ 0F2B ; 1.5 ; ; 3/2 # No TIBETAN DIGIT HALF TWO +16FF5 ; 1.5 ; ; 3/2 # Nl YANGQIN SIGN SLOW THREE HALF BEATS -# Total code points: 1 +# Total code points: 2 # ================================================ @@ -688,6 +694,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 11C5B ; 2.0 ; ; 2 # No BHAIKSUKI NUMBER TWO 11D52 ; 2.0 ; ; 2 # Nd MASARAM GONDI DIGIT TWO 11DA2 ; 2.0 ; ; 2 # Nd GUNJALA GONDI DIGIT TWO +11DE2 ; 2.0 ; ; 2 # Nd TOLONG SIKI DIGIT TWO 11F52 ; 2.0 ; ; 2 # Nd KAWI DIGIT TWO 12400 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ASH 12416 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESH2 @@ -704,8 +711,10 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 16AC2 ; 2.0 ; ; 2 # Nd TANGSA DIGIT TWO 16B52 ; 2.0 ; ; 2 # Nd PAHAWH HMONG DIGIT TWO 16D72 ; 2.0 ; ; 2 # Nd KIRAT RAI DIGIT TWO +16DA2 ; 2.0 ; ; 2 # Nd CHISOI DIGIT TWO 16E82 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO 16E95 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO ALTERNATE FORM +16FF6 ; 2.0 ; ; 2 # Nl YANGQIN SIGN SLOW TWO BEATS 1CCF2 ; 2.0 ; ; 2 # Nd OUTLINED DIGIT TWO 1D2C2 ; 2.0 ; ; 2 # No KAKTOVIK NUMERAL TWO 1D2E2 ; 2.0 ; ; 2 # No MAYAN NUMERAL TWO @@ -731,7 +740,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 154 +# Total code points: 157 # ================================================ @@ -841,6 +850,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 11C5C ; 3.0 ; ; 3 # No BHAIKSUKI NUMBER THREE 11D53 ; 3.0 ; ; 3 # Nd MASARAM GONDI DIGIT THREE 11DA3 ; 3.0 ; ; 3 # Nd GUNJALA GONDI DIGIT THREE +11DE3 ; 3.0 ; ; 3 # Nd TOLONG SIKI DIGIT THREE 11F53 ; 3.0 ; ; 3 # Nd KAWI DIGIT THREE 12401 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH 12408 
; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE DISH @@ -858,6 +868,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 16AC3 ; 3.0 ; ; 3 # Nd TANGSA DIGIT THREE 16B53 ; 3.0 ; ; 3 # Nd PAHAWH HMONG DIGIT THREE 16D73 ; 3.0 ; ; 3 # Nd KIRAT RAI DIGIT THREE +16DA3 ; 3.0 ; ; 3 # Nd CHISOI DIGIT THREE 16E83 ; 3.0 ; ; 3 # No MEDEFAIDRIN DIGIT THREE 16E96 ; 3.0 ; ; 3 # No MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 1CCF3 ; 3.0 ; ; 3 # Nd OUTLINED DIGIT THREE @@ -887,7 +898,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 152 +# Total code points: 154 # ================================================ @@ -991,6 +1002,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 11C5D ; 4.0 ; ; 4 # No BHAIKSUKI NUMBER FOUR 11D54 ; 4.0 ; ; 4 # Nd MASARAM GONDI DIGIT FOUR 11DA4 ; 4.0 ; ; 4 # Nd GUNJALA GONDI DIGIT FOUR +11DE4 ; 4.0 ; ; 4 # Nd TOLONG SIKI DIGIT FOUR 11F54 ; 4.0 ; ; 4 # Nd KAWI DIGIT FOUR 12402 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH 12409 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR DISH @@ -1009,6 +1021,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 16AC4 ; 4.0 ; ; 4 # Nd TANGSA DIGIT FOUR 16B54 ; 4.0 ; ; 4 # Nd PAHAWH HMONG DIGIT FOUR 16D74 ; 4.0 ; ; 4 # Nd KIRAT RAI DIGIT FOUR +16DA4 ; 4.0 ; ; 4 # Nd CHISOI DIGIT FOUR 16E84 ; 4.0 ; ; 4 # No MEDEFAIDRIN DIGIT FOUR 1CCF4 ; 4.0 ; ; 4 # Nd OUTLINED DIGIT FOUR 1D2C4 ; 4.0 ; ; 4 # No KAKTOVIK NUMERAL FOUR @@ -1036,7 +1049,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 143 +# Total code points: 145 # ================================================ @@ -1144,6 +1157,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 11C5E ; 5.0 ; ; 5 # No BHAIKSUKI NUMBER FIVE 11D55 ; 5.0 ; ; 5 # Nd MASARAM GONDI DIGIT FIVE 11DA5 ; 5.0 ; ; 5 # Nd GUNJALA GONDI DIGIT FIVE +11DE5 ; 5.0 ; ; 5 # Nd TOLONG 
SIKI DIGIT FIVE 11F55 ; 5.0 ; ; 5 # Nd KAWI DIGIT FIVE 12403 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH 1240A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE DISH @@ -1161,6 +1175,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 16AC5 ; 5.0 ; ; 5 # Nd TANGSA DIGIT FIVE 16B55 ; 5.0 ; ; 5 # Nd PAHAWH HMONG DIGIT FIVE 16D75 ; 5.0 ; ; 5 # Nd KIRAT RAI DIGIT FIVE +16DA5 ; 5.0 ; ; 5 # Nd CHISOI DIGIT FIVE 16E85 ; 5.0 ; ; 5 # No MEDEFAIDRIN DIGIT FIVE 1CCF5 ; 5.0 ; ; 5 # Nd OUTLINED DIGIT FIVE 1D2C5 ; 5.0 ; ; 5 # No KAKTOVIK NUMERAL FIVE @@ -1187,7 +1202,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 141 +# Total code points: 143 # ================================================ @@ -1285,6 +1300,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 11C5F ; 6.0 ; ; 6 # No BHAIKSUKI NUMBER SIX 11D56 ; 6.0 ; ; 6 # Nd MASARAM GONDI DIGIT SIX 11DA6 ; 6.0 ; ; 6 # Nd GUNJALA GONDI DIGIT SIX +11DE6 ; 6.0 ; ; 6 # Nd TOLONG SIKI DIGIT SIX 11F56 ; 6.0 ; ; 6 # Nd KAWI DIGIT SIX 12404 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH 1240B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX DISH @@ -1299,6 +1315,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 16AC6 ; 6.0 ; ; 6 # Nd TANGSA DIGIT SIX 16B56 ; 6.0 ; ; 6 # Nd PAHAWH HMONG DIGIT SIX 16D76 ; 6.0 ; ; 6 # Nd KIRAT RAI DIGIT SIX +16DA6 ; 6.0 ; ; 6 # Nd CHISOI DIGIT SIX 16E86 ; 6.0 ; ; 6 # No MEDEFAIDRIN DIGIT SIX 1CCF6 ; 6.0 ; ; 6 # Nd OUTLINED DIGIT SIX 1D2C6 ; 6.0 ; ; 6 # No KAKTOVIK NUMERAL SIX @@ -1323,7 +1340,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 125 +# Total code points: 127 # ================================================ @@ -1420,6 +1437,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 11C60 ; 7.0 ; ; 7 # No BHAIKSUKI NUMBER SEVEN 11D57 ; 7.0 ; ; 7 # Nd MASARAM GONDI DIGIT SEVEN 11DA7 ; 7.0 ; ; 
7 # Nd GUNJALA GONDI DIGIT SEVEN +11DE7 ; 7.0 ; ; 7 # Nd TOLONG SIKI DIGIT SEVEN 11F57 ; 7.0 ; ; 7 # Nd KAWI DIGIT SEVEN 12405 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN ASH 1240C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN DISH @@ -1433,6 +1451,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 16AC7 ; 7.0 ; ; 7 # Nd TANGSA DIGIT SEVEN 16B57 ; 7.0 ; ; 7 # Nd PAHAWH HMONG DIGIT SEVEN 16D77 ; 7.0 ; ; 7 # Nd KIRAT RAI DIGIT SEVEN +16DA7 ; 7.0 ; ; 7 # Nd CHISOI DIGIT SEVEN 16E87 ; 7.0 ; ; 7 # No MEDEFAIDRIN DIGIT SEVEN 1CCF7 ; 7.0 ; ; 7 # Nd OUTLINED DIGIT SEVEN 1D2C7 ; 7.0 ; ; 7 # No KAKTOVIK NUMERAL SEVEN @@ -1457,7 +1476,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 125 +# Total code points: 127 # ================================================ @@ -1551,6 +1570,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 11C61 ; 8.0 ; ; 8 # No BHAIKSUKI NUMBER EIGHT 11D58 ; 8.0 ; ; 8 # Nd MASARAM GONDI DIGIT EIGHT 11DA8 ; 8.0 ; ; 8 # Nd GUNJALA GONDI DIGIT EIGHT +11DE8 ; 8.0 ; ; 8 # Nd TOLONG SIKI DIGIT EIGHT 11F58 ; 8.0 ; ; 8 # Nd KAWI DIGIT EIGHT 12406 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT ASH 1240D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT DISH @@ -1564,6 +1584,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 16AC8 ; 8.0 ; ; 8 # Nd TANGSA DIGIT EIGHT 16B58 ; 8.0 ; ; 8 # Nd PAHAWH HMONG DIGIT EIGHT 16D78 ; 8.0 ; ; 8 # Nd KIRAT RAI DIGIT EIGHT +16DA8 ; 8.0 ; ; 8 # Nd CHISOI DIGIT EIGHT 16E88 ; 8.0 ; ; 8 # No MEDEFAIDRIN DIGIT EIGHT 1CCF8 ; 8.0 ; ; 8 # Nd OUTLINED DIGIT EIGHT 1D2C8 ; 8.0 ; ; 8 # No KAKTOVIK NUMERAL EIGHT @@ -1587,7 +1608,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA 1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT -# Total code points: 120 +# Total code points: 122 # ================================================ @@ -1684,6 +1705,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 11C62 ; 
9.0 ; ; 9 # No BHAIKSUKI NUMBER NINE 11D59 ; 9.0 ; ; 9 # Nd MASARAM GONDI DIGIT NINE 11DA9 ; 9.0 ; ; 9 # Nd GUNJALA GONDI DIGIT NINE +11DE9 ; 9.0 ; ; 9 # Nd TOLONG SIKI DIGIT NINE 11F59 ; 9.0 ; ; 9 # Nd KAWI DIGIT NINE 12407 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE ASH 1240E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE DISH @@ -1697,6 +1719,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 16AC9 ; 9.0 ; ; 9 # Nd TANGSA DIGIT NINE 16B59 ; 9.0 ; ; 9 # Nd PAHAWH HMONG DIGIT NINE 16D79 ; 9.0 ; ; 9 # Nd KIRAT RAI DIGIT NINE +16DA9 ; 9.0 ; ; 9 # Nd CHISOI DIGIT NINE 16E89 ; 9.0 ; ; 9 # No MEDEFAIDRIN DIGIT NINE 1CCF9 ; 9.0 ; ; 9 # Nd OUTLINED DIGIT NINE 1D2C9 ; 9.0 ; ; 9 # No KAKTOVIK NUMERAL NINE @@ -1721,7 +1744,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 126 +# Total code points: 128 # ================================================ diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java b/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java index e2e9c1708..9b8019062 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyUtilities.java @@ -31,6 +31,9 @@ public String merge(String first, String second) { static final > M putNew(M map, K key, V value) { final V oldValue = map.get(key); if (oldValue != null) { + if (oldValue.equals(value)) { + return map; + } throw new UnicodePropertyException( "Key already present in Map: " + key @@ -48,6 +51,9 @@ static final UnicodeMap putNew( final V oldValue = map.get(key); if (oldValue != null && (missingSet == null || !missingSet.contains(key))) { if (merger == null) { + if (oldValue.equals(value)) { + return map; + } throw new UnicodePropertyException( "Key already present in UnicodeMap: " + Utility.hex(key) diff --git 
a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 21e74b88e..4ec9928b4 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -204,6 +204,7 @@ public enum Block_Values implements Named { Bassa_Vah("Bassa_Vah"), Batak("Batak"), Bengali("Bengali"), + Beria_Erfe("Beria_Erfe"), Bhaiksuki("Bhaiksuki"), Block_Elements("Block_Elements"), Bopomofo("Bopomofo"), @@ -221,6 +222,7 @@ public enum Block_Values implements Named { Cherokee("Cherokee"), Cherokee_Supplement("Cherokee_Sup"), Chess_Symbols("Chess_Symbols"), + Chisoi("Chisoi"), Chorasmian("Chorasmian"), CJK_Unified_Ideographs("CJK"), CJK_Compatibility("CJK_Compat"), @@ -445,9 +447,11 @@ public enum Block_Values implements Named { Samaritan("Samaritan"), Saurashtra("Saurashtra"), Sharada("Sharada"), + Sharada_Supplement("Sharada_Sup"), Shavian("Shavian"), Shorthand_Format_Controls("Shorthand_Format_Controls"), Siddham("Siddham"), + Sidetic("Sidetic"), Sinhala("Sinhala"), Sinhala_Archaic_Numbers("Sinhala_Archaic_Numbers"), Small_Form_Variants("Small_Forms"), @@ -497,6 +501,7 @@ public enum Block_Values implements Named { Tifinagh("Tifinagh"), Tirhuta("Tirhuta"), Todhri("Todhri"), + Tolong_Siki("Tolong_Siki"), Toto("Toto"), Transport_And_Map_Symbols("Transport_And_Map"), Tulu_Tigalari("Tulu_Tigalari"), @@ -1351,6 +1356,7 @@ public enum Joining_Group_Values implements Named { Teh_Marbuta("Teh_Marbuta"), Teh_Marbuta_Goal("Teh_Marbuta_Goal", "Hamza_On_Heh_Goal"), Teth("Teth"), + Thin_Noon("Thin_Noon"), Thin_Yeh("Thin_Yeh"), Vertical_Tail("Vertical_Tail"), Waw("Waw"), @@ -1862,6 +1868,7 @@ public enum Script_Values implements Named { Carian("Cari"), Cham("Cham"), Cherokee("Cher"), + Chisoi("Chis"), Chorasmian("Chrs"), Coptic("Copt", "Qaac"), Cypro_Minoan("Cpmn"), @@ -1960,6 +1967,7 @@ public enum Script_Values implements Named { 
Phoenician("Phnx"), Miao("Plrd"), Inscriptional_Parthian("Prti"), + Beria_Erfe("Qaba"), Rejang("Rjng"), Hanifi_Rohingya("Rohg"), Runic("Runr"), @@ -1970,6 +1978,7 @@ public enum Script_Values implements Named { Shavian("Shaw"), Sharada("Shrd"), Siddham("Sidd"), + Sidetic("Sidt"), Khudawadi("Sind"), Sinhala("Sinh"), Sogdian("Sogd"), @@ -1996,6 +2005,7 @@ public enum Script_Values implements Named { Tirhuta("Tirh"), Tangsa("Tnsa"), Todhri("Todr"), + Tolong_Siki("Tols"), Toto("Toto"), Tulu_Tigalari("Tutg"), Ugaritic("Ugar"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java index ff506b4ee..597862b41 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateBreakTest.java @@ -855,23 +855,14 @@ public void generateTable(PrintWriter out) { } out.print( "." - + "
  • Any “treat as” or “ignore” rules are handled as discussed in UAX #" - + (fileName.equals("Line") ? "14" : "29") - + ", and thus reflected in a transformation of the rules usually not visible here. "); - if (fileName.equals("Line")) { - out.print( - "Where it does show up, an extra variable like CM+ may appear, and the rule may be recast. "); - } - out.print( - "In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  • "); + + "
  • Final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.
  • "); if (fileName.equals("Line")) { out.print( "Where a rule has multiple parts (lines), each one is numbered using hundredths, " + "such as 21.01) × BA, 21.02) × HY, ... "); } out.println( - "In some cases, the numbering and form of a rule is changed due to “treat as” rules.
  • " - + "" + "" + "

    For the original rules" + (fileName.equals("Word") || fileName.equals("Sentence") ? " and the macro values they use" @@ -926,6 +917,9 @@ public void generateTable(PrintWriter out) { if (breakPoint < 0) { breakPoint = ruleBody.indexOf('÷'); } + if (breakPoint < 0) { + breakPoint = ruleBody.indexOf('→'); + } out.println( "

    0.2sot ÷
    0.3÷ eot
    " + linkAndAnchor("r" + ruleNumber, ruleNumber) @@ -1399,7 +1393,10 @@ public GenerateGraphemeBreakTest(UCD ucd, Segmenter.Target target) { "क" + "\u094D" + "a", "a" + "\u094D" + "त", "?" + "\u094D" + "त", - "क" + "\u094D\u094D" + "त")); + "क" + "\u094D\u094D" + "त", + // From L2/14-131, §3.2; made into a single EGC by 179-C31. + // This test would have caught ICU-22956. + "સૻ્સૻ")); } } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index c1dc65f44..22a7b0f12 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1309,9 +1309,8 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 12 added TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7. return TANGUT_BASE; } - // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. - if (ch <= 0x187FF && rCompositeVersion >= 0x100000) { - // Unicode [..] added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. + if (ch <= 0x187FF && rCompositeVersion >= 0x110000) { + // Unicode 17 added TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF. return TANGUT_BASE; } } @@ -1326,14 +1325,12 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { if (ch <= 0x18D08) { return TANGUT_SUP_BASE; // 18D00..18D08 Tangut Ideograph Supplement } - // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x100000. - if (ch <= 0x18D1C && rCompositeVersion >= 0x100000) { - // Unicode [..] added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. + if (ch <= 0x18D1C && rCompositeVersion >= 0x110000) { + // Unicode 17 added TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1C. return TANGUT_SUP_BASE; } - // REMOVE BEFORE FLIGHT: The actual version will be greater than 0x110000. if (ch <= 0x18D1E && rCompositeVersion >= 0x110000) { - // Unicode [..] added TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E. 
+ // Unicode 17 added TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E. return TANGUT_SUP_BASE; } } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java index 9075daea6..100273ad9 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java @@ -1227,7 +1227,9 @@ public final class UCD_Names implements UCD_Types { "THIN_YEH", "VERTICAL_TAIL", // Unicode 16 - "KASHMIRI_YEH" + "KASHMIRI_YEH", + // Unicode n > 16 + "THIN_NOON", }; static { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index 70c9ea186..e486c0993 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -677,118 +677,7 @@ public interface UCD_Types { JT_T = 5, LIMIT_JOINING_TYPE = 6; public static short NO_SHAPING = 0, - AIN = 1, - ALAPH = 2, - ALEF = 3, - BEH = 4, - BETH = 5, - DAL = 6, - DALATH_RISH = 7, - E = 8, - FEH = 9, - FINAL_SEMKATH = 10, - GAF = 11, - GAMAL = 12, - HAH = 13, - TEH_MARBUTA_GOAL = 14, - HE = 15, - HEH = 16, - HEH_GOAL = 17, - HETH = 18, - KAF = 19, - KAPH = 20, - KNOTTED_HEH = 21, - LAM = 22, - LAMADH = 23, - MEEM = 24, - MIM = 25, - NOON = 26, - NUN = 27, - PE = 28, - QAF = 29, - QAPH = 30, - REH = 31, - REVERSED_PE = 32, - SAD = 33, - SADHE = 34, - SEEN = 35, - SEMKATH = 36, - SHIN = 37, - SWASH_KAF = 38, - TAH = 39, - TAW = 40, - TEH_MARBUTA = 41, - TETH = 42, - WAW = 43, - SYRIAC_WAW = 44, - YEH = 45, - YEH_BARREE = 46, - YEH_WITH_TAIL = 47, - YUDH = 48, - YUDH_HE = 49, - ZAIN = 50, - ZHAIN = 51, - KHAPH = 52, - FE = 53, - BURUSHASKI_YEH_BARREE = 54, - FARSI_YEH = 55, - NYA = 56, - ROHINGYA_YEH = 57, - HAMZAH_ON_HA_GOAL = 58, - STRAIGHT_WAW = 59, - MANICHAEAN_ALEPH = 60, - MANICHAEAN_AYIN = 61, - MANICHAEAN_BETH = 62, - 
MANICHAEAN_DALETH = 63, - MANICHAEAN_DHAMEDH = 64, - MANICHAEAN_FIVE = 65, - MANICHAEAN_GIMEL = 66, - MANICHAEAN_HETH = 67, - MANICHAEAN_HUNDRED = 68, - MANICHAEAN_KAPH = 69, - MANICHAEAN_LAMEDH = 70, - MANICHAEAN_MEM = 71, - MANICHAEAN_NUN = 72, - MANICHAEAN_ONE = 73, - MANICHAEAN_PE = 74, - MANICHAEAN_QOPH = 75, - MANICHAEAN_RESH = 76, - MANICHAEAN_SADHE = 77, - MANICHAEAN_SAMEKH = 78, - MANICHAEAN_TAW = 79, - MANICHAEAN_TEN = 80, - MANICHAEAN_TETH = 81, - MANICHAEAN_THAMEDH = 82, - MANICHAEAN_TWENTY = 83, - MANICHAEAN_WAW = 84, - MANICHAEAN_YODH = 85, - MANICHAEAN_ZAYIN = 86, - // Unicode 9: - AFRICAN_FEH = 87, - AFRICAN_QAF = 88, - AFRICAN_NOON = 89, - // Unicode 10: - MALAYALAM_NGA = 90, - MALAYALAM_JA = 91, - MALAYALAM_NYA = 92, - MALAYALAM_TTA = 93, - MALAYALAM_NNA = 94, - MALAYALAM_NNNA = 95, - MALAYALAM_BHA = 96, - MALAYALAM_RA = 97, - MALAYALAM_LLA = 98, - MALAYALAM_LLLA = 99, - MALAYALAM_SSA = 100, - // Unicode 11, non-singletons: - Hanifi_Rohingya_Pa = 101, - Hanifi_Rohingya_Kinna_Ya = 102, - // Unicode 14 - THIN_YEH = 103, - VERTICAL_TAIL = 104, - // Unicode 16 - KASHMIRI_YEH = 105, - // limit - LIMIT_JOINING_GROUP = KASHMIRI_YEH + 1; + LIMIT_JOINING_GROUP = (short) UCD_Names.JOINING_GROUP.length; static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3; public static final int NF_COMPATIBILITY_MASK = 2, NF_COMPOSITION_MASK = 1; diff --git a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java index f630b199e..2b3644b85 100644 --- a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java +++ b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java @@ -20,8 +20,6 @@ import com.ibm.icu.text.UnicodeSet.XSymbolTable; import com.ibm.icu.text.UnicodeSetIterator; import com.ibm.icu.util.ULocale; -import java.io.IOException; -import java.io.PrintWriter; import java.text.ParsePosition; import java.util.ArrayList; import java.util.Collection; @@ -32,6 +30,7 @@ import java.util.Map; 
import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -39,10 +38,7 @@ import org.unicode.cldr.util.RegexUtilities; import org.unicode.cldr.util.TransliteratorUtilities; import org.unicode.props.UnicodeProperty; -import org.unicode.text.UCD.Default; -import org.unicode.text.UCD.ToolUnicodePropertySource; -import org.unicode.text.utility.Settings; -import org.unicode.tools.Segmenter.Rule.Breaks; +import org.unicode.tools.Segmenter.SegmentationRule.Breaks; /** Ordered list of rules, with variables resolved before building. Use Builder to make. */ public class Segmenter { @@ -170,16 +166,26 @@ public boolean breaksAt(CharSequence text, int position) { breakRule = NOBREAK_SUPPLEMENTARY; return false; } + StringBuilder remapped = new StringBuilder(text.toString()); + Consumer remap = + (s) -> { + remapped.setLength(0); + remapped.append(s); + }; + Integer[] indexInRemapped = new Integer[text.length() + 1]; + for (int i = 0; i < indexInRemapped.length; ++i) { + indexInRemapped[i] = i; + } for (int i = 0; i < rules.size(); ++i) { - Rule rule = rules.get(i); + SegmentationRule rule = rules.get(i); if (DEBUG_AT_RULE_CONTAINING != null && rule.toString().contains(DEBUG_AT_RULE_CONTAINING)) { System.out.println(" !#$@543 Debug"); } - Breaks result = rule.matches(text, position); - if (result != Rule.Breaks.UNKNOWN_BREAK) { + Breaks result = rule.applyAt(position, remapped, indexInRemapped, remap); + if (result != SegmentationRule.Breaks.UNKNOWN_BREAK) { breakRule = orders.get(i).doubleValue(); - return result == Rule.Breaks.BREAK; + return result == SegmentationRule.Breaks.BREAK; } } breakRule = BREAK_ANY; @@ -197,12 +203,12 @@ public int getRuleStatusVec(int[] ruleStatus) { * @param order * @param rule */ - public void add(double order, Rule rule) { + public void add(double order, SegmentationRule rule) { 
orders.add(new Double(order)); rules.add(rule); } - public Rule get(double order) { + public SegmentationRule get(double order) { int loc = orders.indexOf(new Double(order)); if (loc < 0) return null; return rules.get(loc); @@ -231,8 +237,7 @@ public String toString(boolean showResolved) { return result; } - /** A rule that determines the status of an offset. */ - public static class Rule { + public abstract static class SegmentationRule { /** Status of a breaking rule */ public enum Breaks { UNKNOWN_BREAK, @@ -240,19 +245,151 @@ public enum Breaks { NO_BREAK }; + /** + * Applies this rule throughout the text. + * + * @param remappedString The text, with any preceding remappings applied. + * @param indexInRemapped An array whose size is one greater than the original string. + * Associates indices in the original string to indices in remappedString. + * indexInRemapped[0] == 0, and indexInRemapped[indexInRemapped.size() - 1] == + * remappedString.size(). Whenever indexInRemapped[i] == null, resolvedBreaks[i] == + * NO_BREAK: this corresponds to positions inside a string which has been replaced by a + * remap rule. Remap rules may update this mapping. + * @param resolvedBreaks An array whose size is one greater than the original string, + * indicating resolved breaks in the string. Values that are UNKNOWN_BREAK are updated + * if the rule applies to their position. + * @param remap Called by remap rules with the value of remappedString to be passed to + * subsequent rules. The indices in indexInRemapped are updated consistently. + */ + public abstract void apply( + CharSequence remappedString, + Integer[] indexInRemapped, + Breaks[] resolvedBreaks, + Consumer remap); + + protected abstract String toString(boolean showResolved); + + /** Same as above, but only returns the resolution at the current position. 
*/ + public abstract Breaks applyAt( + int position, + CharSequence remappedString, + Integer[] indexInRemapped, + Consumer remap); + + public String toString() { + return toString(false); + } + } + + /** A « treat as » rule. */ + public static class RemapRule extends SegmentationRule { + + public RemapRule(String leftHandSide, String replacement, String line) { + pattern = Pattern.compile(leftHandSide, REGEX_FLAGS); + this.replacement = replacement; + name = line; + } + + @Override + public void apply( + CharSequence remappedString, + Integer[] indexInRemapped, + Breaks[] resolvedBreaks, + Consumer remap) { + final var result = new StringBuilder(); + int i = 0; + int offset = 0; + final var matcher = pattern.matcher(remappedString); + while (matcher.find()) { + for (; ; ++i) { + if (indexInRemapped[i] == null) { + continue; + } + if (indexInRemapped[i] > matcher.start()) { + break; + } + indexInRemapped[i] += offset; + } + for (; ; ++i) { + if (indexInRemapped[i] == null) { + continue; + } + if (indexInRemapped[i] == matcher.end()) { + break; + } + if (resolvedBreaks[i] == Breaks.BREAK) { + throw new IllegalArgumentException( + "Replacement rule at remapped indices " + + matcher.start() + + " sqq. spans a break: " + + remappedString); + } + resolvedBreaks[i] = Breaks.NO_BREAK; + indexInRemapped[i] = null; + } + matcher.appendReplacement(result, replacement); + offset = result.length() - indexInRemapped[i]; + } + for (; i < indexInRemapped.length; ++i) { + if (indexInRemapped[i] == null) { + continue; + } + indexInRemapped[i] += offset; + } + matcher.appendTail(result); + if (indexInRemapped[indexInRemapped.length - 1] != result.length()) { + StringBuilder indices = new StringBuilder(); + for (var j : indexInRemapped) { + indices.append(j == null ? 
"null" : j.toString()); + indices.append(","); + } + throw new IllegalArgumentException( + "Inconsistent indexInRemapped " + + indices + + " for new remapped string " + + result); + } + remap.accept(result); + } + + private Pattern pattern; + private String replacement; + private String name; + + @Override + public Breaks applyAt( + int position, + CharSequence remappedString, + Integer[] indexInRemapped, + Consumer remap) { + var resolvedBreaks = new Breaks[indexInRemapped.length]; + apply(remappedString, indexInRemapped, resolvedBreaks, remap); + return resolvedBreaks[position] == null + ? Breaks.UNKNOWN_BREAK + : resolvedBreaks[position]; + } + + @Override + protected String toString(boolean showResolved) { + return name; + } + } + + /** A rule that determines the status of an offset. */ + public static class RegexRule extends SegmentationRule { /** * @param before pattern for the text after the offset. All variables must be resolved. * @param result the break status to return when the rule is invoked * @param after pattern for the text before the offset. All variables must be resolved. 
* @param line */ - public Rule(String before, Breaks result, String after, String line) { + public RegexRule(String before, Breaks result, String after, String line) { breaks = result; before = ".*(" + before + ")"; String parsing = null; try { - matchPrevious = Pattern.compile(parsing = before, REGEX_FLAGS).matcher(""); - matchSucceeding = Pattern.compile(parsing = after, REGEX_FLAGS).matcher(""); + this.before = Pattern.compile(parsing = before, REGEX_FLAGS); + this.after = Pattern.compile(parsing = after, REGEX_FLAGS); } catch (PatternSyntaxException e) { // Format: Unclosed character class near index 927 int index = e.getIndex(); @@ -279,26 +416,37 @@ public Rule(String before, Breaks result, String after, String line) { // COMMENTS allows whitespace } - // Matcher numberMatcher = PatternCache.get("[0-9]+").matcher(""); - - /** - * Match the rule against text, at a position - * - * @param text - * @param position - * @return break status - */ - public Breaks matches(CharSequence text, int position) { - if (!matchAfter(matchSucceeding, text, position)) return Breaks.UNKNOWN_BREAK; - if (!matchBefore(matchPrevious, text, position)) return Breaks.UNKNOWN_BREAK; - return breaks; + @Override + public void apply( + CharSequence remappedString, + Integer[] indexInRemapped, + Breaks[] resolvedBreaks, + Consumer remap) { + for (int i = 0; i < indexInRemapped.length; ++i) { + if (resolvedBreaks[i] == Breaks.UNKNOWN_BREAK) { + resolvedBreaks[i] = applyAt(i, remappedString, indexInRemapped, remap); + } + } } - /** Debugging aid */ - public String toString() { - return toString(false); + @Override + public Breaks applyAt( + int position, + CharSequence remappedString, + Integer[] indexInRemapped, + Consumer remap) { + if (after.matcher(remappedString) + .region(indexInRemapped[position], remappedString.length()) + .lookingAt() + && before.matcher(remappedString) + .region(0, indexInRemapped[position]) + .matches()) { + return breaks; + } + return Breaks.UNKNOWN_BREAK; } + 
@Override public String toString(boolean showResolved) { String result = name; if (showResolved) result += ": " + resolved; @@ -306,29 +454,16 @@ public String toString(boolean showResolved) { } // ============== Internals ================ - // in Java 5, this can be more efficient, and use a single regex - // of the form "(?<= before) after". MUST then have transparent bounds - private Matcher matchPrevious; - private Matcher matchSucceeding; + // We cannot use a single regex of the form "(?<= before) after" because + // (RI RI)* RI × RI would require unbounded lookbehind. + private Pattern before; + private Pattern after; private String name; private String resolved; private Breaks breaks; } - /** utility, since we are using Java 1.4 */ - static boolean matchAfter(Matcher matcher, CharSequence text, int position) { - return matcher.reset(text.subSequence(position, text.length())).lookingAt(); - } - - /** - * utility, since we are using Java 1.4 depends on the pattern having been built with .* not - * very efficient, works for testing and the best we can do. - */ - static boolean matchBefore(Matcher matcher, CharSequence text, int position) { - return matcher.reset(text.subSequence(0, position)).matches(); - } - /** Separate the builder for clarity */ /** Sort the longest strings first. Used for variable lists. 
*/ @@ -474,17 +609,26 @@ public boolean addLine(String line) { throw new IllegalArgumentException("Rule must be of form '1)...': <" + line + ">"); } line = line.substring(relationPosition + 1).trim(); + relationPosition = line.indexOf('→'); + if (relationPosition >= 0) { + addRemapRule( + order, + line.substring(0, relationPosition).trim(), + line.substring(relationPosition + 1).trim(), + line); + return true; + } relationPosition = line.indexOf('\u00F7'); - Breaks breaks = Segmenter.Rule.Breaks.BREAK; + Breaks breaks = Segmenter.RegexRule.Breaks.BREAK; if (relationPosition < 0) { relationPosition = line.indexOf('\u00D7'); if (relationPosition < 0) { throw new IllegalArgumentException( "Couldn't find =, \u00F7, or \u00D7 on line: " + line); } - breaks = Segmenter.Rule.Breaks.NO_BREAK; + breaks = Segmenter.RegexRule.Breaks.NO_BREAK; } - addRule( + addRegexRule( order, line.substring(0, relationPosition).trim(), breaks, @@ -591,6 +735,40 @@ public static UnicodeMap composeWith( return target; } + Builder addRemapRule(Double order, String before, String after, String line) { + line = whiteSpace.reset(line).replaceAll(" "); + if (lastComments.size() != 0) { + double increment = 0.0001; + double temp = order.doubleValue() - increment * lastComments.size(); + for (int i = 0; i < lastComments.size(); ++i) { + Double position = new Double(temp); + if (xmlRules.containsKey(position)) { + System.out.println("WARNING: Overriding rule " + position); + } + xmlRules.put(position, lastComments.get(i)); + temp += increment; + } + lastComments.clear(); + } + if (htmlRules.containsKey(order) + || xmlRules.containsKey(order) + || rules.containsKey(order)) { + throw new IllegalArgumentException("Duplicate numbers for rules: " + order); + } + htmlRules.put(order, TransliteratorUtilities.toHTML.transliterate(line)); + xmlRules.put( + order, + " " + + TransliteratorUtilities.toXML.transliterate(line) + + " "); + rules.put(order, new Segmenter.RemapRule(replaceVariables(before), after, 
line)); + return this; + } + /** * Add a numbered rule, already broken into the parts before and after. * @@ -601,7 +779,8 @@ public static UnicodeMap composeWith( * @param line * @return */ - Builder addRule(Double order, String before, Breaks breaks, String after, String line) { + Builder addRegexRule( + Double order, String before, Breaks breaks, String after, String line) { // if (brokenIdentifierMatcher.reset(line).find()) { // int start = brokenIdentifierMatcher.start(); // int end = brokenIdentifierMatcher.end(); @@ -646,7 +825,7 @@ Builder addRule(Double order, String before, Breaks breaks, String after, String } rules.put( order, - new Segmenter.Rule( + new Segmenter.RegexRule( replaceVariables(before), breaks, replaceVariables(after), line)); return this; } @@ -671,9 +850,9 @@ public Segmenter make() { // longest first, to // make substitution // easy - private Map rules = new TreeMap(); + private Map rules = new TreeMap(); - public Map getProcessedRules() { + public Map getProcessedRules() { return rules; } @@ -813,532 +992,11 @@ public Map getOriginalVariables() { // ============== Internals ================ - private List rules = new ArrayList(1); + private List rules = new ArrayList(1); private List orders = new ArrayList(1); private double breakRule; public UnicodeMap getSamples() { return samples; } - - // TODO: delete? move elsewhere? - // Only used in main() to write to some files. Out of sync with SegmenterDefault.txt. 
- private static final String[][] cannedRules = { - { - "GraphemeClusterBreak", - "$CR=\\p{Grapheme_Cluster_Break=CR}", - "$LF=\\p{Grapheme_Cluster_Break=LF}", - "$Control=\\p{Grapheme_Cluster_Break=Control}", - "$Extend=\\p{Grapheme_Cluster_Break=Extend}", - "$ZWJ=\\p{Grapheme_Cluster_Break=ZWJ}", - "$RI=\\p{Grapheme_Cluster_Break=Regional_Indicator}", - "$Prepend=\\p{Grapheme_Cluster_Break=Prepend}", - "$SpacingMark=\\p{Grapheme_Cluster_Break=SpacingMark}", - "$L=\\p{Grapheme_Cluster_Break=L}", - "$V=\\p{Grapheme_Cluster_Break=V}", - "$T=\\p{Grapheme_Cluster_Break=T}", - "$LV=\\p{Grapheme_Cluster_Break=LV}", - "$LVT=\\p{Grapheme_Cluster_Break=LVT}", - "$Virama=[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&\\p{Indic_Syllabic_Category=Virama}]", - "$LinkingConsonant=[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&\\p{Indic_Syllabic_Category=Consonant}]", - - // "$E_Base=\\p{Grapheme_Cluster_Break=E_Base}", - // "$E_Modifier=\\p{Grapheme_Cluster_Break=E_Modifier}", - - "$ExtPict=\\p{Extended_Pictographic}", - "$ExtCccZwj=[[$Extend-\\p{ccc=0}] $ZWJ]", - // "$EBG=\\p{Grapheme_Cluster_Break=E_Base_GAZ}", - // "$Glue_After_Zwj=\\p{Grapheme_Cluster_Break=Glue_After_Zwj}", - - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break between a CR and LF. 
Otherwise, break before and after controls.", - "3) $CR \u00D7 $LF", - "4) ( $Control | $CR | $LF ) \u00F7", - "5) \u00F7 ( $Control | $CR | $LF )", - "# Do not break Hangul syllable sequences.", - "6) $L \u00D7 ( $L | $V | $LV | $LVT )", - "7) ( $LV | $V ) \u00D7 ( $V | $T )", - "8) ( $LVT | $T) \u00D7 $T", - "# Do not break before extending characters or ZWJ.", - // "9) \u00D7 ($Extend | $ZWJ | $Virama)", - "9) \u00D7 ($Extend | $ZWJ)", - "# Only for extended grapheme clusters: Do not break before SpacingMarks, or after Prepend characters.", - "9.1) \u00D7 $SpacingMark", - "9.2) $Prepend \u00D7", - "9.3) $LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* \u00D7 $LinkingConsonant", - "# Do not break within emoji modifier sequences or emoji zwj sequences.", - // "10) $E_Base $Extend* × $E_Modifier", - "11) $ExtPict $Extend* $ZWJ × $ExtPict", - "# Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", - "12) ^ ($RI $RI)* $RI × $RI", - "13) [^$RI] ($RI $RI)* $RI × $RI", - "# Otherwise, break everywhere.", - }, - { - "LineBreak", - "# Variables", - "$AI=\\p{Line_Break=Ambiguous}", - "$AL=\\p{Line_Break=Alphabetic}", - "$B2=\\p{Line_Break=Break_Both}", - "$BA=\\p{Line_Break=Break_After}", - "$BB=\\p{Line_Break=Break_Before}", - "$BK=\\p{Line_Break=Mandatory_Break}", - "$CB=\\p{Line_Break=Contingent_Break}", - "$CL=\\p{Line_Break=Close_Punctuation}", - "$CP=\\p{Line_Break=CP}", - "$CM1=\\p{Line_Break=Combining_Mark}", - "$CR=\\p{Line_Break=Carriage_Return}", - "$EX=\\p{Line_Break=Exclamation}", - "$GL=\\p{Line_Break=Glue}", - "$H2=\\p{Line_Break=H2}", - "$H3=\\p{Line_Break=H3}", - "$HL=\\p{Line_Break=HL}", - "$HY=\\p{Line_Break=Hyphen}", - "$ID=\\p{Line_Break=Ideographic}", - "$IN=\\p{Line_Break=Inseparable}", - "$IS=\\p{Line_Break=Infix_Numeric}", - "$JL=\\p{Line_Break=JL}", - "$JT=\\p{Line_Break=JT}", - "$JV=\\p{Line_Break=JV}", - 
"$LF=\\p{Line_Break=Line_Feed}", - "$NL=\\p{Line_Break=Next_Line}", - "$NS=\\p{Line_Break=Nonstarter}", - "$NU=\\p{Line_Break=Numeric}", - "$OP=\\p{Line_Break=Open_Punctuation}", - "$PO=\\p{Line_Break=Postfix_Numeric}", - "$PR=\\p{Line_Break=Prefix_Numeric}", - "$QU=\\p{Line_Break=Quotation}", - "$SA=\\p{Line_Break=Complex_Context}", - "$SG=\\p{Line_Break=Surrogate}", - "$SP=\\p{Line_Break=Space}", - "$SY=\\p{Line_Break=Break_Symbols}", - "$WJ=\\p{Line_Break=Word_Joiner}", - "$XX=\\p{Line_Break=Unknown}", - "$ZW=\\p{Line_Break=ZWSpace}", - "$CJ=\\p{Line_Break=Conditional_Japanese_Starter}", - "$RI=\\p{Line_Break=Regional_Indicator}", - "$EB=\\p{Line_Break=E_Base}", - "$EM=\\p{Line_Break=E_Modifier}", - "$ZWJ_O=\\p{Line_Break=ZWJ}", - "$ZWJ=\\p{Line_Break=ZWJ}", - "# Macros", - "$CM=[$CM1 $ZWJ]", - "# LB 1 Assign a line breaking class to each code point of the input. ", - "# Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm.", - "# NOTE: CB is ok to fall through, but must handle others here.", - // "show $AL", - "$AL=[$AI $AL $SG $XX $SA]", - "$NS=[$NS $CJ]", - // "show $AL", - // "$oldAL=$AL", // for debugging - "# WARNING: Fixes for Rule 9", - "# Treat X (CM|ZWJ* as if it were X.", - "# Where X is any line break class except SP, BK, CR, LF, NL or ZW.", - "$X=$CM*", - "# Macros", - "$Spec1_=[$SP $BK $CR $LF $NL $ZW]", - "$Spec2_=[^ $SP $BK $CR $LF $NL $ZW]", - "$Spec3a_=[^ $SP $BA $HY $CM]", - "$Spec3b_=[^ $BA $HY $CM]", - "$Spec4_=[^ $NU $CM]", - "$AI=($AI $X)", - "$AL=($AL $X)", - "$B2=($B2 $X)", - "$BA=($BA $X)", - "$BB=($BB $X)", - "$CB=($CB $X)", - "$CL=($CL $X)", - "$CP=($CP $X)", - "$CM=($CM $X)", - // "$CM=($CM $X)", - "$EX=($EX $X)", - "$GL=($GL $X)", - "$H2=($H2 $X)", - "$H3=($H3 $X)", - "$HL=($HL $X)", - "$HY=($HY $X)", - "$ID=($ID $X)", - "$IN=($IN $X)", - "$IS=($IS $X)", - "$JL=($JL $X)", - "$JT=($JT $X)", - "$JV=($JV $X)", - "$NS=($NS $X)", - "$NU=($NU $X)", - "$OP=($OP $X)", - 
"$PO=($PO $X)", - "$PR=($PR $X)", - "$QU=($QU $X)", - "$SA=($SA $X)", - "$SG=($SG $X)", - "$SY=($SY $X)", - "$WJ=($WJ $X)", - "$XX=($XX $X)", - "$RI=($RI $X)", - "$EB=($EB $X)", - "$EM=($EM $X)", - "$ZWJ=($ZWJ $X)", - "# OUT OF ORDER ON PURPOSE", - "# LB 10 Treat any remaining combining mark as AL.", - "$AL=($AL | ^ $CM | (?<=$Spec1_) $CM)", - "# Rules", - "# LB 4 Always break after hard line breaks (but never between CR and LF).", - "4) $BK \u00F7", - "# LB 5 Treat CR followed by LF, as well as CR, LF and NL as hard line breaks.", - "5.01) $CR \u00D7 $LF", - "5.02) $CR \u00F7", - "5.03) $LF \u00F7", - "5.04) $NL \u00F7", - "# LB 6 Do not break before hard line breaks.", - "6) \u00D7 ( $BK | $CR | $LF | $NL )", - "# LB 7 Do not break before spaces or zero-width space.", - "7.01) \u00D7 $SP", - "7.02) \u00D7 $ZW", - "# LB 8 Break before any character following a zero-width space, even if one or more spaces intervene.", - "8) $ZW $SP* \u00F7", - "# LB 8a Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences)", - "8.1) $ZWJ_O \u00D7", - "# LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character", - "# in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.)", - "9) $Spec2_ \u00D7 $CM", - "#WARNING: this is done by modifying the variable values for all but SP.... 
That is, $AL is really ($AI $CM*)!", - "# LB 11 Do not break before or after WORD JOINER and related characters.", - "11.01) \u00D7 $WJ", - "11.02) $WJ \u00D7", - "# LB 12 Do not break after NBSP and related characters.", - // "12.01) [^$SP] \u00D7 $GL", - "12) $GL \u00D7", - "12.1) $Spec3a_ \u00D7 $GL", - "12.2) $Spec3b_ $CM+ \u00D7 $GL", - "12.3) ^ $CM+ \u00D7 $GL", - "# LB 13 Do not break before \u2018]\u2019 or \u2018!\u2019 or \u2018;\u2019 or \u2018/\u2019, even after spaces.", - "# Using customization 7.", - "13.01) \u00D7 $EX", - "13.02) $Spec4_ \u00D7 ($CL | $CP | $IS | $SY)", - "13.03) $Spec4_ $CM+ \u00D7 ($CL | $CP | $IS | $SY)", - "13.04) ^ $CM+ \u00D7 ($CL | $CP | $IS | $SY)", - // "13.03) $Spec4_ \u00D7 $IS", - // "13.04) $Spec4_ \u00D7 $SY", - "#LB 14 Do not break after \u2018[\u2019, even after spaces.", - "14) $OP $SP* \u00D7", - "# LB 15 Do not break within \u2018\"[\u2019, even with intervening spaces.", - "15) $QU $SP* \u00D7 $OP", - "# LB 16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.", - "16) ($CL | $CP) $SP* \u00D7 $NS", - "# LB 17 Do not break within \u2018\u2014\u2014\u2019, even with intervening spaces.", - "17) $B2 $SP* \u00D7 $B2", - "# LB 18 Break after spaces.", - "18) $SP \u00F7", - "# LB 19 Do not break before or after \u2018\"\u2019.", - "19.01) \u00D7 $QU", - "19.02) $QU \u00D7", - "# LB 20 Break before and after unresolved CB.", - "20.01) \u00F7 $CB", - "20.02) $CB \u00F7", - "# LB 21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents.", - "21.01) \u00D7 $BA", - "21.02) \u00D7 $HY", - "21.03) \u00D7 $NS", - "21.04) $BB \u00D7", - "# LB 21a Don't break after Hebrew + Hyphen.", - "21.1) $HL ($HY | $BA) \u00D7", - "# LB 21b Don’t break between Solidus and Hebrew letters.", - "21.2) $SY × $HL", - "# LB 22 Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis.", - // 
"show $AL", - "22.01) ($AL | $HL) \u00D7 $IN", - "22.02) $EX \u00D7 $IN", - "22.03) ($ID | $EB | $EM) \u00D7 $IN", - "22.04) $IN \u00D7 $IN", - "22.05) $NU \u00D7 $IN", - "# LB 23 Do not break between digits and letters.", - // "23.01) ($ID | $EB | $EM) \u00D7 $PO", - "23.02) ($AL | $HL) \u00D7 $NU", - "23.03) $NU \u00D7 ($AL | $HL)", - "# LB 24 Do not break between prefix and letters or ideographs.", - "23.12) $PR \u00D7 ($ID | $EB | $EM)", - "23.13) ($ID | $EB | $EM) \u00D7 $PO", - "# LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.", - "24.02) ($PR | $PO) \u00D7 ($AL | $HL)", - "24.03) ($AL | $HL) \u00D7 ($PR | $PO)", - "# Using customization 7", - "# LB Alternative: ( PR | PO) ? ( OP | HY ) ? NU (NU | SY | IS) * (CL | CP) ? ( PR | PO) ?", - "# Insert \u00D7 every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after ", - "25.01) ($PR | $PO) \u00D7 ( $OP | $HY )? $NU", - "25.02) ( $OP | $HY ) \u00D7 $NU", - "25.03) $NU \u00D7 ($NU | $SY | $IS)", - "25.04) $NU ($NU | $SY | $IS)* \u00D7 ($NU | $SY | $IS | $CL | $CP)", - "25.05) $NU ($NU | $SY | $IS)* ($CL | $CP)? 
\u00D7 ($PO | $PR)", - "#LB 26 Do not break a Korean syllable.", - "26.01) $JL \u00D7 $JL | $JV | $H2 | $H3", - "26.02) $JV | $H2 \u00D7 $JV | $JT", - "26.03) $JT | $H3 \u00D7 $JT", - "# LB 27 Treat a Korean Syllable Block the same as ID.", - "27.01) $JL | $JV | $JT | $H2 | $H3 \u00D7 $PO", - "27.02) $PR \u00D7 $JL | $JV | $JT | $H2 | $H3", - "# LB 28 Do not break between alphabetics (\"at\").", - "28) ($AL | $HL) \u00D7 ($AL | $HL)", - "# LB 29 Do not break between numeric punctuation and alphabetics (\"e.g.\").", - "29) $IS \u00D7 ($AL | $HL)", - "# LB 30 Do not break between letters, numbers or ordinary symbols and opening or closing punctuation.", - "30.01) ($AL | $HL | $NU) \u00D7 $OP", - "30.02) $CP \u00D7 ($AL | $HL | $NU)", - "# LB 30a Break between two Regional Indicators if and only if there is an even number of them before the point being considered.", - "30.11) ^ ($RI $RI)* $RI × $RI", - "30.12) [^$RI] ($RI $RI)* $RI × $RI", - "30.13) $RI ÷ $RI", - "30.2) $EB × $EM", - }, - { - "SentenceBreak", - "$CR=\\p{Sentence_Break=CR}", - "$LF=\\p{Sentence_Break=LF}", - "$Extend=\\p{Sentence_Break=Extend}", - "$Format=\\p{Sentence_Break=Format}", - "$Sep=\\p{Sentence_Break=Sep}", - "$Sp=\\p{Sentence_Break=Sp}", - "$Lower=\\p{Sentence_Break=Lower}", - "$Upper=\\p{Sentence_Break=Upper}", - "$OLetter=\\p{Sentence_Break=OLetter}", - "$Numeric=\\p{Sentence_Break=Numeric}", - "$ATerm=\\p{Sentence_Break=ATerm}", - "$STerm=\\p{Sentence_Break=STerm}", - "$Close=\\p{Sentence_Break=Close}", - "$SContinue=\\p{Sentence_Break=SContinue}", - "$Any=.", - // "# subtract Format from Control, since we don't want to break before/after", - // "$Control=[$Control-$Format]", - "# Expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need.", - // "$NotStuff=[^$OLetter $Upper $Lower $Sep]", - // "# $ATerm and $Sterm are temporary, to match ICU until UTC decides.", - - "# WARNING: For Rule 5, now add format and extend to everything 
but Sep, Format, and Extend", - "$FE=[$Format $Extend]", - "# Special rules", - "$NotPreLower_=[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]", - // "$NotSep_=[^ $Sep $CR $LF]", - - // "$FE=$Extend* $Format*", - "$Sp=($Sp $FE*)", - "$Lower=($Lower $FE*)", - "$Upper=($Upper $FE*)", - "$OLetter=($OLetter $FE*)", - "$Numeric=($Numeric $FE*)", - "$ATerm=($ATerm $FE*)", - "$STerm=($STerm $FE*)", - "$Close=($Close $FE*)", - "$SContinue=($SContinue $FE*)", - "# Macros", - "$ParaSep = ($Sep | $CR | $LF)", - "$SATerm = ($STerm | $ATerm)", - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break within CRLF.", - "3) $CR \u00D7 $LF", - "# Break after paragraph separators.", - "4) $ParaSep \u00F7", - // "3.4) ( $Control | $CR | $LF ) \u00F7", - // "3.5) \u00F7 ( $Control | $CR | $LF )", - "# Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend)", - "# WARNING: Implemented as don't break before format (except after linebreaks),", - "# AND add format and extend in all variables definitions that appear after this point!", - // "3.91) [^$Control | $CR | $LF] \u00D7 $Extend", - "5) \u00D7 [$Format $Extend]", - "# Do not break after full stop in certain contexts. 
[See note below.]", - "# Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter,", - "# is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase.", - "# For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence.", - "6) $ATerm \u00D7 $Numeric", - "7) ($Upper | $Lower) $ATerm \u00D7 $Upper", - "8) $ATerm $Close* $Sp* \u00D7 $NotPreLower_* $Lower", - "8.1) $SATerm $Close* $Sp* \u00D7 ($SContinue | $SATerm)", - "# Break after sentence terminators, but include closing punctuation, trailing spaces, and any paragraph separator. [See note below.] Include closing punctuation, trailing spaces, and (optionally) a paragraph separator.", - "9) $SATerm $Close* \u00D7 ( $Close | $Sp | $ParaSep )", - "# Note the fix to $Sp*, $Sep?", - "10) $SATerm $Close* $Sp* \u00D7 ( $Sp | $ParaSep )", - "11) $SATerm $Close* $Sp* $ParaSep? \u00F7", - "#Otherwise, do not break", - "998) \u00D7 $Any", - }, - { - "WordBreak", - "$CR=\\p{Word_Break=CR}", - "$LF=\\p{Word_Break=LF}", - "$Newline=\\p{Word_Break=Newline}", - // "$Control=\\p{Word_Break=Control}", - "$Extend=\\p{Word_Break=Extend}", - // "$NEWLINE=[$CR $LF \\u0085 \\u000B \\u000C \\u2028 \\u2029]", - // "$Sep=\\p{Sentence_Break=Sep}", - "# Now normal variables", - "$Format=\\p{Word_Break=Format}", - "$Katakana=\\p{Word_Break=Katakana}", - "$ALetter=\\p{Word_Break=ALetter}", - "$MidLetter=\\p{Word_Break=MidLetter}", - "$MidNum=\\p{Word_Break=MidNum}", - "$MidNumLet=\\p{Word_Break=MidNumLet}", - "$Numeric=\\p{Word_Break=Numeric}", - "$ExtendNumLet=\\p{Word_Break=ExtendNumLet}", - "$RI=\\p{Word_Break=Regional_Indicator}", - "$Hebrew_Letter=\\p{Word_Break=Hebrew_Letter}", - "$Double_Quote=\\p{Word_Break=Double_Quote}", - "$Single_Quote=\\p{Word_Break=Single_Quote}", - - // "$E_Base=\\p{Word_Break=E_Base}", - // "$E_Modifier=\\p{Word_Break=E_Modifier}", - "$ZWJ=\\p{Word_Break=ZWJ}", - 
"$ExtPict=\\p{Extended_Pictographic}", - - // "$EBG=\\p{Word_Break=E_Base_GAZ}", - // "$Glue_After_Zwj=\\p{Word_Break=Glue_After_Zwj}", - - "$WSegSpace=\\p{Word_Break=WSegSpace}", - "# Macros", - "$AHLetter=($ALetter | $Hebrew_Letter)", - "$MidNumLetQ=($MidNumLet | $Single_Quote)", - "# WARNING: For Rule 4: Fixes for GC, Format", - // "# Subtract Format from Control, since we don't want to break before/after", - // "$Control=[$Control-$Format]", - "# Add format and extend to everything", - "$FE=[$Format $Extend $ZWJ]", - "# Special rules", - "$NotBreak_=[^ $Newline $CR $LF ]", - // "$FE= ($Extend | $Format)*", - "$Katakana=($Katakana $FE*)", - "$ALetter=($ALetter $FE*)", - "$MidLetter=($MidLetter $FE*)", - "$MidNum=($MidNum $FE*)", - "$MidNumLet=($MidNumLet $FE*)", - "$Numeric=($Numeric $FE*)", - "$ExtendNumLet=($ExtendNumLet $FE*)", - "$RI=($RI $FE*)", - "$Hebrew_Letter=($Hebrew_Letter $FE*)", - "$Double_Quote=($Double_Quote $FE*)", - "$Single_Quote=($Single_Quote $FE*)", - - // "$E_Base=($E_Base $FE*)", - // "$E_Modifier=($E_Modifier $FE*)", - // "$ZWJ=($ZWJ $FE*)", don't do this one! - // "$Glue_After_Zwj=($Glue_After_Zwj $FE*)", - // "$EBG=($EBG $FE*)", - - "$AHLetter=($AHLetter $FE*)", - "$MidNumLetQ=($MidNumLetQ $FE*)", - "# Rules", - "# Break at the start and end of text, unless the text is empty.", - "# Do not break within CRLF.", - "3) $CR \u00D7 $LF", - "# Otherwise break before and after Newlines (including CR and LF)", - "3.1) ($Newline | $CR | $LF) \u00F7", - "3.2) \u00F7 ($Newline | $CR | $LF)", - "# Do not break within emoji zwj sequences.", - "3.3) $ZWJ × $ExtPict", - "3.4) $WSegSpace × $WSegSpace", - - // "3.4) ( $Control | $CR | $LF ) \u00F7", - // "3.5) \u00F7 ( $Control | $CR | $LF )", - // "3.9) \u00D7 $Extend", - // "3.91) [^$Control | $CR | $LF] \u00D7 $Extend", - "# Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) 
This also has the effect of: Any × (Format | Extend)", - "# WARNING: Implemented as don't break before format (except after linebreaks),", - "# AND add format and extend in all variables definitions that appear after this point!", - // "4) \u00D7 [$Format $Extend]", - "4) $NotBreak_ \u00D7 [$Format $Extend $ZWJ]", - "# Vanilla rules", - "# Do not break between most letters.", - "5) $AHLetter \u00D7 $AHLetter", - "# Do not break letters across certain punctuation.", - "6) $AHLetter \u00D7 ($MidLetter | $MidNumLetQ) $AHLetter", - "7) $AHLetter ($MidLetter | $MidNumLetQ) \u00D7 $AHLetter", - "7.1) $Hebrew_Letter × $Single_Quote", - "7.2) $Hebrew_Letter × $Double_Quote $Hebrew_Letter", - "7.3) $Hebrew_Letter $Double_Quote × $Hebrew_Letter", - "# Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”).", - "8) $Numeric \u00D7 $Numeric", - "9) $AHLetter \u00D7 $Numeric", - "10) $Numeric \u00D7 $AHLetter", - "# Do not break within sequences, such as “3.2” or “3,456.789”.", - "11) $Numeric ($MidNum | $MidNumLetQ) \u00D7 $Numeric", - "12) $Numeric \u00D7 ($MidNum | $MidNumLetQ) $Numeric", - "# Do not break between Katakana.", - "13) $Katakana \u00D7 $Katakana", - "# Do not break from extenders.", - "13.1) ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) \u00D7 $ExtendNumLet", - "13.2) $ExtendNumLet \u00D7 ($AHLetter | $Numeric | $Katakana)", - - // "# Do not break within emoji modifier sequences.", - // "14) $E_Base × $E_Modifier", - - "# Do not break within emoji flag sequences. 
That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", - "15) ^ ($RI $RI)* $RI × $RI", - "16) [^$RI] ($RI $RI)* $RI × $RI", - "# Otherwise, break everywhere (including around ideographs).", - } - }; - - public static void main(String[] args) throws IOException { - for (int i = 0; i < cannedRules.length; ++i) { - String type = cannedRules[i][0]; - boolean hadHash = false; - try (PrintWriter out = - FileUtilities.openUTF8Writer( - Settings.Output.GEN_DIR + "segmentation/", type + "Rules.txt")) { - out.println("# Segmentation rules for " + type); - out.println("#"); - out.println("# Character Classes"); - out.println("#"); - for (int j = 1; j < cannedRules[i].length; ++j) { - String cannedRule = cannedRules[i][j].trim(); - if (cannedRule.equals("#")) { - continue; - } - boolean hasHash = cannedRule.startsWith("#"); - if (hasHash && !hadHash) { - out.println("#"); - } - out.println(cannedRule); - if (hasHash) { - out.println("#"); - } - hadHash = hasHash; - } - } - } - - try (PrintWriter out = - FileUtilities.openUTF8Writer( - Settings.Output.GEN_DIR + "cldr/segmentation/", "rootAddon.xml")) { - out.println( - "\n" - + "\n" - + "\n" - + "\n" - + "\t\n" - + "\t\t\n" - + "\t\t\n" - + "\t\n" - + "\t"); - for (final String type : - new String[] { - "GraphemeClusterBreak", "LineBreak", "SentenceBreak", "WordBreak" - }) { - final Builder segBuilder = - Segmenter.make(ToolUnicodePropertySource.make(Default.ucdVersion()), type); - out.print(segBuilder.toString(type, "\t\t")); - if (type.equals("")) { - out.print( - "\t\t\t\n" - + "\t\t\t\t\n" - + "\t\t\t\n"); - } - } - out.println("\t\n" + ""); - } - } } diff --git a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java index e748ff9c6..ead5331b7 100644 --- a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java +++ 
b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java @@ -22,7 +22,6 @@ import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.RandomStringGenerator; import org.unicode.props.UnicodeProperty; -import org.unicode.tools.Segmenter.Rule.Breaks; /** * Quick class for testing proposed syntax for Segments. TODO doesn't yet handle supplementaries. It @@ -133,7 +132,7 @@ public static void main(String[] args) throws IOException { } private static void debugRule(Segmenter.Builder rb) { - Segmenter.Rule rule = rb.make().get(16.01); + Segmenter.SegmentationRule rule = rb.make().get(16.01); String oldAL = (String) rb.getVariables().get("$oldAL"); UnicodeSet oldALSet = new UnicodeSet(oldAL); String testStr = "\uA80D/\u0745\u2026"; @@ -142,7 +141,7 @@ private static void debugRule(Segmenter.Builder rb) { System.out.println( k + ": " + inside + com.ibm.icu.impl.Utility.escape("" + testStr.charAt(k))); } - Breaks m = rule.matches(testStr, 3); + rule.applyAt(3, testStr, new Integer[] {0, 1, 2, 3}, null); } private static void doCompare(UnicodeProperty.Factory factory, Segmenter rl, String line) { diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt new file mode 100644 index 000000000..58662bbb8 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/118.txt @@ -0,0 +1,30 @@ +# ARABIC LETTER NOON WITH RING ABOVE (088F) +# https://github.com/unicode-org/utc-release-management/issues/118 + +Let $OldNoons := [ ن ڹ ں ڻ ڼ ڽ ݧ ݨ ݩ ࢉ ] + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. 
+Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring Block Unicode_1_Name: + +# « Another ن, propertywise like the others. ». + +# Differs from ڽ (with three dots above) in Joining_Group, +# ڽ being jg=Nya vs. jg=Noon for the others; see +# https://www.unicode.org/versions/latest/ch09.pdf#G39824. +Ignoring Joining_Group: +Propertywise [ $OldNoons \N{ARABIC LETTER NOON WITH RING ABOVE} ] AreAlike +end Ignoring; + +Propertywise [ $OldNoons - [ڽ] \N{ARABIC LETTER NOON WITH RING ABOVE} ] AreAlike + +end Ignoring; + +end Ignoring; + +end Ignoring; diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 8f86e455f..8d596b081 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -25,6 +25,7 @@ ASCII ; Basic_Latin Bassa_Vah ; Bassa_Vah Batak ; Batak Bengali ; Bengali +Beria_Erfe ; Beria_Erfe Bhaiksuki ; Bhaiksuki Block_Elements ; Block_Elements Bopomofo ; Bopomofo @@ -42,6 +43,7 @@ Cham ; Cham Cherokee ; Cherokee Cherokee_Sup ; Cherokee_Supplement Chess_Symbols ; Chess_Symbols +Chisoi ; Chisoi Chorasmian ; Chorasmian CJK_Compat ; CJK_Compatibility CJK_Compat_Forms ; CJK_Compatibility_Forms @@ -264,9 +266,11 @@ Runic ; Runic Samaritan ; Samaritan Saurashtra ; Saurashtra Sharada ; Sharada +Sharada_Sup ; Sharada_Supplement Shavian ; Shavian Shorthand_Format_Controls ; Shorthand_Format_Controls Siddham ; Siddham +Sidetic ; Sidetic Sinhala ; Sinhala Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers Small_Forms ; Small_Form_Variants @@ -317,6 +321,7 @@ Tibetan ; Tibetan Tifinagh ; Tifinagh Tirhuta ; Tirhuta Todhri ; Todhri +Tolong_Siki ; Tolong_Siki Toto ; Toto Tulu_Tigalari ; Tulu_Tigalari Transport_And_Map ; Transport_And_Map_Symbols diff --git 
a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 570de2e90..b45806a9f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -700,6 +700,7 @@ Let $nonAlphabeticBindus := [] Let $nonAlphabeticDependentVowels := [ \N{ORIYA SIGN OVERLINE} + \p{Name=/^ORIYA SIGN (DOUBLE )?DOT ABOVE$/} # L2/24-106R, related to the overline. \N{THAI CHARACTER MAITAIKHU} \N{LIMBU SIGN KEMPHRENG} \N{SHARADA VOWEL MODIFIER MARK} @@ -721,7 +722,7 @@ Let $nonLowercaseSmallLetters := [ \N{TURNED GREEK SMALL LETTER IOTA} \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/} ] -Let $nonLowercaseSmallModifierLetters := [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] +Let $nonLowercaseSmallModifierLetters := [ \p{gc=Lm} & \p{name=/^(ARABIC|CHINESE) SMALL/} ] [ \p{name=/\bSMALL LETTER\b/}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase} [ [\p{gc=Lm} & \p{name=/SMALL/}] - $nonLowercaseSmallModifierLetters ] ⊆ \p{Lowercase} @@ -1201,8 +1202,13 @@ $punct ⊇ [[\u0021-\u007E] - [0-9 A-Z a-z]] # The Khitan Small Script filler is a Nonspacing Mark. # The other characters are numerals (the Hangzhou ten through thirty are compatibility decomposable, # but not the one through nine) and have Script=Han. -Let $NonOtherLetterIdeographs := [\N{KHITAN SMALL SCRIPT FILLER} 〇 〡-〩 〸-〺] -$NonOtherLetterIdeographs = [\p{Ideographic} - \p{gc=Lo}] +Let $NonOtherLetterIdeographs := [\p{Ideographic} - \p{gc=Lo}] +$NonOtherLetterIdeographs = [ + \N{KHITAN SMALL SCRIPT FILLER} + 〇 〡-〩 〸-〺 + \p{NAME=/^CHINESE SMALL (SIMPLIFIED|TRADITIONAL) ER$/} + \p{Name=/^YANGQIN SIGN SLOW (ONE|THREE HALF|TWO) BEATS?$/} +] # Ideographic closing mark, gc=Lo. 
Let $CommonIdeographs := [〆] $CommonIdeographs = [\p{Ideographic} & \p{sc=Common}] @@ -1379,4 +1385,4 @@ Ignoring Unicode_1_Name Confusable_MA: end Ignoring; -end Ignoring; \ No newline at end of file +end Ignoring; diff --git a/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt b/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt index e7e6193bd..eb9b2b132 100644 --- a/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt +++ b/unicodetools/src/main/resources/org/unicode/tools/SegmenterDefault.txt @@ -142,86 +142,10 @@ $CM=[$CM1 $ZWJ] ## show $AL $AL=[$AI $AL $SG $XX $SA] $NS=[$NS $CJ] -## show $AL -## $oldAL=$AL // for debugging -# WARNING: Fixes for Rule 9 -# Treat X (CM|ZWJ* as if it were X. -# Where X is any line break class except SP, BK, CR, LF, NL or ZW. -$X=$CM* # MACROS -$Spec1_=[$SP $BK $CR $LF $NL $ZW] -$Spec2_=[^ $SP $BK $CR $LF $NL $ZW] -$Spec3a_=[^ $SP $BA $HY $CM] -$Spec3b_=[^ $BA $HY $CM] -$Spec4_=[^ $NU $CM] - -# SPECIAL EXTENSIONS - -$AI=($AI $X) -$AK=($AK $X) -$AL=($AL $X) -$AP=($AP $X) -$AS=($AS $X) -$B2=($B2 $X) -$BA=($BA $X) -$BB=($BB $X) -$CB=($CB $X) -$CL=($CL $X) -$CP=($CP $X) -$CM=($CM $X) -## $CM=($CM $X) -$EX=($EX $X) -$GL=($GL $X) -$H2=($H2 $X) -$H3=($H3 $X) -$HL=($HL $X) -$HY=($HY $X) -$ID=($ID $X) -$IN=($IN $X) -$IS=($IS $X) -$JL=($JL $X) -$JT=($JT $X) -$JV=($JV $X) -$NS=($NS $X) -$NU=($NU $X) -$OP=($OP $X) -$PO=($PO $X) -$PR=($PR $X) -$QU=($QU $X) -$SA=($SA $X) -$SG=($SG $X) -$SY=($SY $X) -$VF=($VF $X) -$VI=($VI $X) -$WJ=($WJ $X) -$XX=($XX $X) -$RI=($RI $X) -$EB=($EB $X) -$EM=($EM $X) -$ZWJ=($ZWJ $X) - -$QU_Pi=($QU_Pi $X) -$QU_Pf=($QU_Pf $X) - -$QUmPi=($QUmPi $X) -$QUmPf=($QUmPf $X) - -$NotEastAsian=( $NotEastAsian | [$NotEastAsian - $Spec1_] $X) -$NonEastAsianBA=(NonEastAsianBA $X) - -$DottedCircle=($DottedCircle $X) -$Hyphen=($Hyphen $X) - -$CP30=($CP30 $X) -$OP30=($OP30 $X) - -# OUT OF ORDER ON PURPOSE - -# LB 10 Treat any remaining combining mark as AL and non-$EastAsian. 
-$AL=($AL | ^ $CM | (?<=$Spec1_) $CM) -$NotEastAsian=( $NotEastAsian | ^ $CM | (?<=$Spec1_) $CM ) +$Spec3a_=[^ $SP $BA $HY] # RULES @@ -241,19 +165,18 @@ $NotEastAsian=( $NotEastAsian | ^ $CM | (?<=$Spec1_) $CM ) 8) $ZW $SP* ÷ # LB 8a Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences) 8.1) $ZWJ_O × -# LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character -# in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -9) $Spec2_ × $CM -##WARNING: this is done by modifying the variable values for all but SP.... That is, $AL is really ($AI $CM*)! +# LB 9 Do not break a combining character sequence; treat it as if it has the line breaking class +# of the base character in all of the following rules. Treat ZWJ as if it were CM. +9) (?<X>[^$BK $CR $LF $NL $SP $ZW]) ( $CM | $ZWJ )* → ${X} +# LB10 Treat any remaining combining mark or ZWJ as AL. +10) ( $CM | $ZWJ ) → A ## LB 11 Do not break before or after WORD JOINER and related characters. 11.01) × $WJ 11.02) $WJ × # LB 12 Do not break after NBSP and related characters. -## 12.01) [^$SP] × $GL 12) $GL × +# LB 12a Do not break before NBSP and related characters, except after spaces and hyphens. 12.1) $Spec3a_ × $GL -12.2) $Spec3b_ $CM+ × $GL -12.3) ^ $CM+ × $GL # LB 13 Do not break before \u2018]\u2019 or \u2018!\u2019 or \u2018;\u2019 or \u2018/\u2019, even after spaces.
13.01) × $EX 13.02) × $CL diff --git a/unicodetools/src/test/java/org/unicode/test/TestSegment.java b/unicodetools/src/test/java/org/unicode/test/TestSegment.java index baf52f539..719f3cf14 100644 --- a/unicodetools/src/test/java/org/unicode/test/TestSegment.java +++ b/unicodetools/src/test/java/org/unicode/test/TestSegment.java @@ -31,7 +31,7 @@ import org.unicode.text.utility.Utility; import org.unicode.tools.Segmenter; import org.unicode.tools.Segmenter.Builder; -import org.unicode.tools.Segmenter.Rule; +import org.unicode.tools.Segmenter.SegmentationRule; public class TestSegment { @@ -398,8 +398,8 @@ private static void checkExemplars() { } private static void getExemplarStrings(UnicodeMap exemplars, Builder segmenter) { - Map srules = segmenter.getProcessedRules(); - for (Entry entry : srules.entrySet()) { + Map srules = segmenter.getProcessedRules(); + for (Entry entry : srules.entrySet()) { System.out.println(entry.getKey() + "\t\t" + entry.getValue()); } }