Skip to content

Commit

Permalink
Mark more Prepended_Concatenation_Marks as non-advancing
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-Bertholet committed May 21, 2024
1 parent 3b56f6d commit 3742586
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 14 deletions.
11 changes: 9 additions & 2 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,17 @@ def load_zero_widths() -> "list[bool]":
# width 2. Therefore, we treat it as having width 2.
zw_map[0x115F] = False

# Syriac abbreviation mark
# This is a `Prepended_Concatenation_Mark`, but unlike the others it's zero-width
# Syriac abbreviation mark:
# Zero-width `Prepended_Concatenation_Mark`
zw_map[0x070F] = True

# Some Arabic Prepended_Concatenation_Mark`s
# https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G27820
zw_map[0x0605] = True
zw_map[0x0890] = True
zw_map[0x0891] = True
zw_map[0x08E2] = True

# U+A8FA DEVANAGARI CARET
# https://www.unicode.org/versions/Unicode15.0.0/ch12.pdf#G667447
zw_map[0xA8FA] = True
Expand Down
9 changes: 7 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,12 @@
//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43).
//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D)
//! with a [`Hangul_Syllable_Type`] of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`).
//! - `'\u{070F}'` [SYRIAC] ABBREVIATION MARK.
//! - The following [`Prepended_Concatenation_Mark`]s:
//! - [`'\u{0605}'` NUMBER MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0605),
//! - [`'\u{070F}'` SYRIAC ABBREVIATION MARK](https://util.unicode.org/UnicodeJsps/character.jsp?a=070F),
//! - [`'\u{0890}'` POUND MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0890),
//! - [`'\u{0891}'` PIASTRE MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0891), and
//! - [`'\u{08E2}'` DISPUTED END OF AYAH](https://util.unicode.org/UnicodeJsps/character.jsp?a=08E2).
//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
//! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
Expand All @@ -70,6 +75,7 @@
//! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation
//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443
//! [`Hangul_Syllable_Type`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593
//! [`Prepended_Concatenation_Mark`]: https://www.unicode.org/versions/Unicode15.0.0/ch23.pdf#G37908
//!
//! [`Fullwidth`]: https://www.unicode.org/reports/tr11/#ED2
//! [`Wide`]: https://www.unicode.org/reports/tr11/#ED4
Expand All @@ -80,7 +86,6 @@
//!
//! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
//!
//! [Syriac]: https://www.unicode.org/versions/Unicode15.0.0/ch09.pdf#G13006
//! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
//!
//! ## Canonical equivalence
Expand Down
6 changes: 3 additions & 3 deletions src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ pub mod charwidth {
0x00, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x10, 0x41, 0x10, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x51, 0x55, 0x55, 0x00, 0x00, 0x40, 0x54, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55,
0x54, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05, 0x00, 0x14, 0x00, 0x14, 0x04,
0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x15, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55,
Expand All @@ -330,8 +330,8 @@ pub mod charwidth {
0x55, 0x55, 0x55, 0x15, 0x00, 0x00, 0x55, 0x55, 0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
0x10, 0x00, 0x00, 0x01, 0x01, 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x01, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x55, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
0x50, 0x55, 0x00, 0x00, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x05,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x55,
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x45, 0x54, 0x01,
0x00, 0x54, 0x51, 0x01, 0x00, 0x55, 0x55, 0x05, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
0x51, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
Expand Down
22 changes: 15 additions & 7 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,22 @@ fn test_jamo() {

#[test]
fn test_prepended_concatenation_marks() {
assert_eq!('\u{0600}'.width(), Some(1));
assert_eq!('\u{08E2}'.width(), Some(1));
assert_eq!('\u{110BD}'.width(), Some(1));
}
for c in [
'\u{0600}',
'\u{0601}',
'\u{0602}',
'\u{0603}',
'\u{0604}',
'\u{06DD}',
'\u{110BD}',
'\u{110CD}',
] {
assert_eq!(c.width(), Some(1), "{c:?} should have width 1");
}

#[test]
fn test_syriac_abbreviation_mark() {
assert_eq!('\u{070F}'.width(), Some(0));
for c in ['\u{0605}', '\u{070F}', '\u{0890}', '\u{0891}', '\u{08E2}'] {
assert_eq!(c.width(), Some(0), "{c:?} should have width 0");
}
}

#[test]
Expand Down

0 comments on commit 3742586

Please sign in to comment.