From f2ede12af934d5457b0c244ddda4df4eea543f4e Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Thu, 5 Oct 2017 09:54:27 +0100 Subject: [PATCH 01/14] Renamed current gpos tests so we can distinguish between kern and mark tests --- tests/gpos_diff_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/gpos_diff_test.py b/tests/gpos_diff_test.py index fb95adf0..d174e40e 100644 --- a/tests/gpos_diff_test.py +++ b/tests/gpos_diff_test.py @@ -38,7 +38,7 @@ def _expect_kerning_diffs(self, source_a, source_b, pairs, values): for value_diff in values: self.assertIn('pos %s %s: %s vs %s' % value_diff, diffs) - def test_simple(self): + def test_kern_simple(self): self._expect_kerning_diffs(''' pos a b -10; pos a c -20; @@ -49,7 +49,7 @@ def test_simple(self): [('-', 'a', 'c', [-20]), ('+', 'a', 'd', [-40])], [('a', 'b', [-10], [-30])]) - def test_multiple_rules(self): + def test_kern_multiple_rules(self): self._expect_kerning_diffs(''' @a_b = [a b]; pos a d -10; @@ -60,7 +60,7 @@ def test_multiple_rules(self): [('-', 'b', 'd', [-20])], [('a', 'd', [-10, -20], [-30])]) - def test_single_vs_class(self): + def test_kern_single_vs_class(self): self._expect_kerning_diffs(''' pos a d -10; ''', ''' From ef37bf806ce17e0fdcdfdd4b131063acff4b99c1 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Thu, 5 Oct 2017 14:43:25 +0100 Subject: [PATCH 02/14] gpos_diff: Fixed parsing mark positions In the previous implementation, a mark positioning rule for a single glyph wouldn't get parsed. By adding ? to the regular expression in the method find_positioning_diffs, it can now parse both single glyphs, classes and groups. The new test, *test_mark_positioning_diffs_simple* will test this functionality. I've added extra tests to ensure positioning diffs are being reported correctly as well. --- nototools/gpos_diff.py | 2 +- tests/gpos_diff_test.py | 86 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/nototools/gpos_diff.py b/nototools/gpos_diff.py index 10110e78..7d9b6df3 100644 --- a/nototools/gpos_diff.py +++ b/nototools/gpos_diff.py @@ -139,7 +139,7 @@ def find_positioning_diffs(self, mark_type='base'): unmatched = {} mismatched = {} rx = re.compile('pos %s \[([\w\d\s@_.]+)\]\s+ ' - 'mark (@[\w\d_.]+);' % mark_type) + 'mark (@?[\w\d_.]+);' % mark_type) self._parse_anchor_info(rx, '-', self.text_a, unmatched, mismatched) self._parse_anchor_info(rx, '+', self.text_b, unmatched, mismatched) diff --git a/tests/gpos_diff_test.py b/tests/gpos_diff_test.py index d174e40e..e1df9dc9 100644 --- a/tests/gpos_diff_test.py +++ b/tests/gpos_diff_test.py @@ -38,6 +38,21 @@ def _expect_kerning_diffs(self, source_a, source_b, pairs, values): for value_diff in values: self.assertIn('pos %s %s: %s vs %s' % value_diff, diffs) + def _expect_mark_positioning_diffs(self, source_a, source_b, values): + font_a = make_font('feature mark {\n%s\n} mark;' % source_a) + font_b = make_font('feature mark {\n%s\n} mark;' % source_b) + file_a = tempfile.NamedTemporaryFile() + file_b = tempfile.NamedTemporaryFile() + font_a.save(file_a.name) + font_b.save(file_b.name) + finder = GposDiffFinder(file_a.name, file_b.name, 0, 100) + + diffs = finder.find_positioning_diffs() + self.assertIn('%d differences in mark-to-base positioning rule values' % len(values), + diffs) + for value_diff in values: + self.assertIn('<%s %s> vs <%s %s>' % value_diff, diffs) + def test_kern_simple(self): self._expect_kerning_diffs(''' pos a b -10; @@ -70,6 +85,77 @@ def test_kern_single_vs_class(self): [('+', 'b', 'd', [-20])], [('a', 'd', [-10], [-20])]) + def test_mark_positioning_diffs_simple(self): + """Find position differences for a single mark and single base glyph""" + self._expect_mark_positioning_diffs(''' + markClass acute @TOP_MARKS; + + position base a mark @TOP_MARKS; + ''',''' + markClass acute @TOP_MARKS; + + position base a mark @TOP_MARKS; + ''', + [(250, 450, 0, 0)]) + + + def test_mark_positioning_diffs_on_groups(self): + """Find position differences for groups""" + self._expect_mark_positioning_diffs(''' + markClass [acute grave] @TOP_MARKS; + + position base [a e o u] mark @TOP_MARKS; + ''',''' + markClass [acute grave] @TOP_MARKS; + + position base [a e o u] mark @TOP_MARKS; + ''', + [(250, 450, 0, 0), + (250, 450, 0, 0), + (250, 450, 0, 0), + (250, 450, 0, 0)]) + + def test_mark_positioning_diffs_on_classes(self): + """Find position differences on classes""" + self._expect_mark_positioning_diffs(''' + @top = [acute grave]; + @base_glyphs = [a e o u]; + + markClass @top @TOP_MARKS; + + position base @base_glyphs mark @TOP_MARKS; + ''',''' + @top = [acute grave]; + @base_glyphs = [a e o u]; + + markClass @top @TOP_MARKS; + + position base @base_glyphs mark @TOP_MARKS; + ''', + [(250, 450, 0, 0), + (250, 450, 0, 0), + (250, 450, 0, 0), + (250, 450, 0, 0)]) + + def test_mark_positioning_diffs_mark_on_mark(self): + """Find positions differences on mark to mark positions""" + self._expect_mark_positioning_diffs(''' + @top = [acute grave]; + + markClass @top @TOP_MARKS; + + position base @top mark @TOP_MARKS; + ''',''' + @top = [acute grave]; + + markClass @top @TOP_MARKS; + + position base @top mark @TOP_MARKS; + ''', + [(250, 450, 0, 0), + (250, 450, 0, 0)]) + + if __name__ == '__main__': unittest.main() From e37e0a9c3030373fd87b3602a7d051d84afc4fb0 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Sat, 11 Nov 2017 15:09:35 +0000 Subject: [PATCH 03/14] [hb_input] Get unicodes for ordinals with a value higher than 10000 on narrow python builds --- nototools/hb_input.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nototools/hb_input.py b/nototools/hb_input.py index 65c6b42d..d58334a6 100644 --- a/nototools/hb_input.py +++ b/nototools/hb_input.py @@ -94,7 +94,12 @@ def input_from_name(self, name, seen=None, pad=False): # see if this glyph has a simple unicode mapping if name in self.reverse_cmap: - text = unichr(self.reverse_cmap[name]) + try: + text = unichr(self.reverse_cmap[name]) + except ValueError: + ordinal = self.reverse_cmap[name] + uni_esc = "\\U%08x" % ordinal + text = uni_esc.decode('unicode-escape') inputs.append(((), text)) # check the substitution features From 16db86b377574a133d0715b8516da09ba8edc7fa Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Sat, 11 Nov 2017 15:25:05 +0000 Subject: [PATCH 04/14] Use fontTools.misc.py23.unichr instead of unichr --- nototools/hb_input.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/nototools/hb_input.py b/nototools/hb_input.py index d58334a6..17e25ef9 100644 --- a/nototools/hb_input.py +++ b/nototools/hb_input.py @@ -16,6 +16,7 @@ from __future__ import division, print_function from fontTools.ttLib import TTFont +from fontTools.misc.py23 import unichr from nototools import summary @@ -94,12 +95,7 @@ def input_from_name(self, name, seen=None, pad=False): # see if this glyph has a simple unicode mapping if name in self.reverse_cmap: - try: - text = unichr(self.reverse_cmap[name]) - except ValueError: - ordinal = self.reverse_cmap[name] - uni_esc = "\\U%08x" % ordinal - text = uni_esc.decode('unicode-escape') + text = unichr(self.reverse_cmap[name]) inputs.append(((), text)) # check the substitution features From d0f0496882e858b7ce1bd756af3db923b1b4adea Mon Sep 17 00:00:00 2001 From: Marek Jeziorek Date: Sun, 21 Oct 2018 14:14:08 -0400 Subject: [PATCH 05/14] Samples for new scripts --- sample_texts/und-Aghb_chars.txt | 1 + sample_texts/und-Ahom_chars.txt | 1 + sample_texts/und-Bhks_chars.txt | 1 + sample_texts/und-Dupl_chars.txt | 1 + sample_texts/und-Elba_chars.txt | 1 + sample_texts/und-Gran_chars.txt | 1 + sample_texts/und-Hatr_chars.txt | 1 + sample_texts/und-Hmng_chars.txt | 1 + sample_texts/und-Hung_chars.txt | 1 + sample_texts/und-Lina_chars.txt | 1 + sample_texts/und-Mahj_chars.txt | 1 + sample_texts/und-Mani_chars.txt | 1 + sample_texts/und-Marc_chars.txt | 1 + sample_texts/und-Mend_chars.txt | 1 + sample_texts/und-Modi_chars.txt | 1 + sample_texts/und-Mroo_chars.txt | 1 + sample_texts/und-Mult_chars.txt | 1 + sample_texts/und-Narb_chars.txt | 1 + sample_texts/und-Nbat_chars.txt | 1 + sample_texts/und-Newa_chars.txt | 1 + sample_texts/und-Palm_chars.txt | 1 + sample_texts/und-Pauc_chars.txt | 1 + sample_texts/und-Perm_chars.txt | 1 + sample_texts/und-Phlp_chars.txt | 1 + sample_texts/und-Plrd_chars.txt | 1 + sample_texts/und-Shrd_chars.txt | 1 + sample_texts/und-Sind_chars.txt | 1 + sample_texts/und-Sora_chars.txt | 1 + sample_texts/und-Takr_chars.txt | 1 + sample_texts/und-Tirh_chars.txt | 1 + sample_texts/und-Wara_chars.txt | 1 + 31 files changed, 31 insertions(+) create mode 100644 sample_texts/und-Aghb_chars.txt create mode 100644 sample_texts/und-Ahom_chars.txt create mode 100644 sample_texts/und-Bhks_chars.txt create mode 100644 sample_texts/und-Dupl_chars.txt create mode 100644 sample_texts/und-Elba_chars.txt create mode 100644 sample_texts/und-Gran_chars.txt create mode 100644 sample_texts/und-Hatr_chars.txt create mode 100644 sample_texts/und-Hmng_chars.txt create mode 100644 sample_texts/und-Hung_chars.txt create mode 100644 sample_texts/und-Lina_chars.txt create mode 100644 sample_texts/und-Mahj_chars.txt create mode 100644 sample_texts/und-Mani_chars.txt create mode 100644 sample_texts/und-Marc_chars.txt create mode 100644 sample_texts/und-Mend_chars.txt create mode 100644 sample_texts/und-Modi_chars.txt create mode 100644 sample_texts/und-Mroo_chars.txt create mode 100644 sample_texts/und-Mult_chars.txt create mode 100644 sample_texts/und-Narb_chars.txt create mode 100644 sample_texts/und-Nbat_chars.txt create mode 100644 sample_texts/und-Newa_chars.txt create mode 100644 sample_texts/und-Palm_chars.txt create mode 100644 sample_texts/und-Pauc_chars.txt create mode 100644 sample_texts/und-Perm_chars.txt create mode 100644 sample_texts/und-Phlp_chars.txt create mode 100644 sample_texts/und-Plrd_chars.txt create mode 100644 sample_texts/und-Shrd_chars.txt create mode 100644 sample_texts/und-Sind_chars.txt create mode 100644 sample_texts/und-Sora_chars.txt create mode 100644 sample_texts/und-Takr_chars.txt create mode 100644 sample_texts/und-Tirh_chars.txt create mode 100644 sample_texts/und-Wara_chars.txt diff --git a/sample_texts/und-Aghb_chars.txt b/sample_texts/und-Aghb_chars.txt new file mode 100644 index 00000000..35c36ae0 --- /dev/null +++ b/sample_texts/und-Aghb_chars.txt @@ -0,0 +1 @@ +๐”ฐ ๐”ฑ ๐”ฒ ๐”ณ ๐”ด ๐”ต ๐”ถ ๐”ท ๐”ธ ๐”น ๐”บ ๐”ป ๐”ผ ๐”ฝ ๐”พ ๐”ฟ ๐•€ ๐• ๐•‚ ๐•ƒ ๐•„ ๐•… ๐•† ๐•‡ ๐•ˆ ๐•‰ ๐•Š ๐•‹ ๐•Œ ๐• ๐•Ž ๐• ๐• ๐•‘ ๐•’ ๐•“ ๐•” ๐•• ๐•– ๐•— ๐•˜ ๐•™ ๐•š ๐•› ๐•œ ๐• ๐•ž ๐•Ÿ ๐•  ๐•ก ๐•ข ๐•ฃ ๐•ค ๐•ฅ ๐•ฆ ๐•ง ๐•จ ๐•ฉ ๐•ช ๐•ซ ๐•ฌ ๐•ญ ๐•ฎ ๐•ฏ diff --git a/sample_texts/und-Ahom_chars.txt b/sample_texts/und-Ahom_chars.txt new file mode 100644 index 00000000..81862596 --- /dev/null +++ b/sample_texts/und-Ahom_chars.txt @@ -0,0 +1 @@ +๐‘œ€ ๐‘œ ๐‘œ‚ ๐‘œƒ ๐‘œ„ ๐‘œ… ๐‘œ† ๐‘œ‡ ๐‘œˆ ๐‘œ‰ ๐‘œŠ ๐‘œ‹ ๐‘œŒ ๐‘œ ๐‘œŽ ๐‘œ ๐‘œ ๐‘œ‘ ๐‘œ’ ๐‘œ“ ๐‘œ” ๐‘œ• ๐‘œ– ๐‘œ— ๐‘œ˜ ๐‘œ™ ๐‘œš ๐‘œ› ๐‘œœ ๐‘œ ๐‘œž ๐‘œŸ ๐‘œ  ๐‘œก ๐‘œข ๐‘œฃ ๐‘œค ๐‘œฅ ๐‘œฆ ๐‘œง ๐‘œจ ๐‘œฉ ๐‘œช ๐‘œซ ๐‘œฌ ๐‘œญ ๐‘œฎ ๐‘œฏ ๐‘œฐ ๐‘œฑ ๐‘œฒ ๐‘œณ ๐‘œด ๐‘œต ๐‘œถ ๐‘œท ๐‘œธ ๐‘œน ๐‘œบ ๐‘œป ๐‘œผ ๐‘œฝ ๐‘œพ ๐‘œฟ diff --git a/sample_texts/und-Bhks_chars.txt b/sample_texts/und-Bhks_chars.txt new file mode 100644 index 00000000..dd6917dc --- /dev/null +++ b/sample_texts/und-Bhks_chars.txt @@ -0,0 +1 @@ +๐‘ฐ€ ๐‘ฐ ๐‘ฐ‚ ๐‘ฐƒ ๐‘ฐ„ ๐‘ฐ… ๐‘ฐ† ๐‘ฐ‡ ๐‘ฐˆ ๐‘ฐ‰ ๐‘ฐŠ ๐‘ฐ‹ ๐‘ฐŒ ๐‘ฐ ๐‘ฐŽ ๐‘ฐ ๐‘ฐ ๐‘ฐ‘ ๐‘ฐ’ ๐‘ฐ“ ๐‘ฐ” ๐‘ฐ• ๐‘ฐ– ๐‘ฐ— ๐‘ฐ˜ ๐‘ฐ™ ๐‘ฐš ๐‘ฐ› ๐‘ฐœ ๐‘ฐ ๐‘ฐž ๐‘ฐŸ ๐‘ฐ  ๐‘ฐก ๐‘ฐข ๐‘ฐฃ ๐‘ฐค ๐‘ฐฅ ๐‘ฐฆ ๐‘ฐง ๐‘ฐจ ๐‘ฐฉ ๐‘ฐช ๐‘ฐซ ๐‘ฐฌ ๐‘ฐญ ๐‘ฐฎ ๐‘ฐฏ ๐‘ฐฐ ๐‘ฐฑ ๐‘ฐฒ ๐‘ฐณ ๐‘ฐด ๐‘ฐต ๐‘ฐถ ๐‘ฐท ๐‘ฐธ ๐‘ฐน ๐‘ฐบ ๐‘ฐป ๐‘ฐผ ๐‘ฐฝ ๐‘ฐพ ๐‘ฐฟ ๐‘ฑ€ ๐‘ฑ ๐‘ฑ‚ ๐‘ฑƒ ๐‘ฑ„ ๐‘ฑ… ๐‘ฑ† ๐‘ฑ‡ ๐‘ฑˆ ๐‘ฑ‰ ๐‘ฑŠ ๐‘ฑ‹ ๐‘ฑŒ ๐‘ฑ ๐‘ฑŽ ๐‘ฑ ๐‘ฑ ๐‘ฑ‘ ๐‘ฑ’ ๐‘ฑ“ ๐‘ฑ” ๐‘ฑ• ๐‘ฑ– ๐‘ฑ— ๐‘ฑ˜ ๐‘ฑ™ ๐‘ฑš ๐‘ฑ› ๐‘ฑœ ๐‘ฑ ๐‘ฑž ๐‘ฑŸ ๐‘ฑ  ๐‘ฑก ๐‘ฑข ๐‘ฑฃ ๐‘ฑค ๐‘ฑฅ ๐‘ฑฆ ๐‘ฑง ๐‘ฑจ ๐‘ฑฉ ๐‘ฑช ๐‘ฑซ ๐‘ฑฌ ๐‘ฑญ ๐‘ฑฎ ๐‘ฑฏ diff --git a/sample_texts/und-Dupl_chars.txt b/sample_texts/und-Dupl_chars.txt new file mode 100644 index 00000000..d4c1e64a --- /dev/null +++ b/sample_texts/und-Dupl_chars.txt @@ -0,0 +1 @@ +๐›ฐ€ ๐›ฐ ๐›ฐ‚ ๐›ฐƒ ๐›ฐ„ ๐›ฐ… ๐›ฐ† ๐›ฐ‡ ๐›ฐˆ ๐›ฐ‰ ๐›ฐŠ ๐›ฐ‹ ๐›ฐŒ ๐›ฐ ๐›ฐŽ ๐›ฐ ๐›ฐ ๐›ฐ‘ ๐›ฐ’ ๐›ฐ“ ๐›ฐ” ๐›ฐ• ๐›ฐ– ๐›ฐ— ๐›ฐ˜ ๐›ฐ™ ๐›ฐš ๐›ฐ› ๐›ฐœ ๐›ฐ ๐›ฐž ๐›ฐŸ ๐›ฐ  ๐›ฐก ๐›ฐข ๐›ฐฃ ๐›ฐค ๐›ฐฅ ๐›ฐฆ ๐›ฐง ๐›ฐจ ๐›ฐฉ ๐›ฐช ๐›ฐซ ๐›ฐฌ ๐›ฐญ ๐›ฐฎ ๐›ฐฏ ๐›ฐฐ ๐›ฐฑ ๐›ฐฒ ๐›ฐณ ๐›ฐด ๐›ฐต ๐›ฐถ ๐›ฐท ๐›ฐธ ๐›ฐน ๐›ฐบ ๐›ฐป ๐›ฐผ ๐›ฐฝ ๐›ฐพ ๐›ฐฟ ๐›ฑ€ ๐›ฑ ๐›ฑ‚ ๐›ฑƒ ๐›ฑ„ ๐›ฑ… ๐›ฑ† ๐›ฑ‡ ๐›ฑˆ ๐›ฑ‰ ๐›ฑŠ ๐›ฑ‹ ๐›ฑŒ ๐›ฑ ๐›ฑŽ ๐›ฑ ๐›ฑ ๐›ฑ‘ ๐›ฑ’ ๐›ฑ“ ๐›ฑ” ๐›ฑ• ๐›ฑ– ๐›ฑ— ๐›ฑ˜ ๐›ฑ™ ๐›ฑš ๐›ฑ› ๐›ฑœ ๐›ฑ ๐›ฑž ๐›ฑŸ ๐›ฑ  ๐›ฑก ๐›ฑข ๐›ฑฃ ๐›ฑค ๐›ฑฅ ๐›ฑฆ ๐›ฑง ๐›ฑจ ๐›ฑฉ ๐›ฑช ๐›ฑซ ๐›ฑฌ ๐›ฑญ ๐›ฑฎ ๐›ฑฏ ๐›ฑฐ ๐›ฑฑ ๐›ฑฒ ๐›ฑณ ๐›ฑด ๐›ฑต ๐›ฑถ ๐›ฑท ๐›ฑธ ๐›ฑน ๐›ฑบ ๐›ฑป ๐›ฑผ ๐›ฑฝ ๐›ฑพ ๐›ฑฟ ๐›ฒ€ ๐›ฒ ๐›ฒ‚ ๐›ฒƒ ๐›ฒ„ ๐›ฒ… ๐›ฒ† ๐›ฒ‡ ๐›ฒˆ ๐›ฒ‰ ๐›ฒŠ ๐›ฒ‹ ๐›ฒŒ ๐›ฒ ๐›ฒŽ ๐›ฒ ๐›ฒ ๐›ฒ‘ ๐›ฒ’ ๐›ฒ“ ๐›ฒ” ๐›ฒ• ๐›ฒ– ๐›ฒ— ๐›ฒ˜ ๐›ฒ™ ๐›ฒš ๐›ฒ› ๐›ฒœ ๐›ฒ ๐›ฒž ๐›ฒŸ diff --git a/sample_texts/und-Elba_chars.txt b/sample_texts/und-Elba_chars.txt new file mode 100644 index 00000000..4a3f7bd7 --- /dev/null +++ b/sample_texts/und-Elba_chars.txt @@ -0,0 +1 @@ +๐”€ ๐” ๐”‚ ๐”ƒ ๐”„ ๐”… ๐”† ๐”‡ ๐”ˆ ๐”‰ ๐”Š ๐”‹ ๐”Œ ๐” ๐”Ž ๐” ๐” ๐”‘ ๐”’ ๐”“ ๐”” ๐”• ๐”– ๐”— ๐”˜ ๐”™ ๐”š ๐”› ๐”œ ๐” ๐”ž ๐”Ÿ ๐”  ๐”ก ๐”ข ๐”ฃ ๐”ค ๐”ฅ ๐”ฆ ๐”ง ๐”จ ๐”ฉ ๐”ช ๐”ซ ๐”ฌ ๐”ญ ๐”ฎ ๐”ฏ diff --git a/sample_texts/und-Gran_chars.txt b/sample_texts/und-Gran_chars.txt new file mode 100644 index 00000000..1d373725 --- /dev/null +++ b/sample_texts/und-Gran_chars.txt @@ -0,0 +1 @@ +๐‘Œ€ ๐‘Œ ๐‘Œ‚ ๐‘Œƒ ๐‘Œ„ ๐‘Œ… ๐‘Œ† ๐‘Œ‡ ๐‘Œˆ ๐‘Œ‰ ๐‘ŒŠ ๐‘Œ‹ ๐‘ŒŒ ๐‘Œ ๐‘ŒŽ ๐‘Œ ๐‘Œ ๐‘Œ‘ ๐‘Œ’ ๐‘Œ“ ๐‘Œ” ๐‘Œ• ๐‘Œ– ๐‘Œ— ๐‘Œ˜ ๐‘Œ™ ๐‘Œš ๐‘Œ› ๐‘Œœ ๐‘Œ ๐‘Œž ๐‘ŒŸ ๐‘Œ  ๐‘Œก ๐‘Œข ๐‘Œฃ ๐‘Œค ๐‘Œฅ ๐‘Œฆ ๐‘Œง ๐‘Œจ ๐‘Œฉ ๐‘Œช ๐‘Œซ ๐‘Œฌ ๐‘Œญ ๐‘Œฎ ๐‘Œฏ ๐‘Œฐ ๐‘Œฑ ๐‘Œฒ ๐‘Œณ ๐‘Œด ๐‘Œต ๐‘Œถ ๐‘Œท ๐‘Œธ ๐‘Œน ๐‘Œบ ๐‘Œป ๐‘Œผ ๐‘Œฝ ๐‘Œพ ๐‘Œฟ ๐‘€ ๐‘ ๐‘‚ ๐‘ƒ ๐‘„ ๐‘… ๐‘† ๐‘‡ ๐‘ˆ ๐‘‰ ๐‘Š ๐‘‹ ๐‘Œ ๐‘ ๐‘Ž ๐‘ ๐‘ ๐‘‘ ๐‘’ ๐‘“ ๐‘” ๐‘• ๐‘– ๐‘— ๐‘˜ ๐‘™ ๐‘š ๐‘› ๐‘œ ๐‘ ๐‘ž ๐‘Ÿ ๐‘  ๐‘ก ๐‘ข ๐‘ฃ ๐‘ค ๐‘ฅ ๐‘ฆ ๐‘ง ๐‘จ ๐‘ฉ ๐‘ช ๐‘ซ ๐‘ฌ ๐‘ญ ๐‘ฎ ๐‘ฏ ๐‘ฐ ๐‘ฑ ๐‘ฒ ๐‘ณ ๐‘ด ๐‘ต ๐‘ถ ๐‘ท ๐‘ธ ๐‘น ๐‘บ ๐‘ป ๐‘ผ ๐‘ฝ ๐‘พ ๐‘ฟ diff --git a/sample_texts/und-Hatr_chars.txt b/sample_texts/und-Hatr_chars.txt new file mode 100644 index 00000000..1da514f2 --- /dev/null +++ b/sample_texts/und-Hatr_chars.txt @@ -0,0 +1 @@ +๐ฃ  ๐ฃก ๐ฃข ๐ฃฃ ๐ฃค ๐ฃฅ ๐ฃฆ ๐ฃง ๐ฃจ ๐ฃฉ ๐ฃช ๐ฃซ ๐ฃฌ ๐ฃญ ๐ฃฎ ๐ฃฏ ๐ฃฐ ๐ฃฑ ๐ฃฒ ๐ฃณ ๐ฃด ๐ฃต ๐ฃถ ๐ฃท ๐ฃธ ๐ฃน ๐ฃบ ๐ฃป ๐ฃผ ๐ฃฝ ๐ฃพ ๐ฃฟ diff --git a/sample_texts/und-Hmng_chars.txt b/sample_texts/und-Hmng_chars.txt new file mode 100644 index 00000000..7a9441f2 --- /dev/null +++ b/sample_texts/und-Hmng_chars.txt @@ -0,0 +1 @@ +๐–ฌ€ ๐–ฌ ๐–ฌ‚ ๐–ฌƒ ๐–ฌ„ ๐–ฌ… ๐–ฌ† ๐–ฌ‡ ๐–ฌˆ ๐–ฌ‰ ๐–ฌŠ ๐–ฌ‹ ๐–ฌŒ ๐–ฌ ๐–ฌŽ ๐–ฌ ๐–ฌ ๐–ฌ‘ ๐–ฌ’ ๐–ฌ“ ๐–ฌ” ๐–ฌ• ๐–ฌ– ๐–ฌ— ๐–ฌ˜ ๐–ฌ™ ๐–ฌš ๐–ฌ› ๐–ฌœ ๐–ฌ ๐–ฌž ๐–ฌŸ ๐–ฌ  ๐–ฌก ๐–ฌข ๐–ฌฃ ๐–ฌค ๐–ฌฅ ๐–ฌฆ ๐–ฌง ๐–ฌจ ๐–ฌฉ ๐–ฌช ๐–ฌซ ๐–ฌฌ ๐–ฌญ ๐–ฌฎ ๐–ฌฏ ๐–ฌฐ ๐–ฌฑ ๐–ฌฒ ๐–ฌณ ๐–ฌด ๐–ฌต ๐–ฌถ ๐–ฌท ๐–ฌธ ๐–ฌน ๐–ฌบ ๐–ฌป ๐–ฌผ ๐–ฌฝ ๐–ฌพ ๐–ฌฟ ๐–ญ€ ๐–ญ ๐–ญ‚ ๐–ญƒ ๐–ญ„ ๐–ญ… ๐–ญ† ๐–ญ‡ ๐–ญˆ ๐–ญ‰ ๐–ญŠ ๐–ญ‹ ๐–ญŒ ๐–ญ ๐–ญŽ ๐–ญ ๐–ญ ๐–ญ‘ ๐–ญ’ ๐–ญ“ ๐–ญ” ๐–ญ• ๐–ญ– ๐–ญ— ๐–ญ˜ ๐–ญ™ ๐–ญš ๐–ญ› ๐–ญœ ๐–ญ ๐–ญž ๐–ญŸ ๐–ญ  ๐–ญก ๐–ญข ๐–ญฃ ๐–ญค ๐–ญฅ ๐–ญฆ ๐–ญง ๐–ญจ ๐–ญฉ ๐–ญช ๐–ญซ ๐–ญฌ ๐–ญญ ๐–ญฎ ๐–ญฏ ๐–ญฐ ๐–ญฑ ๐–ญฒ ๐–ญณ ๐–ญด ๐–ญต ๐–ญถ ๐–ญท ๐–ญธ ๐–ญน ๐–ญบ ๐–ญป ๐–ญผ ๐–ญฝ ๐–ญพ ๐–ญฟ ๐–ฎ€ ๐–ฎ ๐–ฎ‚ ๐–ฎƒ ๐–ฎ„ ๐–ฎ… ๐–ฎ† ๐–ฎ‡ ๐–ฎˆ ๐–ฎ‰ ๐–ฎŠ ๐–ฎ‹ ๐–ฎŒ ๐–ฎ ๐–ฎŽ ๐–ฎ diff --git a/sample_texts/und-Hung_chars.txt b/sample_texts/und-Hung_chars.txt new file mode 100644 index 00000000..5c5e6227 --- /dev/null +++ b/sample_texts/und-Hung_chars.txt @@ -0,0 +1 @@ +๐ฒ€ ๐ฒ ๐ฒ‚ ๐ฒƒ ๐ฒ„ ๐ฒ… ๐ฒ† ๐ฒ‡ ๐ฒˆ ๐ฒ‰ ๐ฒŠ ๐ฒ‹ ๐ฒŒ ๐ฒ ๐ฒŽ ๐ฒ ๐ฒ ๐ฒ‘ ๐ฒ’ ๐ฒ“ ๐ฒ” ๐ฒ• ๐ฒ– ๐ฒ— ๐ฒ˜ ๐ฒ™ ๐ฒš ๐ฒ› ๐ฒœ ๐ฒ ๐ฒž ๐ฒŸ ๐ฒ  ๐ฒก ๐ฒข ๐ฒฃ ๐ฒค ๐ฒฅ ๐ฒฆ ๐ฒง ๐ฒจ ๐ฒฉ ๐ฒช ๐ฒซ ๐ฒฌ ๐ฒญ ๐ฒฎ ๐ฒฏ ๐ฒฐ ๐ฒฑ ๐ฒฒ ๐ฒณ ๐ฒด ๐ฒต ๐ฒถ ๐ฒท ๐ฒธ ๐ฒน ๐ฒบ ๐ฒป ๐ฒผ ๐ฒฝ ๐ฒพ ๐ฒฟ ๐ณ€ ๐ณ ๐ณ‚ ๐ณƒ ๐ณ„ ๐ณ… ๐ณ† ๐ณ‡ ๐ณˆ ๐ณ‰ ๐ณŠ ๐ณ‹ ๐ณŒ ๐ณ ๐ณŽ ๐ณ ๐ณ ๐ณ‘ ๐ณ’ ๐ณ“ ๐ณ” ๐ณ• ๐ณ– ๐ณ— ๐ณ˜ ๐ณ™ ๐ณš ๐ณ› ๐ณœ ๐ณ ๐ณž ๐ณŸ ๐ณ  ๐ณก ๐ณข ๐ณฃ ๐ณค ๐ณฅ ๐ณฆ ๐ณง ๐ณจ ๐ณฉ ๐ณช ๐ณซ ๐ณฌ ๐ณญ ๐ณฎ ๐ณฏ ๐ณฐ ๐ณฑ ๐ณฒ ๐ณณ ๐ณด ๐ณต ๐ณถ ๐ณท ๐ณธ ๐ณน ๐ณบ ๐ณป ๐ณผ ๐ณฝ ๐ณพ ๐ณฟ diff --git a/sample_texts/und-Lina_chars.txt b/sample_texts/und-Lina_chars.txt new file mode 100644 index 00000000..efaf7803 --- /dev/null +++ b/sample_texts/und-Lina_chars.txt @@ -0,0 +1 @@ +๐˜€ ๐˜ ๐˜‚ ๐˜ƒ ๐˜„ ๐˜… ๐˜† ๐˜‡ ๐˜ˆ ๐˜‰ ๐˜Š ๐˜‹ ๐˜Œ ๐˜ ๐˜Ž ๐˜ ๐˜ ๐˜‘ ๐˜’ ๐˜“ ๐˜” ๐˜• ๐˜– ๐˜— ๐˜˜ ๐˜™ ๐˜š ๐˜› ๐˜œ ๐˜ ๐˜ž ๐˜Ÿ ๐˜  ๐˜ก ๐˜ข ๐˜ฃ ๐˜ค ๐˜ฅ ๐˜ฆ ๐˜ง ๐˜จ ๐˜ฉ ๐˜ช ๐˜ซ ๐˜ฌ ๐˜ญ ๐˜ฎ ๐˜ฏ ๐˜ฐ ๐˜ฑ ๐˜ฒ ๐˜ณ ๐˜ด ๐˜ต ๐˜ถ ๐˜ท ๐˜ธ ๐˜น ๐˜บ ๐˜ป ๐˜ผ ๐˜ฝ ๐˜พ ๐˜ฟ ๐™€ ๐™ ๐™‚ ๐™ƒ ๐™„ ๐™… ๐™† ๐™‡ ๐™ˆ ๐™‰ ๐™Š ๐™‹ ๐™Œ ๐™ ๐™Ž ๐™ ๐™ ๐™‘ ๐™’ ๐™“ ๐™” ๐™• ๐™– ๐™— ๐™˜ ๐™™ ๐™š ๐™› ๐™œ ๐™ ๐™ž ๐™Ÿ ๐™  ๐™ก ๐™ข ๐™ฃ ๐™ค ๐™ฅ ๐™ฆ ๐™ง ๐™จ ๐™ฉ ๐™ช ๐™ซ ๐™ฌ ๐™ญ ๐™ฎ ๐™ฏ ๐™ฐ ๐™ฑ ๐™ฒ ๐™ณ ๐™ด ๐™ต ๐™ถ ๐™ท ๐™ธ ๐™น ๐™บ ๐™ป ๐™ผ ๐™ฝ ๐™พ ๐™ฟ ๐š€ ๐š ๐š‚ ๐šƒ ๐š„ ๐š… ๐š† ๐š‡ ๐šˆ ๐š‰ ๐šŠ ๐š‹ ๐šŒ ๐š ๐šŽ ๐š ๐š ๐š‘ ๐š’ ๐š“ ๐š” ๐š• ๐š– ๐š— ๐š˜ ๐š™ ๐šš ๐š› ๐šœ ๐š ๐šž ๐šŸ ๐š  ๐šก ๐šข ๐šฃ ๐šค ๐šฅ ๐šฆ ๐šง ๐šจ ๐šฉ ๐šช ๐šซ ๐šฌ ๐šญ ๐šฎ ๐šฏ ๐šฐ ๐šฑ ๐šฒ ๐šณ ๐šด ๐šต ๐šถ ๐šท ๐šธ ๐šน ๐šบ ๐šป ๐šผ ๐šฝ ๐šพ ๐šฟ ๐›€ ๐› ๐›‚ ๐›ƒ ๐›„ ๐›… ๐›† ๐›‡ ๐›ˆ ๐›‰ ๐›Š ๐›‹ ๐›Œ ๐› ๐›Ž ๐› ๐› ๐›‘ ๐›’ ๐›“ ๐›” ๐›• ๐›– ๐›— ๐›˜ ๐›™ ๐›š ๐›› ๐›œ ๐› ๐›ž ๐›Ÿ ๐›  ๐›ก ๐›ข ๐›ฃ ๐›ค ๐›ฅ ๐›ฆ ๐›ง ๐›จ ๐›ฉ ๐›ช ๐›ซ ๐›ฌ ๐›ญ ๐›ฎ ๐›ฏ ๐›ฐ ๐›ฑ ๐›ฒ ๐›ณ ๐›ด ๐›ต ๐›ถ ๐›ท ๐›ธ ๐›น ๐›บ ๐›ป ๐›ผ ๐›ฝ ๐›พ ๐›ฟ ๐œ€ ๐œ ๐œ‚ ๐œƒ ๐œ„ ๐œ… ๐œ† ๐œ‡ ๐œˆ ๐œ‰ ๐œŠ ๐œ‹ ๐œŒ ๐œ ๐œŽ ๐œ ๐œ ๐œ‘ ๐œ’ ๐œ“ ๐œ” ๐œ• ๐œ– ๐œ— ๐œ˜ ๐œ™ ๐œš ๐œ› ๐œœ ๐œ ๐œž ๐œŸ ๐œ  ๐œก ๐œข ๐œฃ ๐œค ๐œฅ ๐œฆ ๐œง ๐œจ ๐œฉ ๐œช ๐œซ ๐œฌ ๐œญ ๐œฎ ๐œฏ ๐œฐ ๐œฑ ๐œฒ ๐œณ ๐œด ๐œต ๐œถ ๐œท ๐œธ ๐œน ๐œบ ๐œป ๐œผ ๐œฝ ๐œพ ๐œฟ ๐€ ๐ ๐‚ ๐ƒ ๐„ ๐… ๐† ๐‡ ๐ˆ ๐‰ ๐Š ๐‹ ๐Œ ๐ ๐Ž ๐ ๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐– ๐— ๐˜ ๐™ ๐š ๐› ๐œ ๐ ๐ž ๐Ÿ ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐จ ๐ฉ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฎ ๐ฏ ๐ฐ ๐ฑ ๐ฒ ๐ณ ๐ด ๐ต ๐ถ ๐ท ๐ธ ๐น ๐บ ๐ป ๐ผ ๐ฝ ๐พ ๐ฟ diff --git a/sample_texts/und-Mahj_chars.txt b/sample_texts/und-Mahj_chars.txt new file mode 100644 index 00000000..06f05d34 --- /dev/null +++ b/sample_texts/und-Mahj_chars.txt @@ -0,0 +1 @@ +๐‘… ๐‘…‘ ๐‘…’ ๐‘…“ ๐‘…” ๐‘…• ๐‘…– ๐‘…— ๐‘…˜ ๐‘…™ ๐‘…š ๐‘…› ๐‘…œ ๐‘… ๐‘…ž ๐‘…Ÿ ๐‘…  ๐‘…ก ๐‘…ข ๐‘…ฃ ๐‘…ค ๐‘…ฅ ๐‘…ฆ ๐‘…ง ๐‘…จ ๐‘…ฉ ๐‘…ช ๐‘…ซ ๐‘…ฌ ๐‘…ญ ๐‘…ฎ ๐‘…ฏ ๐‘…ฐ ๐‘…ฑ ๐‘…ฒ ๐‘…ณ ๐‘…ด ๐‘…ต ๐‘…ถ ๐‘…ท ๐‘…ธ ๐‘…น ๐‘…บ ๐‘…ป ๐‘…ผ ๐‘…ฝ ๐‘…พ ๐‘…ฟ diff --git a/sample_texts/und-Mani_chars.txt b/sample_texts/und-Mani_chars.txt new file mode 100644 index 00000000..828cdc8a --- /dev/null +++ b/sample_texts/und-Mani_chars.txt @@ -0,0 +1 @@ +๐ซ€ ๐ซ ๐ซ‚ ๐ซƒ ๐ซ„ ๐ซ… ๐ซ† ๐ซ‡ ๐ซˆ ๐ซ‰ ๐ซŠ ๐ซ‹ ๐ซŒ ๐ซ ๐ซŽ ๐ซ ๐ซ ๐ซ‘ ๐ซ’ ๐ซ“ ๐ซ” ๐ซ• ๐ซ– ๐ซ— ๐ซ˜ ๐ซ™ ๐ซš ๐ซ› ๐ซœ ๐ซ ๐ซž ๐ซŸ ๐ซ  ๐ซก ๐ซข ๐ซฃ ๐ซค ๐ซฅ ๐ซฆ ๐ซง ๐ซจ ๐ซฉ ๐ซช ๐ซซ ๐ซฌ ๐ซญ ๐ซฎ ๐ซฏ ๐ซฐ ๐ซฑ ๐ซฒ ๐ซณ ๐ซด ๐ซต ๐ซถ ๐ซท ๐ซธ ๐ซน ๐ซบ ๐ซป ๐ซผ ๐ซฝ ๐ซพ ๐ซฟ diff --git a/sample_texts/und-Marc_chars.txt b/sample_texts/und-Marc_chars.txt new file mode 100644 index 00000000..89f36901 --- /dev/null +++ b/sample_texts/und-Marc_chars.txt @@ -0,0 +1 @@ +๐‘ฑฐ ๐‘ฑฑ ๐‘ฑฒ ๐‘ฑณ ๐‘ฑด ๐‘ฑต ๐‘ฑถ ๐‘ฑท ๐‘ฑธ ๐‘ฑน ๐‘ฑบ ๐‘ฑป ๐‘ฑผ ๐‘ฑฝ ๐‘ฑพ ๐‘ฑฟ ๐‘ฒ€ ๐‘ฒ ๐‘ฒ‚ ๐‘ฒƒ ๐‘ฒ„ ๐‘ฒ… ๐‘ฒ† ๐‘ฒ‡ ๐‘ฒˆ ๐‘ฒ‰ ๐‘ฒŠ ๐‘ฒ‹ ๐‘ฒŒ ๐‘ฒ ๐‘ฒŽ ๐‘ฒ ๐‘ฒ ๐‘ฒ‘ ๐‘ฒ’ ๐‘ฒ“ ๐‘ฒ” ๐‘ฒ• ๐‘ฒ– ๐‘ฒ— ๐‘ฒ˜ ๐‘ฒ™ ๐‘ฒš ๐‘ฒ› ๐‘ฒœ ๐‘ฒ ๐‘ฒž ๐‘ฒŸ ๐‘ฒ  ๐‘ฒก ๐‘ฒข ๐‘ฒฃ ๐‘ฒค ๐‘ฒฅ ๐‘ฒฆ ๐‘ฒง ๐‘ฒจ ๐‘ฒฉ ๐‘ฒช ๐‘ฒซ ๐‘ฒฌ ๐‘ฒญ ๐‘ฒฎ ๐‘ฒฏ ๐‘ฒฐ ๐‘ฒฑ ๐‘ฒฒ ๐‘ฒณ ๐‘ฒด ๐‘ฒต ๐‘ฒถ ๐‘ฒท ๐‘ฒธ ๐‘ฒน ๐‘ฒบ ๐‘ฒป ๐‘ฒผ ๐‘ฒฝ ๐‘ฒพ ๐‘ฒฟ diff --git a/sample_texts/und-Mend_chars.txt b/sample_texts/und-Mend_chars.txt new file mode 100644 index 00000000..f939f11e --- /dev/null +++ b/sample_texts/und-Mend_chars.txt @@ -0,0 +1 @@ +๐ž € ๐ž  ๐ž ‚ ๐ž ƒ ๐ž „ ๐ž … ๐ž † ๐ž ‡ ๐ž ˆ ๐ž ‰ ๐ž Š ๐ž ‹ ๐ž Œ ๐ž  ๐ž Ž ๐ž  ๐ž  ๐ž ‘ ๐ž ’ ๐ž “ ๐ž ” ๐ž • ๐ž – ๐ž — ๐ž ˜ ๐ž ™ ๐ž š ๐ž › ๐ž œ ๐ž  ๐ž ž ๐ž Ÿ ๐ž   ๐ž ก ๐ž ข ๐ž ฃ ๐ž ค ๐ž ฅ ๐ž ฆ ๐ž ง ๐ž จ ๐ž ฉ ๐ž ช ๐ž ซ ๐ž ฌ ๐ž ญ ๐ž ฎ ๐ž ฏ ๐ž ฐ ๐ž ฑ ๐ž ฒ ๐ž ณ ๐ž ด ๐ž ต ๐ž ถ ๐ž ท ๐ž ธ ๐ž น ๐ž บ ๐ž ป ๐ž ผ ๐ž ฝ ๐ž พ ๐ž ฟ ๐žก€ ๐žก ๐žก‚ ๐žกƒ ๐žก„ ๐žก… ๐žก† ๐žก‡ ๐žกˆ ๐žก‰ ๐žกŠ ๐žก‹ ๐žกŒ ๐žก ๐žกŽ ๐žก ๐žก ๐žก‘ ๐žก’ ๐žก“ ๐žก” ๐žก• ๐žก– ๐žก— ๐žก˜ ๐žก™ ๐žกš ๐žก› ๐žกœ ๐žก ๐žกž ๐žกŸ ๐žก  ๐žกก ๐žกข ๐žกฃ ๐žกค ๐žกฅ ๐žกฆ ๐žกง ๐žกจ ๐žกฉ ๐žกช ๐žกซ ๐žกฌ ๐žกญ ๐žกฎ ๐žกฏ ๐žกฐ ๐žกฑ ๐žกฒ ๐žกณ ๐žกด ๐žกต ๐žกถ ๐žกท ๐žกธ ๐žกน ๐žกบ ๐žกป ๐žกผ ๐žกฝ ๐žกพ ๐žกฟ ๐žข€ ๐žข ๐žข‚ ๐žขƒ ๐žข„ ๐žข… ๐žข† ๐žข‡ ๐žขˆ ๐žข‰ ๐žขŠ ๐žข‹ ๐žขŒ ๐žข ๐žขŽ ๐žข ๐žข ๐žข‘ ๐žข’ ๐žข“ ๐žข” ๐žข• ๐žข– ๐žข— ๐žข˜ ๐žข™ ๐žขš ๐žข› ๐žขœ ๐žข ๐žขž ๐žขŸ ๐žข  ๐žขก ๐žขข ๐žขฃ ๐žขค ๐žขฅ ๐žขฆ ๐žขง ๐žขจ ๐žขฉ ๐žขช ๐žขซ ๐žขฌ ๐žขญ ๐žขฎ ๐žขฏ ๐žขฐ ๐žขฑ ๐žขฒ ๐žขณ ๐žขด ๐žขต ๐žขถ ๐žขท ๐žขธ ๐žขน ๐žขบ ๐žขป ๐žขผ ๐žขฝ ๐žขพ ๐žขฟ ๐žฃ€ ๐žฃ ๐žฃ‚ ๐žฃƒ ๐žฃ„ ๐žฃ… ๐žฃ† ๐žฃ‡ ๐žฃˆ ๐žฃ‰ ๐žฃŠ ๐žฃ‹ ๐žฃŒ ๐žฃ ๐žฃŽ ๐žฃ ๐žฃ ๐žฃ‘ ๐žฃ’ ๐žฃ“ ๐žฃ” ๐žฃ• ๐žฃ– ๐žฃ— ๐žฃ˜ ๐žฃ™ ๐žฃš ๐žฃ› ๐žฃœ ๐žฃ ๐žฃž ๐žฃŸ diff --git a/sample_texts/und-Modi_chars.txt b/sample_texts/und-Modi_chars.txt new file mode 100644 index 00000000..f747a4ff --- /dev/null +++ b/sample_texts/und-Modi_chars.txt @@ -0,0 +1 @@ +๐‘˜€ ๐‘˜ ๐‘˜‚ ๐‘˜ƒ ๐‘˜„ ๐‘˜… ๐‘˜† ๐‘˜‡ ๐‘˜ˆ ๐‘˜‰ ๐‘˜Š ๐‘˜‹ ๐‘˜Œ ๐‘˜ ๐‘˜Ž ๐‘˜ ๐‘˜ ๐‘˜‘ ๐‘˜’ ๐‘˜“ ๐‘˜” ๐‘˜• ๐‘˜– ๐‘˜— ๐‘˜˜ ๐‘˜™ ๐‘˜š ๐‘˜› ๐‘˜œ ๐‘˜ ๐‘˜ž ๐‘˜Ÿ ๐‘˜  ๐‘˜ก ๐‘˜ข ๐‘˜ฃ ๐‘˜ค ๐‘˜ฅ ๐‘˜ฆ ๐‘˜ง ๐‘˜จ ๐‘˜ฉ ๐‘˜ช ๐‘˜ซ ๐‘˜ฌ ๐‘˜ญ ๐‘˜ฎ ๐‘˜ฏ ๐‘˜ฐ ๐‘˜ฑ ๐‘˜ฒ ๐‘˜ณ ๐‘˜ด ๐‘˜ต ๐‘˜ถ ๐‘˜ท ๐‘˜ธ ๐‘˜น ๐‘˜บ ๐‘˜ป ๐‘˜ผ ๐‘˜ฝ ๐‘˜พ ๐‘˜ฟ ๐‘™€ ๐‘™ ๐‘™‚ ๐‘™ƒ ๐‘™„ ๐‘™… ๐‘™† ๐‘™‡ ๐‘™ˆ ๐‘™‰ ๐‘™Š ๐‘™‹ ๐‘™Œ ๐‘™ ๐‘™Ž ๐‘™ ๐‘™ ๐‘™‘ ๐‘™’ ๐‘™“ ๐‘™” ๐‘™• ๐‘™– ๐‘™— ๐‘™˜ ๐‘™™ ๐‘™š ๐‘™› ๐‘™œ ๐‘™ ๐‘™ž ๐‘™Ÿ diff --git a/sample_texts/und-Mroo_chars.txt b/sample_texts/und-Mroo_chars.txt new file mode 100644 index 00000000..82d19023 --- /dev/null +++ b/sample_texts/und-Mroo_chars.txt @@ -0,0 +1 @@ +๐–ฉ€ ๐–ฉ ๐–ฉ‚ ๐–ฉƒ ๐–ฉ„ ๐–ฉ… ๐–ฉ† ๐–ฉ‡ ๐–ฉˆ ๐–ฉ‰ ๐–ฉŠ ๐–ฉ‹ ๐–ฉŒ ๐–ฉ ๐–ฉŽ ๐–ฉ ๐–ฉ ๐–ฉ‘ ๐–ฉ’ ๐–ฉ“ ๐–ฉ” ๐–ฉ• ๐–ฉ– ๐–ฉ— ๐–ฉ˜ ๐–ฉ™ ๐–ฉš ๐–ฉ› ๐–ฉœ ๐–ฉ ๐–ฉž ๐–ฉŸ ๐–ฉ  ๐–ฉก ๐–ฉข ๐–ฉฃ ๐–ฉค ๐–ฉฅ ๐–ฉฆ ๐–ฉง ๐–ฉจ ๐–ฉฉ ๐–ฉช ๐–ฉซ ๐–ฉฌ ๐–ฉญ ๐–ฉฎ ๐–ฉฏ diff --git a/sample_texts/und-Mult_chars.txt b/sample_texts/und-Mult_chars.txt new file mode 100644 index 00000000..7d4619f4 --- /dev/null +++ b/sample_texts/und-Mult_chars.txt @@ -0,0 +1 @@ +๐‘Š€ ๐‘Š ๐‘Š‚ ๐‘Šƒ ๐‘Š„ ๐‘Š… ๐‘Š† ๐‘Š‡ ๐‘Šˆ ๐‘Š‰ ๐‘ŠŠ ๐‘Š‹ ๐‘ŠŒ ๐‘Š ๐‘ŠŽ ๐‘Š ๐‘Š ๐‘Š‘ ๐‘Š’ ๐‘Š“ ๐‘Š” ๐‘Š• ๐‘Š– ๐‘Š— ๐‘Š˜ ๐‘Š™ ๐‘Šš ๐‘Š› ๐‘Šœ ๐‘Š ๐‘Šž ๐‘ŠŸ ๐‘Š  ๐‘Šก ๐‘Šข ๐‘Šฃ ๐‘Šค ๐‘Šฅ ๐‘Šฆ ๐‘Šง ๐‘Šจ ๐‘Šฉ ๐‘Šช ๐‘Šซ ๐‘Šฌ ๐‘Šญ ๐‘Šฎ ๐‘Šฏ diff --git a/sample_texts/und-Narb_chars.txt b/sample_texts/und-Narb_chars.txt new file mode 100644 index 00000000..91f6ec30 --- /dev/null +++ b/sample_texts/und-Narb_chars.txt @@ -0,0 +1 @@ +๐ช€ ๐ช ๐ช‚ ๐ชƒ ๐ช„ ๐ช… ๐ช† ๐ช‡ ๐ชˆ ๐ช‰ ๐ชŠ ๐ช‹ ๐ชŒ ๐ช ๐ชŽ ๐ช ๐ช ๐ช‘ ๐ช’ ๐ช“ ๐ช” ๐ช• ๐ช– ๐ช— ๐ช˜ ๐ช™ ๐ชš ๐ช› ๐ชœ ๐ช ๐ชž ๐ชŸ diff --git a/sample_texts/und-Nbat_chars.txt b/sample_texts/und-Nbat_chars.txt new file mode 100644 index 00000000..f917d99a --- /dev/null +++ b/sample_texts/und-Nbat_chars.txt @@ -0,0 +1 @@ +๐ข€ ๐ข ๐ข‚ ๐ขƒ ๐ข„ ๐ข… ๐ข† ๐ข‡ ๐ขˆ ๐ข‰ ๐ขŠ ๐ข‹ ๐ขŒ ๐ข ๐ขŽ ๐ข ๐ข ๐ข‘ ๐ข’ ๐ข“ ๐ข” ๐ข• ๐ข– ๐ข— ๐ข˜ ๐ข™ ๐ขš ๐ข› ๐ขœ ๐ข ๐ขž ๐ขŸ ๐ข  ๐ขก ๐ขข ๐ขฃ ๐ขค ๐ขฅ ๐ขฆ ๐ขง ๐ขจ ๐ขฉ ๐ขช ๐ขซ ๐ขฌ ๐ขญ ๐ขฎ ๐ขฏ diff --git a/sample_texts/und-Newa_chars.txt b/sample_texts/und-Newa_chars.txt new file mode 100644 index 00000000..e81353fa --- /dev/null +++ b/sample_texts/und-Newa_chars.txt @@ -0,0 +1 @@ +๐‘€ ๐‘ ๐‘‚ ๐‘ƒ ๐‘„ ๐‘… ๐‘† ๐‘‡ ๐‘ˆ ๐‘‰ ๐‘Š ๐‘‹ ๐‘Œ ๐‘ ๐‘Ž ๐‘ ๐‘ ๐‘‘ ๐‘’ ๐‘“ ๐‘” ๐‘• ๐‘– ๐‘— ๐‘˜ ๐‘™ ๐‘š ๐‘› ๐‘œ ๐‘ ๐‘ž ๐‘Ÿ ๐‘  ๐‘ก ๐‘ข ๐‘ฃ ๐‘ค ๐‘ฅ ๐‘ฆ ๐‘ง ๐‘จ ๐‘ฉ ๐‘ช ๐‘ซ ๐‘ฌ ๐‘ญ ๐‘ฎ ๐‘ฏ ๐‘ฐ ๐‘ฑ ๐‘ฒ ๐‘ณ ๐‘ด ๐‘ต ๐‘ถ ๐‘ท ๐‘ธ ๐‘น ๐‘บ ๐‘ป ๐‘ผ ๐‘ฝ ๐‘พ ๐‘ฟ ๐‘‘€ ๐‘‘ ๐‘‘‚ ๐‘‘ƒ ๐‘‘„ ๐‘‘… ๐‘‘† ๐‘‘‡ ๐‘‘ˆ ๐‘‘‰ ๐‘‘Š ๐‘‘‹ ๐‘‘Œ ๐‘‘ ๐‘‘Ž ๐‘‘ ๐‘‘ ๐‘‘‘ ๐‘‘’ ๐‘‘“ ๐‘‘” ๐‘‘• ๐‘‘– ๐‘‘— ๐‘‘˜ ๐‘‘™ ๐‘‘š ๐‘‘› ๐‘‘œ ๐‘‘ ๐‘‘ž ๐‘‘Ÿ ๐‘‘  ๐‘‘ก ๐‘‘ข ๐‘‘ฃ ๐‘‘ค ๐‘‘ฅ ๐‘‘ฆ ๐‘‘ง ๐‘‘จ ๐‘‘ฉ ๐‘‘ช ๐‘‘ซ ๐‘‘ฌ ๐‘‘ญ ๐‘‘ฎ ๐‘‘ฏ ๐‘‘ฐ ๐‘‘ฑ ๐‘‘ฒ ๐‘‘ณ ๐‘‘ด ๐‘‘ต ๐‘‘ถ ๐‘‘ท ๐‘‘ธ ๐‘‘น ๐‘‘บ ๐‘‘ป ๐‘‘ผ ๐‘‘ฝ ๐‘‘พ ๐‘‘ฟ diff --git a/sample_texts/und-Palm_chars.txt b/sample_texts/und-Palm_chars.txt new file mode 100644 index 00000000..4a5e65e4 --- /dev/null +++ b/sample_texts/und-Palm_chars.txt @@ -0,0 +1 @@ +๐ŸŒ€ ๐ŸŒ ๐ŸŒ‚ ๐ŸŒƒ ๐ŸŒ„ ๐ŸŒ… ๐ŸŒ† ๐ŸŒ‡ ๐ŸŒˆ ๐ŸŒ‰ ๐ŸŒŠ ๐ŸŒ‹ ๐ŸŒŒ ๐ŸŒ ๐ŸŒŽ ๐ŸŒ ๐ŸŒ ๐ŸŒ‘ ๐ŸŒ’ ๐ŸŒ“ ๐ŸŒ” ๐ŸŒ• ๐ŸŒ– ๐ŸŒ— ๐ŸŒ˜ ๐ŸŒ™ ๐ŸŒš ๐ŸŒ› ๐ŸŒœ ๐ŸŒ ๐ŸŒž ๐ŸŒŸ ๐ŸŒ  ๐ŸŒก ๐ŸŒข ๐ŸŒฃ ๐ŸŒค ๐ŸŒฅ ๐ŸŒฆ ๐ŸŒง ๐ŸŒจ ๐ŸŒฉ ๐ŸŒช ๐ŸŒซ ๐ŸŒฌ ๐ŸŒญ ๐ŸŒฎ ๐ŸŒฏ ๐ŸŒฐ ๐ŸŒฑ ๐ŸŒฒ ๐ŸŒณ ๐ŸŒด ๐ŸŒต ๐ŸŒถ ๐ŸŒท ๐ŸŒธ ๐ŸŒน ๐ŸŒบ ๐ŸŒป ๐ŸŒผ ๐ŸŒฝ ๐ŸŒพ ๐ŸŒฟ ๐Ÿ€ ๐Ÿ ๐Ÿ‚ ๐Ÿƒ ๐Ÿ„ ๐Ÿ… ๐Ÿ† ๐Ÿ‡ ๐Ÿˆ ๐Ÿ‰ ๐ŸŠ ๐Ÿ‹ ๐ŸŒ ๐Ÿ ๐ŸŽ ๐Ÿ ๐Ÿ ๐Ÿ‘ ๐Ÿ’ ๐Ÿ“ ๐Ÿ” ๐Ÿ• ๐Ÿ– ๐Ÿ— ๐Ÿ˜ ๐Ÿ™ ๐Ÿš ๐Ÿ› ๐Ÿœ ๐Ÿ ๐Ÿž ๐ŸŸ ๐Ÿ  ๐Ÿก ๐Ÿข ๐Ÿฃ ๐Ÿค ๐Ÿฅ ๐Ÿฆ ๐Ÿง ๐Ÿจ ๐Ÿฉ ๐Ÿช ๐Ÿซ ๐Ÿฌ ๐Ÿญ ๐Ÿฎ ๐Ÿฏ ๐Ÿฐ ๐Ÿฑ ๐Ÿฒ ๐Ÿณ ๐Ÿด ๐Ÿต ๐Ÿถ ๐Ÿท ๐Ÿธ ๐Ÿน ๐Ÿบ ๐Ÿป ๐Ÿผ ๐Ÿฝ ๐Ÿพ ๐Ÿฟ ๐ŸŽ€ ๐ŸŽ ๐ŸŽ‚ ๐ŸŽƒ ๐ŸŽ„ ๐ŸŽ… ๐ŸŽ† ๐ŸŽ‡ ๐ŸŽˆ ๐ŸŽ‰ ๐ŸŽŠ ๐ŸŽ‹ ๐ŸŽŒ ๐ŸŽ ๐ŸŽŽ ๐ŸŽ ๐ŸŽ ๐ŸŽ‘ ๐ŸŽ’ ๐ŸŽ“ ๐ŸŽ” ๐ŸŽ• ๐ŸŽ– ๐ŸŽ— ๐ŸŽ˜ ๐ŸŽ™ ๐ŸŽš ๐ŸŽ› ๐ŸŽœ ๐ŸŽ ๐ŸŽž ๐ŸŽŸ ๐ŸŽ  ๐ŸŽก ๐ŸŽข ๐ŸŽฃ ๐ŸŽค ๐ŸŽฅ ๐ŸŽฆ ๐ŸŽง ๐ŸŽจ ๐ŸŽฉ ๐ŸŽช ๐ŸŽซ ๐ŸŽฌ ๐ŸŽญ ๐ŸŽฎ ๐ŸŽฏ ๐ŸŽฐ ๐ŸŽฑ ๐ŸŽฒ ๐ŸŽณ ๐ŸŽด ๐ŸŽต ๐ŸŽถ ๐ŸŽท ๐ŸŽธ ๐ŸŽน ๐ŸŽบ ๐ŸŽป ๐ŸŽผ ๐ŸŽฝ ๐ŸŽพ ๐ŸŽฟ ๐Ÿ€ ๐Ÿ ๐Ÿ‚ ๐Ÿƒ ๐Ÿ„ ๐Ÿ… ๐Ÿ† ๐Ÿ‡ ๐Ÿˆ ๐Ÿ‰ ๐ŸŠ ๐Ÿ‹ ๐ŸŒ ๐Ÿ ๐ŸŽ ๐Ÿ ๐Ÿ ๐Ÿ‘ ๐Ÿ’ ๐Ÿ“ ๐Ÿ” ๐Ÿ• ๐Ÿ– ๐Ÿ— ๐Ÿ˜ ๐Ÿ™ ๐Ÿš ๐Ÿ› ๐Ÿœ ๐Ÿ ๐Ÿž ๐ŸŸ ๐Ÿ  ๐Ÿก ๐Ÿข ๐Ÿฃ ๐Ÿค ๐Ÿฅ ๐Ÿฆ ๐Ÿง ๐Ÿจ ๐Ÿฉ ๐Ÿช ๐Ÿซ ๐Ÿฌ ๐Ÿญ ๐Ÿฎ ๐Ÿฏ ๐Ÿฐ ๐Ÿฑ ๐Ÿฒ ๐Ÿณ ๐Ÿด ๐Ÿต ๐Ÿถ ๐Ÿท ๐Ÿธ ๐Ÿน ๐Ÿบ ๐Ÿป ๐Ÿผ ๐Ÿฝ ๐Ÿพ ๐Ÿฟ ๐Ÿ€ ๐Ÿ ๐Ÿ‚ ๐Ÿƒ ๐Ÿ„ ๐Ÿ… ๐Ÿ† ๐Ÿ‡ ๐Ÿˆ ๐Ÿ‰ ๐ŸŠ ๐Ÿ‹ ๐ŸŒ ๐Ÿ ๐ŸŽ ๐Ÿ ๐Ÿ ๐Ÿ‘ ๐Ÿ’ ๐Ÿ“ ๐Ÿ” ๐Ÿ• ๐Ÿ– ๐Ÿ— ๐Ÿ˜ ๐Ÿ™ ๐Ÿš ๐Ÿ› ๐Ÿœ ๐Ÿ ๐Ÿž ๐ŸŸ ๐Ÿ  ๐Ÿก ๐Ÿข ๐Ÿฃ ๐Ÿค ๐Ÿฅ ๐Ÿฆ ๐Ÿง ๐Ÿจ ๐Ÿฉ ๐Ÿช ๐Ÿซ ๐Ÿฌ ๐Ÿญ ๐Ÿฎ ๐Ÿฏ ๐Ÿฐ ๐Ÿฑ ๐Ÿฒ ๐Ÿณ ๐Ÿด ๐Ÿต ๐Ÿถ ๐Ÿท ๐Ÿธ ๐Ÿน ๐Ÿบ ๐Ÿป ๐Ÿผ ๐Ÿฝ ๐Ÿพ ๐Ÿฟ ๐Ÿ‘€ ๐Ÿ‘ ๐Ÿ‘‚ ๐Ÿ‘ƒ ๐Ÿ‘„ ๐Ÿ‘… ๐Ÿ‘† ๐Ÿ‘‡ ๐Ÿ‘ˆ ๐Ÿ‘‰ ๐Ÿ‘Š ๐Ÿ‘‹ ๐Ÿ‘Œ ๐Ÿ‘ ๐Ÿ‘Ž ๐Ÿ‘ ๐Ÿ‘ ๐Ÿ‘‘ ๐Ÿ‘’ ๐Ÿ‘“ ๐Ÿ‘” ๐Ÿ‘• ๐Ÿ‘– ๐Ÿ‘— ๐Ÿ‘˜ ๐Ÿ‘™ ๐Ÿ‘š ๐Ÿ‘› ๐Ÿ‘œ ๐Ÿ‘ ๐Ÿ‘ž ๐Ÿ‘Ÿ ๐Ÿ‘  ๐Ÿ‘ก ๐Ÿ‘ข ๐Ÿ‘ฃ ๐Ÿ‘ค ๐Ÿ‘ฅ ๐Ÿ‘ฆ ๐Ÿ‘ง ๐Ÿ‘จ ๐Ÿ‘ฉ ๐Ÿ‘ช ๐Ÿ‘ซ ๐Ÿ‘ฌ ๐Ÿ‘ญ ๐Ÿ‘ฎ ๐Ÿ‘ฏ ๐Ÿ‘ฐ ๐Ÿ‘ฑ ๐Ÿ‘ฒ ๐Ÿ‘ณ ๐Ÿ‘ด ๐Ÿ‘ต ๐Ÿ‘ถ ๐Ÿ‘ท ๐Ÿ‘ธ ๐Ÿ‘น ๐Ÿ‘บ ๐Ÿ‘ป ๐Ÿ‘ผ ๐Ÿ‘ฝ ๐Ÿ‘พ ๐Ÿ‘ฟ ๐Ÿ’€ ๐Ÿ’ ๐Ÿ’‚ ๐Ÿ’ƒ ๐Ÿ’„ ๐Ÿ’… ๐Ÿ’† ๐Ÿ’‡ ๐Ÿ’ˆ ๐Ÿ’‰ ๐Ÿ’Š ๐Ÿ’‹ ๐Ÿ’Œ ๐Ÿ’ ๐Ÿ’Ž ๐Ÿ’ ๐Ÿ’ ๐Ÿ’‘ ๐Ÿ’’ ๐Ÿ’“ ๐Ÿ’” ๐Ÿ’• ๐Ÿ’– ๐Ÿ’— ๐Ÿ’˜ ๐Ÿ’™ ๐Ÿ’š ๐Ÿ’› ๐Ÿ’œ ๐Ÿ’ ๐Ÿ’ž ๐Ÿ’Ÿ ๐Ÿ’  ๐Ÿ’ก ๐Ÿ’ข ๐Ÿ’ฃ ๐Ÿ’ค ๐Ÿ’ฅ ๐Ÿ’ฆ ๐Ÿ’ง ๐Ÿ’จ ๐Ÿ’ฉ ๐Ÿ’ช ๐Ÿ’ซ ๐Ÿ’ฌ ๐Ÿ’ญ ๐Ÿ’ฎ ๐Ÿ’ฏ ๐Ÿ’ฐ ๐Ÿ’ฑ ๐Ÿ’ฒ ๐Ÿ’ณ ๐Ÿ’ด ๐Ÿ’ต ๐Ÿ’ถ ๐Ÿ’ท ๐Ÿ’ธ ๐Ÿ’น ๐Ÿ’บ ๐Ÿ’ป ๐Ÿ’ผ ๐Ÿ’ฝ ๐Ÿ’พ ๐Ÿ’ฟ ๐Ÿ“€ ๐Ÿ“ ๐Ÿ“‚ ๐Ÿ“ƒ ๐Ÿ“„ ๐Ÿ“… ๐Ÿ“† ๐Ÿ“‡ ๐Ÿ“ˆ ๐Ÿ“‰ ๐Ÿ“Š ๐Ÿ“‹ ๐Ÿ“Œ ๐Ÿ“ ๐Ÿ“Ž ๐Ÿ“ ๐Ÿ“ ๐Ÿ“‘ ๐Ÿ“’ ๐Ÿ““ ๐Ÿ“” ๐Ÿ“• ๐Ÿ“– ๐Ÿ“— ๐Ÿ“˜ ๐Ÿ“™ ๐Ÿ“š ๐Ÿ“› ๐Ÿ“œ ๐Ÿ“ ๐Ÿ“ž ๐Ÿ“Ÿ ๐Ÿ“  ๐Ÿ“ก ๐Ÿ“ข ๐Ÿ“ฃ ๐Ÿ“ค ๐Ÿ“ฅ ๐Ÿ“ฆ ๐Ÿ“ง ๐Ÿ“จ ๐Ÿ“ฉ ๐Ÿ“ช ๐Ÿ“ซ ๐Ÿ“ฌ ๐Ÿ“ญ ๐Ÿ“ฎ ๐Ÿ“ฏ ๐Ÿ“ฐ ๐Ÿ“ฑ ๐Ÿ“ฒ ๐Ÿ“ณ ๐Ÿ“ด ๐Ÿ“ต ๐Ÿ“ถ ๐Ÿ“ท ๐Ÿ“ธ ๐Ÿ“น ๐Ÿ“บ ๐Ÿ“ป ๐Ÿ“ผ ๐Ÿ“ฝ ๐Ÿ“พ ๐Ÿ“ฟ ๐Ÿ”€ ๐Ÿ” ๐Ÿ”‚ ๐Ÿ”ƒ ๐Ÿ”„ ๐Ÿ”… ๐Ÿ”† ๐Ÿ”‡ ๐Ÿ”ˆ ๐Ÿ”‰ ๐Ÿ”Š ๐Ÿ”‹ ๐Ÿ”Œ ๐Ÿ” ๐Ÿ”Ž ๐Ÿ” ๐Ÿ” ๐Ÿ”‘ ๐Ÿ”’ ๐Ÿ”“ ๐Ÿ”” ๐Ÿ”• ๐Ÿ”– ๐Ÿ”— ๐Ÿ”˜ ๐Ÿ”™ ๐Ÿ”š ๐Ÿ”› ๐Ÿ”œ ๐Ÿ” ๐Ÿ”ž ๐Ÿ”Ÿ ๐Ÿ”  ๐Ÿ”ก ๐Ÿ”ข ๐Ÿ”ฃ ๐Ÿ”ค ๐Ÿ”ฅ ๐Ÿ”ฆ ๐Ÿ”ง ๐Ÿ”จ ๐Ÿ”ฉ ๐Ÿ”ช ๐Ÿ”ซ ๐Ÿ”ฌ ๐Ÿ”ญ ๐Ÿ”ฎ ๐Ÿ”ฏ ๐Ÿ”ฐ ๐Ÿ”ฑ ๐Ÿ”ฒ ๐Ÿ”ณ ๐Ÿ”ด ๐Ÿ”ต ๐Ÿ”ถ ๐Ÿ”ท ๐Ÿ”ธ ๐Ÿ”น ๐Ÿ”บ ๐Ÿ”ป ๐Ÿ”ผ ๐Ÿ”ฝ ๐Ÿ”พ ๐Ÿ”ฟ ๐Ÿ•€ ๐Ÿ• ๐Ÿ•‚ ๐Ÿ•ƒ ๐Ÿ•„ ๐Ÿ•… ๐Ÿ•† ๐Ÿ•‡ ๐Ÿ•ˆ ๐Ÿ•‰ ๐Ÿ•Š ๐Ÿ•‹ ๐Ÿ•Œ ๐Ÿ• ๐Ÿ•Ž ๐Ÿ• ๐Ÿ• ๐Ÿ•‘ ๐Ÿ•’ ๐Ÿ•“ ๐Ÿ•” ๐Ÿ•• ๐Ÿ•– ๐Ÿ•— ๐Ÿ•˜ ๐Ÿ•™ ๐Ÿ•š ๐Ÿ•› ๐Ÿ•œ ๐Ÿ• ๐Ÿ•ž ๐Ÿ•Ÿ ๐Ÿ•  ๐Ÿ•ก ๐Ÿ•ข ๐Ÿ•ฃ ๐Ÿ•ค ๐Ÿ•ฅ ๐Ÿ•ฆ ๐Ÿ•ง ๐Ÿ•จ ๐Ÿ•ฉ ๐Ÿ•ช ๐Ÿ•ซ ๐Ÿ•ฌ ๐Ÿ•ญ ๐Ÿ•ฎ ๐Ÿ•ฏ ๐Ÿ•ฐ ๐Ÿ•ฑ ๐Ÿ•ฒ ๐Ÿ•ณ ๐Ÿ•ด ๐Ÿ•ต ๐Ÿ•ถ ๐Ÿ•ท ๐Ÿ•ธ ๐Ÿ•น ๐Ÿ•บ ๐Ÿ•ป ๐Ÿ•ผ ๐Ÿ•ฝ ๐Ÿ•พ ๐Ÿ•ฟ ๐Ÿ–€ ๐Ÿ– ๐Ÿ–‚ ๐Ÿ–ƒ ๐Ÿ–„ ๐Ÿ–… ๐Ÿ–† ๐Ÿ–‡ ๐Ÿ–ˆ ๐Ÿ–‰ ๐Ÿ–Š ๐Ÿ–‹ ๐Ÿ–Œ ๐Ÿ– ๐Ÿ–Ž ๐Ÿ– ๐Ÿ– ๐Ÿ–‘ ๐Ÿ–’ ๐Ÿ–“ ๐Ÿ–” ๐Ÿ–• ๐Ÿ–– ๐Ÿ–— ๐Ÿ–˜ ๐Ÿ–™ ๐Ÿ–š ๐Ÿ–› ๐Ÿ–œ ๐Ÿ– ๐Ÿ–ž ๐Ÿ–Ÿ ๐Ÿ–  ๐Ÿ–ก ๐Ÿ–ข ๐Ÿ–ฃ ๐Ÿ–ค ๐Ÿ–ฅ ๐Ÿ–ฆ ๐Ÿ–ง ๐Ÿ–จ ๐Ÿ–ฉ ๐Ÿ–ช ๐Ÿ–ซ ๐Ÿ–ฌ ๐Ÿ–ญ ๐Ÿ–ฎ ๐Ÿ–ฏ ๐Ÿ–ฐ ๐Ÿ–ฑ ๐Ÿ–ฒ ๐Ÿ–ณ ๐Ÿ–ด ๐Ÿ–ต ๐Ÿ–ถ ๐Ÿ–ท ๐Ÿ–ธ ๐Ÿ–น ๐Ÿ–บ ๐Ÿ–ป ๐Ÿ–ผ ๐Ÿ–ฝ ๐Ÿ–พ ๐Ÿ–ฟ ๐Ÿ—€ ๐Ÿ— ๐Ÿ—‚ ๐Ÿ—ƒ ๐Ÿ—„ ๐Ÿ—… ๐Ÿ—† ๐Ÿ—‡ ๐Ÿ—ˆ ๐Ÿ—‰ ๐Ÿ—Š ๐Ÿ—‹ ๐Ÿ—Œ ๐Ÿ— ๐Ÿ—Ž ๐Ÿ— ๐Ÿ— ๐Ÿ—‘ ๐Ÿ—’ ๐Ÿ—“ ๐Ÿ—” ๐Ÿ—• ๐Ÿ—– ๐Ÿ—— ๐Ÿ—˜ ๐Ÿ—™ ๐Ÿ—š ๐Ÿ—› ๐Ÿ—œ ๐Ÿ— ๐Ÿ—ž ๐Ÿ—Ÿ ๐Ÿ—  ๐Ÿ—ก ๐Ÿ—ข ๐Ÿ—ฃ ๐Ÿ—ค ๐Ÿ—ฅ ๐Ÿ—ฆ ๐Ÿ—ง ๐Ÿ—จ ๐Ÿ—ฉ ๐Ÿ—ช ๐Ÿ—ซ ๐Ÿ—ฌ ๐Ÿ—ญ ๐Ÿ—ฎ ๐Ÿ—ฏ ๐Ÿ—ฐ ๐Ÿ—ฑ ๐Ÿ—ฒ ๐Ÿ—ณ ๐Ÿ—ด ๐Ÿ—ต ๐Ÿ—ถ ๐Ÿ—ท ๐Ÿ—ธ ๐Ÿ—น ๐Ÿ—บ ๐Ÿ—ป ๐Ÿ—ผ ๐Ÿ—ฝ ๐Ÿ—พ ๐Ÿ—ฟ diff --git a/sample_texts/und-Pauc_chars.txt b/sample_texts/und-Pauc_chars.txt new file mode 100644 index 00000000..3d7ee67b --- /dev/null +++ b/sample_texts/und-Pauc_chars.txt @@ -0,0 +1 @@ +๐‘ซ€ ๐‘ซ ๐‘ซ‚ ๐‘ซƒ ๐‘ซ„ ๐‘ซ… ๐‘ซ† ๐‘ซ‡ ๐‘ซˆ ๐‘ซ‰ ๐‘ซŠ ๐‘ซ‹ ๐‘ซŒ ๐‘ซ ๐‘ซŽ ๐‘ซ ๐‘ซ ๐‘ซ‘ ๐‘ซ’ ๐‘ซ“ ๐‘ซ” ๐‘ซ• ๐‘ซ– ๐‘ซ— ๐‘ซ˜ ๐‘ซ™ ๐‘ซš ๐‘ซ› ๐‘ซœ ๐‘ซ ๐‘ซž ๐‘ซŸ ๐‘ซ  ๐‘ซก ๐‘ซข ๐‘ซฃ ๐‘ซค ๐‘ซฅ ๐‘ซฆ ๐‘ซง ๐‘ซจ ๐‘ซฉ ๐‘ซช ๐‘ซซ ๐‘ซฌ ๐‘ซญ ๐‘ซฎ ๐‘ซฏ ๐‘ซฐ ๐‘ซฑ ๐‘ซฒ ๐‘ซณ ๐‘ซด ๐‘ซต ๐‘ซถ ๐‘ซท ๐‘ซธ ๐‘ซน ๐‘ซบ ๐‘ซป ๐‘ซผ ๐‘ซฝ ๐‘ซพ ๐‘ซฟ diff --git a/sample_texts/und-Perm_chars.txt b/sample_texts/und-Perm_chars.txt new file mode 100644 index 00000000..ae7ea02d --- /dev/null +++ b/sample_texts/und-Perm_chars.txt @@ -0,0 +1 @@ +๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐– ๐— ๐˜ ๐™ ๐š ๐› ๐œ ๐ ๐ž ๐Ÿ ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐จ ๐ฉ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฎ ๐ฏ ๐ฐ ๐ฑ ๐ฒ ๐ณ ๐ด ๐ต ๐ถ ๐ท ๐ธ ๐น ๐บ ๐ป ๐ผ ๐ฝ ๐พ ๐ฟ diff --git a/sample_texts/und-Phlp_chars.txt b/sample_texts/und-Phlp_chars.txt new file mode 100644 index 00000000..d3dfce21 --- /dev/null +++ b/sample_texts/und-Phlp_chars.txt @@ -0,0 +1 @@ +๐ฎ€ ๐ฎ ๐ฎ‚ ๐ฎƒ ๐ฎ„ ๐ฎ… ๐ฎ† ๐ฎ‡ ๐ฎˆ ๐ฎ‰ ๐ฎŠ ๐ฎ‹ ๐ฎŒ ๐ฎ ๐ฎŽ ๐ฎ ๐ฎ ๐ฎ‘ ๐ฎ’ ๐ฎ“ ๐ฎ” ๐ฎ• ๐ฎ– ๐ฎ— ๐ฎ˜ ๐ฎ™ ๐ฎš ๐ฎ› ๐ฎœ ๐ฎ ๐ฎž ๐ฎŸ ๐ฎ  ๐ฎก ๐ฎข ๐ฎฃ ๐ฎค ๐ฎฅ ๐ฎฆ ๐ฎง ๐ฎจ ๐ฎฉ ๐ฎช ๐ฎซ ๐ฎฌ ๐ฎญ ๐ฎฎ ๐ฎฏ diff --git a/sample_texts/und-Plrd_chars.txt b/sample_texts/und-Plrd_chars.txt new file mode 100644 index 00000000..5d565877 --- /dev/null +++ b/sample_texts/und-Plrd_chars.txt @@ -0,0 +1 @@ +๐–ผ€ ๐–ผ ๐–ผ‚ ๐–ผƒ ๐–ผ„ ๐–ผ… ๐–ผ† ๐–ผ‡ ๐–ผˆ ๐–ผ‰ ๐–ผŠ ๐–ผ‹ ๐–ผŒ ๐–ผ ๐–ผŽ ๐–ผ ๐–ผ ๐–ผ‘ ๐–ผ’ ๐–ผ“ ๐–ผ” ๐–ผ• ๐–ผ– ๐–ผ— ๐–ผ˜ ๐–ผ™ ๐–ผš ๐–ผ› ๐–ผœ ๐–ผ ๐–ผž ๐–ผŸ ๐–ผ  ๐–ผก ๐–ผข ๐–ผฃ ๐–ผค ๐–ผฅ ๐–ผฆ ๐–ผง ๐–ผจ ๐–ผฉ ๐–ผช ๐–ผซ ๐–ผฌ ๐–ผญ ๐–ผฎ ๐–ผฏ ๐–ผฐ ๐–ผฑ ๐–ผฒ ๐–ผณ ๐–ผด ๐–ผต ๐–ผถ ๐–ผท ๐–ผธ ๐–ผน ๐–ผบ ๐–ผป ๐–ผผ ๐–ผฝ ๐–ผพ ๐–ผฟ ๐–ฝ€ ๐–ฝ ๐–ฝ‚ ๐–ฝƒ ๐–ฝ„ ๐–ฝ… ๐–ฝ† ๐–ฝ‡ ๐–ฝˆ ๐–ฝ‰ ๐–ฝŠ ๐–ฝ‹ ๐–ฝŒ ๐–ฝ ๐–ฝŽ ๐–ฝ ๐–ฝ ๐–ฝ‘ ๐–ฝ’ ๐–ฝ“ ๐–ฝ” ๐–ฝ• ๐–ฝ– ๐–ฝ— ๐–ฝ˜ ๐–ฝ™ ๐–ฝš ๐–ฝ› ๐–ฝœ ๐–ฝ ๐–ฝž ๐–ฝŸ ๐–ฝ  ๐–ฝก ๐–ฝข ๐–ฝฃ ๐–ฝค ๐–ฝฅ ๐–ฝฆ ๐–ฝง ๐–ฝจ ๐–ฝฉ ๐–ฝช ๐–ฝซ ๐–ฝฌ ๐–ฝญ ๐–ฝฎ ๐–ฝฏ ๐–ฝฐ ๐–ฝฑ ๐–ฝฒ ๐–ฝณ ๐–ฝด ๐–ฝต ๐–ฝถ ๐–ฝท ๐–ฝธ ๐–ฝน ๐–ฝบ ๐–ฝป ๐–ฝผ ๐–ฝฝ ๐–ฝพ ๐–ฝฟ ๐–พ€ ๐–พ ๐–พ‚ ๐–พƒ ๐–พ„ ๐–พ… ๐–พ† ๐–พ‡ ๐–พˆ ๐–พ‰ ๐–พŠ ๐–พ‹ ๐–พŒ ๐–พ ๐–พŽ ๐–พ ๐–พ ๐–พ‘ ๐–พ’ ๐–พ“ ๐–พ” ๐–พ• ๐–พ– ๐–พ— ๐–พ˜ ๐–พ™ ๐–พš ๐–พ› ๐–พœ ๐–พ ๐–พž ๐–พŸ diff --git a/sample_texts/und-Shrd_chars.txt b/sample_texts/und-Shrd_chars.txt new file mode 100644 index 00000000..e14ca4a5 --- /dev/null +++ b/sample_texts/und-Shrd_chars.txt @@ -0,0 +1 @@ +๐‘†€ ๐‘† ๐‘†‚ ๐‘†ƒ ๐‘†„ ๐‘†… ๐‘†† ๐‘†‡ ๐‘†ˆ ๐‘†‰ ๐‘†Š ๐‘†‹ ๐‘†Œ ๐‘† ๐‘†Ž ๐‘† ๐‘† ๐‘†‘ ๐‘†’ ๐‘†“ ๐‘†” ๐‘†• ๐‘†– ๐‘†— ๐‘†˜ ๐‘†™ ๐‘†š ๐‘†› ๐‘†œ ๐‘† ๐‘†ž ๐‘†Ÿ ๐‘†  ๐‘†ก ๐‘†ข ๐‘†ฃ ๐‘†ค ๐‘†ฅ ๐‘†ฆ ๐‘†ง ๐‘†จ ๐‘†ฉ ๐‘†ช ๐‘†ซ ๐‘†ฌ ๐‘†ญ ๐‘†ฎ ๐‘†ฏ ๐‘†ฐ ๐‘†ฑ ๐‘†ฒ ๐‘†ณ ๐‘†ด ๐‘†ต ๐‘†ถ ๐‘†ท ๐‘†ธ ๐‘†น ๐‘†บ ๐‘†ป ๐‘†ผ ๐‘†ฝ ๐‘†พ ๐‘†ฟ ๐‘‡€ ๐‘‡ ๐‘‡‚ ๐‘‡ƒ ๐‘‡„ ๐‘‡… ๐‘‡† ๐‘‡‡ ๐‘‡ˆ ๐‘‡‰ ๐‘‡Š ๐‘‡‹ ๐‘‡Œ ๐‘‡ ๐‘‡Ž ๐‘‡ ๐‘‡ ๐‘‡‘ ๐‘‡’ ๐‘‡“ ๐‘‡” ๐‘‡• ๐‘‡– ๐‘‡— ๐‘‡˜ ๐‘‡™ ๐‘‡š ๐‘‡› ๐‘‡œ ๐‘‡ ๐‘‡ž ๐‘‡Ÿ diff --git a/sample_texts/und-Sind_chars.txt b/sample_texts/und-Sind_chars.txt new file mode 100644 index 00000000..588456e4 --- /dev/null +++ b/sample_texts/und-Sind_chars.txt @@ -0,0 +1 @@ +๐‘Šฐ ๐‘Šฑ ๐‘Šฒ ๐‘Šณ ๐‘Šด ๐‘Šต ๐‘Šถ ๐‘Šท ๐‘Šธ ๐‘Šน ๐‘Šบ ๐‘Šป ๐‘Šผ ๐‘Šฝ ๐‘Šพ ๐‘Šฟ ๐‘‹€ ๐‘‹ ๐‘‹‚ ๐‘‹ƒ ๐‘‹„ ๐‘‹… ๐‘‹† ๐‘‹‡ ๐‘‹ˆ ๐‘‹‰ ๐‘‹Š ๐‘‹‹ ๐‘‹Œ ๐‘‹ ๐‘‹Ž ๐‘‹ ๐‘‹ ๐‘‹‘ ๐‘‹’ ๐‘‹“ ๐‘‹” ๐‘‹• ๐‘‹– ๐‘‹— ๐‘‹˜ ๐‘‹™ ๐‘‹š ๐‘‹› ๐‘‹œ ๐‘‹ ๐‘‹ž ๐‘‹Ÿ ๐‘‹  ๐‘‹ก ๐‘‹ข ๐‘‹ฃ ๐‘‹ค ๐‘‹ฅ ๐‘‹ฆ ๐‘‹ง ๐‘‹จ ๐‘‹ฉ ๐‘‹ช ๐‘‹ซ ๐‘‹ฌ ๐‘‹ญ ๐‘‹ฎ ๐‘‹ฏ ๐‘‹ฐ ๐‘‹ฑ ๐‘‹ฒ ๐‘‹ณ ๐‘‹ด ๐‘‹ต ๐‘‹ถ ๐‘‹ท ๐‘‹ธ ๐‘‹น ๐‘‹บ ๐‘‹ป ๐‘‹ผ ๐‘‹ฝ ๐‘‹พ ๐‘‹ฟ diff --git a/sample_texts/und-Sora_chars.txt b/sample_texts/und-Sora_chars.txt new file mode 100644 index 00000000..0c56e619 --- /dev/null +++ b/sample_texts/und-Sora_chars.txt @@ -0,0 +1 @@ +๐‘ƒ ๐‘ƒ‘ ๐‘ƒ’ ๐‘ƒ“ ๐‘ƒ” ๐‘ƒ• ๐‘ƒ– ๐‘ƒ— ๐‘ƒ˜ ๐‘ƒ™ ๐‘ƒš ๐‘ƒ› ๐‘ƒœ ๐‘ƒ ๐‘ƒž ๐‘ƒŸ ๐‘ƒ  ๐‘ƒก ๐‘ƒข ๐‘ƒฃ ๐‘ƒค ๐‘ƒฅ ๐‘ƒฆ ๐‘ƒง ๐‘ƒจ ๐‘ƒฉ ๐‘ƒช ๐‘ƒซ ๐‘ƒฌ ๐‘ƒญ ๐‘ƒฎ ๐‘ƒฏ ๐‘ƒฐ ๐‘ƒฑ ๐‘ƒฒ ๐‘ƒณ ๐‘ƒด ๐‘ƒต ๐‘ƒถ ๐‘ƒท ๐‘ƒธ ๐‘ƒน ๐‘ƒบ ๐‘ƒป ๐‘ƒผ ๐‘ƒฝ ๐‘ƒพ ๐‘ƒฟ diff --git a/sample_texts/und-Takr_chars.txt b/sample_texts/und-Takr_chars.txt new file mode 100644 index 00000000..d97c0fc0 --- /dev/null +++ b/sample_texts/und-Takr_chars.txt @@ -0,0 +1 @@ +๐‘š€ ๐‘š ๐‘š‚ ๐‘šƒ ๐‘š„ ๐‘š… ๐‘š† ๐‘š‡ ๐‘šˆ ๐‘š‰ ๐‘šŠ ๐‘š‹ ๐‘šŒ ๐‘š ๐‘šŽ ๐‘š ๐‘š ๐‘š‘ ๐‘š’ ๐‘š“ ๐‘š” ๐‘š• ๐‘š– ๐‘š— ๐‘š˜ ๐‘š™ ๐‘šš ๐‘š› ๐‘šœ ๐‘š ๐‘šž ๐‘šŸ ๐‘š  ๐‘šก ๐‘šข ๐‘šฃ ๐‘šค ๐‘šฅ ๐‘šฆ ๐‘šง ๐‘šจ ๐‘šฉ ๐‘šช ๐‘šซ ๐‘šฌ ๐‘šญ ๐‘šฎ ๐‘šฏ ๐‘šฐ ๐‘šฑ ๐‘šฒ ๐‘šณ ๐‘šด ๐‘šต ๐‘šถ ๐‘šท ๐‘šธ ๐‘šน ๐‘šบ ๐‘šป ๐‘šผ ๐‘šฝ ๐‘šพ ๐‘šฟ ๐‘›€ ๐‘› ๐‘›‚ ๐‘›ƒ ๐‘›„ ๐‘›… ๐‘›† ๐‘›‡ ๐‘›ˆ ๐‘›‰ ๐‘›Š ๐‘›‹ ๐‘›Œ ๐‘› ๐‘›Ž ๐‘› diff --git a/sample_texts/und-Tirh_chars.txt b/sample_texts/und-Tirh_chars.txt new file mode 100644 index 00000000..d0341602 --- /dev/null +++ b/sample_texts/und-Tirh_chars.txt @@ -0,0 +1 @@ +๐‘’€ ๐‘’ ๐‘’‚ ๐‘’ƒ ๐‘’„ ๐‘’… ๐‘’† ๐‘’‡ ๐‘’ˆ ๐‘’‰ ๐‘’Š ๐‘’‹ ๐‘’Œ ๐‘’ ๐‘’Ž ๐‘’ ๐‘’ ๐‘’‘ ๐‘’’ ๐‘’“ ๐‘’” ๐‘’• ๐‘’– ๐‘’— ๐‘’˜ ๐‘’™ ๐‘’š ๐‘’› ๐‘’œ ๐‘’ ๐‘’ž ๐‘’Ÿ ๐‘’  ๐‘’ก ๐‘’ข ๐‘’ฃ ๐‘’ค ๐‘’ฅ ๐‘’ฆ ๐‘’ง ๐‘’จ ๐‘’ฉ ๐‘’ช ๐‘’ซ ๐‘’ฌ ๐‘’ญ ๐‘’ฎ ๐‘’ฏ ๐‘’ฐ ๐‘’ฑ ๐‘’ฒ ๐‘’ณ ๐‘’ด ๐‘’ต ๐‘’ถ ๐‘’ท ๐‘’ธ ๐‘’น ๐‘’บ ๐‘’ป ๐‘’ผ ๐‘’ฝ ๐‘’พ ๐‘’ฟ ๐‘“€ ๐‘“ ๐‘“‚ ๐‘“ƒ ๐‘“„ ๐‘“… ๐‘“† ๐‘“‡ ๐‘“ˆ ๐‘“‰ ๐‘“Š ๐‘“‹ ๐‘“Œ ๐‘“ ๐‘“Ž ๐‘“ ๐‘“ ๐‘“‘ ๐‘“’ ๐‘““ ๐‘“” ๐‘“• ๐‘“– ๐‘“— ๐‘“˜ ๐‘“™ ๐‘“š ๐‘“› ๐‘“œ ๐‘“ ๐‘“ž ๐‘“Ÿ diff --git a/sample_texts/und-Wara_chars.txt b/sample_texts/und-Wara_chars.txt new file mode 100644 index 00000000..99ddfe34 --- /dev/null +++ b/sample_texts/und-Wara_chars.txt @@ -0,0 +1 @@ +๐‘ข  ๐‘ขก ๐‘ขข ๐‘ขฃ ๐‘ขค ๐‘ขฅ ๐‘ขฆ ๐‘ขง ๐‘ขจ ๐‘ขฉ ๐‘ขช ๐‘ขซ ๐‘ขฌ ๐‘ขญ ๐‘ขฎ ๐‘ขฏ ๐‘ขฐ ๐‘ขฑ ๐‘ขฒ ๐‘ขณ ๐‘ขด ๐‘ขต ๐‘ขถ ๐‘ขท ๐‘ขธ ๐‘ขน ๐‘ขบ ๐‘ขป ๐‘ขผ ๐‘ขฝ ๐‘ขพ ๐‘ขฟ ๐‘ฃ€ ๐‘ฃ ๐‘ฃ‚ ๐‘ฃƒ ๐‘ฃ„ ๐‘ฃ… ๐‘ฃ† ๐‘ฃ‡ ๐‘ฃˆ ๐‘ฃ‰ ๐‘ฃŠ ๐‘ฃ‹ ๐‘ฃŒ ๐‘ฃ ๐‘ฃŽ ๐‘ฃ ๐‘ฃ ๐‘ฃ‘ ๐‘ฃ’ ๐‘ฃ“ ๐‘ฃ” ๐‘ฃ• ๐‘ฃ– ๐‘ฃ— ๐‘ฃ˜ ๐‘ฃ™ ๐‘ฃš ๐‘ฃ› ๐‘ฃœ ๐‘ฃ ๐‘ฃž ๐‘ฃŸ ๐‘ฃ  ๐‘ฃก ๐‘ฃข ๐‘ฃฃ ๐‘ฃค ๐‘ฃฅ ๐‘ฃฆ ๐‘ฃง ๐‘ฃจ ๐‘ฃฉ ๐‘ฃช ๐‘ฃซ ๐‘ฃฌ ๐‘ฃญ ๐‘ฃฎ ๐‘ฃฏ ๐‘ฃฐ ๐‘ฃฑ ๐‘ฃฒ ๐‘ฃณ ๐‘ฃด ๐‘ฃต ๐‘ฃถ ๐‘ฃท ๐‘ฃธ ๐‘ฃน ๐‘ฃบ ๐‘ฃป ๐‘ฃผ ๐‘ฃฝ ๐‘ฃพ ๐‘ฃฟ From 507c1129001091a38d8e275e1bad4bb949d4e1ba Mon Sep 17 00:00:00 2001 From: "Marek Z. Jeziorek" Date: Mon, 22 Oct 2018 10:14:45 -0400 Subject: [PATCH 06/14] removed unassigned code points (tofu) from Unicode ranges for the new samples --- sample_texts/und-Aghb_chars.txt | 2 +- sample_texts/und-Ahom_chars.txt | 2 +- sample_texts/und-Bhks_chars.txt | 2 +- sample_texts/und-Dupl_chars.txt | 2 +- sample_texts/und-Elba_chars.txt | 2 +- sample_texts/und-Gran_chars.txt | 2 +- sample_texts/und-Hatr_chars.txt | 2 +- sample_texts/und-Hmng_chars.txt | 2 +- sample_texts/und-Hung_chars.txt | 2 +- sample_texts/und-Lina_chars.txt | 2 +- sample_texts/und-Mahj_chars.txt | 2 +- sample_texts/und-Mani_chars.txt | 2 +- sample_texts/und-Marc_chars.txt | 2 +- sample_texts/und-Mend_chars.txt | 2 +- sample_texts/und-Modi_chars.txt | 2 +- sample_texts/und-Mroo_chars.txt | 2 +- sample_texts/und-Mult_chars.txt | 2 +- sample_texts/und-Nbat_chars.txt | 2 +- sample_texts/und-Newa_chars.txt | 2 +- sample_texts/und-Palm_chars.txt | 2 +- sample_texts/und-Pauc_chars.txt | 2 +- sample_texts/und-Perm_chars.txt | 2 +- sample_texts/und-Phlp_chars.txt | 2 +- sample_texts/und-Plrd_chars.txt | 2 +- sample_texts/und-Shrd_chars.txt | 2 +- sample_texts/und-Sind_chars.txt | 2 +- sample_texts/und-Sora_chars.txt | 2 +- sample_texts/und-Takr_chars.txt | 2 +- sample_texts/und-Tirh_chars.txt | 2 +- sample_texts/und-Wara_chars.txt | 2 +- sample_texts/und-Zsym-muse_chars.txt | 1 + 31 files changed, 31 insertions(+), 30 deletions(-) create mode 100644 sample_texts/und-Zsym-muse_chars.txt diff --git a/sample_texts/und-Aghb_chars.txt b/sample_texts/und-Aghb_chars.txt index 35c36ae0..a77fb203 100644 --- a/sample_texts/und-Aghb_chars.txt +++ b/sample_texts/und-Aghb_chars.txt @@ -1 +1 @@ -๐”ฐ ๐”ฑ ๐”ฒ ๐”ณ ๐”ด ๐”ต ๐”ถ ๐”ท ๐”ธ ๐”น ๐”บ ๐”ป ๐”ผ ๐”ฝ ๐”พ ๐”ฟ ๐•€ ๐• ๐•‚ ๐•ƒ ๐•„ ๐•… ๐•† ๐•‡ ๐•ˆ ๐•‰ ๐•Š ๐•‹ ๐•Œ ๐• ๐•Ž ๐• ๐• ๐•‘ ๐•’ ๐•“ ๐•” ๐•• ๐•– ๐•— ๐•˜ ๐•™ ๐•š ๐•› ๐•œ ๐• ๐•ž ๐•Ÿ ๐•  ๐•ก ๐•ข ๐•ฃ ๐•ค ๐•ฅ ๐•ฆ ๐•ง ๐•จ ๐•ฉ ๐•ช ๐•ซ ๐•ฌ ๐•ญ ๐•ฎ ๐•ฏ +๐”ฐ ๐”ฑ ๐”ฒ ๐”ณ ๐”ด ๐”ต ๐”ถ ๐”ท ๐”ธ ๐”น ๐”บ ๐”ป ๐”ผ ๐”ฝ ๐”พ ๐”ฟ ๐•€ ๐• ๐•‚ ๐•ƒ ๐•„ ๐•… ๐•† ๐•‡ ๐•ˆ ๐•‰ ๐•Š ๐•‹ ๐•Œ ๐• ๐•Ž ๐• ๐• ๐•‘ ๐•’ ๐•“ ๐•” ๐•• ๐•– ๐•— ๐•˜ ๐•™ ๐•š ๐•› ๐•œ ๐• ๐•ž ๐•Ÿ ๐•  ๐•ก ๐•ข ๐•ฃ ๐•ฏ diff --git a/sample_texts/und-Ahom_chars.txt b/sample_texts/und-Ahom_chars.txt index 81862596..34416cb2 100644 --- a/sample_texts/und-Ahom_chars.txt +++ b/sample_texts/und-Ahom_chars.txt @@ -1 +1 @@ -๐‘œ€ ๐‘œ ๐‘œ‚ ๐‘œƒ ๐‘œ„ ๐‘œ… ๐‘œ† ๐‘œ‡ ๐‘œˆ ๐‘œ‰ ๐‘œŠ ๐‘œ‹ ๐‘œŒ ๐‘œ ๐‘œŽ ๐‘œ ๐‘œ ๐‘œ‘ ๐‘œ’ ๐‘œ“ ๐‘œ” ๐‘œ• ๐‘œ– ๐‘œ— ๐‘œ˜ ๐‘œ™ ๐‘œš ๐‘œ› ๐‘œœ ๐‘œ ๐‘œž ๐‘œŸ ๐‘œ  ๐‘œก ๐‘œข ๐‘œฃ ๐‘œค ๐‘œฅ ๐‘œฆ ๐‘œง ๐‘œจ ๐‘œฉ ๐‘œช ๐‘œซ ๐‘œฌ ๐‘œญ ๐‘œฎ ๐‘œฏ ๐‘œฐ ๐‘œฑ ๐‘œฒ ๐‘œณ ๐‘œด ๐‘œต ๐‘œถ ๐‘œท ๐‘œธ ๐‘œน ๐‘œบ ๐‘œป ๐‘œผ ๐‘œฝ ๐‘œพ ๐‘œฟ +๐‘œ€ ๐‘œ ๐‘œ‚ ๐‘œƒ ๐‘œ„ ๐‘œ… ๐‘œ† ๐‘œ‡ ๐‘œˆ ๐‘œ‰ ๐‘œŠ ๐‘œ‹ ๐‘œŒ ๐‘œ ๐‘œŽ ๐‘œ ๐‘œ ๐‘œ‘ ๐‘œ’ ๐‘œ“ ๐‘œ” ๐‘œ• ๐‘œ– ๐‘œ— ๐‘œ˜ ๐‘œ™ ๐‘œฐ ๐‘œฑ ๐‘œฒ ๐‘œณ ๐‘œด ๐‘œต ๐‘œถ ๐‘œท ๐‘œธ ๐‘œน ๐‘œบ ๐‘œป ๐‘œผ ๐‘œฝ ๐‘œพ ๐‘œฟ diff --git a/sample_texts/und-Bhks_chars.txt b/sample_texts/und-Bhks_chars.txt index dd6917dc..e8e0af70 100644 --- a/sample_texts/und-Bhks_chars.txt +++ b/sample_texts/und-Bhks_chars.txt @@ -1 +1 @@ -๐‘ฐ€ ๐‘ฐ ๐‘ฐ‚ ๐‘ฐƒ ๐‘ฐ„ ๐‘ฐ… ๐‘ฐ† ๐‘ฐ‡ ๐‘ฐˆ ๐‘ฐ‰ ๐‘ฐŠ ๐‘ฐ‹ ๐‘ฐŒ ๐‘ฐ ๐‘ฐŽ ๐‘ฐ ๐‘ฐ ๐‘ฐ‘ ๐‘ฐ’ ๐‘ฐ“ ๐‘ฐ” ๐‘ฐ• ๐‘ฐ– ๐‘ฐ— ๐‘ฐ˜ ๐‘ฐ™ ๐‘ฐš ๐‘ฐ› ๐‘ฐœ ๐‘ฐ ๐‘ฐž ๐‘ฐŸ ๐‘ฐ  ๐‘ฐก ๐‘ฐข ๐‘ฐฃ ๐‘ฐค ๐‘ฐฅ ๐‘ฐฆ ๐‘ฐง ๐‘ฐจ ๐‘ฐฉ ๐‘ฐช ๐‘ฐซ ๐‘ฐฌ ๐‘ฐญ ๐‘ฐฎ ๐‘ฐฏ ๐‘ฐฐ ๐‘ฐฑ ๐‘ฐฒ ๐‘ฐณ ๐‘ฐด ๐‘ฐต ๐‘ฐถ ๐‘ฐท ๐‘ฐธ ๐‘ฐน ๐‘ฐบ ๐‘ฐป ๐‘ฐผ ๐‘ฐฝ ๐‘ฐพ ๐‘ฐฟ ๐‘ฑ€ ๐‘ฑ ๐‘ฑ‚ ๐‘ฑƒ ๐‘ฑ„ ๐‘ฑ… ๐‘ฑ† ๐‘ฑ‡ ๐‘ฑˆ ๐‘ฑ‰ ๐‘ฑŠ ๐‘ฑ‹ ๐‘ฑŒ ๐‘ฑ ๐‘ฑŽ ๐‘ฑ ๐‘ฑ ๐‘ฑ‘ ๐‘ฑ’ ๐‘ฑ“ ๐‘ฑ” ๐‘ฑ• ๐‘ฑ– ๐‘ฑ— ๐‘ฑ˜ ๐‘ฑ™ ๐‘ฑš ๐‘ฑ› ๐‘ฑœ ๐‘ฑ ๐‘ฑž ๐‘ฑŸ ๐‘ฑ  ๐‘ฑก ๐‘ฑข ๐‘ฑฃ ๐‘ฑค ๐‘ฑฅ ๐‘ฑฆ ๐‘ฑง ๐‘ฑจ ๐‘ฑฉ ๐‘ฑช ๐‘ฑซ ๐‘ฑฌ ๐‘ฑญ ๐‘ฑฎ ๐‘ฑฏ +๐‘ฐ€ ๐‘ฐ ๐‘ฐ‚ ๐‘ฐƒ ๐‘ฐ„ ๐‘ฐ… ๐‘ฐ† ๐‘ฐ‡ ๐‘ฐˆ ๐‘ฐŠ ๐‘ฐ‹ ๐‘ฐŒ ๐‘ฐ ๐‘ฐŽ ๐‘ฐ ๐‘ฐ ๐‘ฐ‘ ๐‘ฐ’ ๐‘ฐ“ ๐‘ฐ” ๐‘ฐ• ๐‘ฐ– ๐‘ฐ— ๐‘ฐ˜ ๐‘ฐ™ ๐‘ฐš ๐‘ฐ› ๐‘ฐœ ๐‘ฐ ๐‘ฐž ๐‘ฐŸ ๐‘ฐ  ๐‘ฐก ๐‘ฐข ๐‘ฐฃ ๐‘ฐค ๐‘ฐฅ ๐‘ฐฆ ๐‘ฐง ๐‘ฐจ ๐‘ฐฉ ๐‘ฐช ๐‘ฐซ ๐‘ฐฌ ๐‘ฐญ ๐‘ฐฎ ๐‘ฐฏ ๐‘ฐฐ ๐‘ฐฑ ๐‘ฐฒ ๐‘ฐณ ๐‘ฐด ๐‘ฐต ๐‘ฐถ ๐‘ฐธ ๐‘ฐน ๐‘ฐบ ๐‘ฐป ๐‘ฐผ ๐‘ฐฝ ๐‘ฐพ ๐‘ฐฟ ๐‘ฑ€ ๐‘ฑ ๐‘ฑ‚ ๐‘ฑƒ ๐‘ฑ„ ๐‘ฑ… ๐‘ฑ ๐‘ฑ‘ ๐‘ฑ’ ๐‘ฑ“ ๐‘ฑ” ๐‘ฑ• ๐‘ฑ– ๐‘ฑ— ๐‘ฑ˜ ๐‘ฑ™ ๐‘ฑš ๐‘ฑ› ๐‘ฑœ ๐‘ฑ ๐‘ฑž ๐‘ฑŸ ๐‘ฑ  ๐‘ฑก ๐‘ฑข ๐‘ฑฃ ๐‘ฑค ๐‘ฑฅ ๐‘ฑฆ ๐‘ฑง ๐‘ฑจ ๐‘ฑฉ ๐‘ฑช ๐‘ฑซ ๐‘ฑฌ diff --git a/sample_texts/und-Dupl_chars.txt b/sample_texts/und-Dupl_chars.txt index d4c1e64a..d991bd2f 100644 --- a/sample_texts/und-Dupl_chars.txt +++ b/sample_texts/und-Dupl_chars.txt @@ -1 +1 @@ -๐›ฐ€ ๐›ฐ ๐›ฐ‚ ๐›ฐƒ ๐›ฐ„ ๐›ฐ… ๐›ฐ† ๐›ฐ‡ ๐›ฐˆ ๐›ฐ‰ ๐›ฐŠ ๐›ฐ‹ ๐›ฐŒ ๐›ฐ ๐›ฐŽ ๐›ฐ ๐›ฐ ๐›ฐ‘ ๐›ฐ’ ๐›ฐ“ ๐›ฐ” ๐›ฐ• ๐›ฐ– ๐›ฐ— ๐›ฐ˜ ๐›ฐ™ ๐›ฐš ๐›ฐ› ๐›ฐœ ๐›ฐ ๐›ฐž ๐›ฐŸ ๐›ฐ  ๐›ฐก ๐›ฐข ๐›ฐฃ ๐›ฐค ๐›ฐฅ ๐›ฐฆ ๐›ฐง ๐›ฐจ ๐›ฐฉ ๐›ฐช ๐›ฐซ ๐›ฐฌ ๐›ฐญ ๐›ฐฎ ๐›ฐฏ ๐›ฐฐ ๐›ฐฑ ๐›ฐฒ ๐›ฐณ ๐›ฐด ๐›ฐต ๐›ฐถ ๐›ฐท ๐›ฐธ ๐›ฐน ๐›ฐบ ๐›ฐป ๐›ฐผ ๐›ฐฝ ๐›ฐพ ๐›ฐฟ ๐›ฑ€ ๐›ฑ ๐›ฑ‚ ๐›ฑƒ ๐›ฑ„ ๐›ฑ… ๐›ฑ† ๐›ฑ‡ ๐›ฑˆ ๐›ฑ‰ ๐›ฑŠ ๐›ฑ‹ ๐›ฑŒ ๐›ฑ ๐›ฑŽ ๐›ฑ ๐›ฑ ๐›ฑ‘ ๐›ฑ’ ๐›ฑ“ ๐›ฑ” ๐›ฑ• ๐›ฑ– ๐›ฑ— ๐›ฑ˜ ๐›ฑ™ ๐›ฑš ๐›ฑ› ๐›ฑœ ๐›ฑ ๐›ฑž ๐›ฑŸ ๐›ฑ  ๐›ฑก ๐›ฑข ๐›ฑฃ ๐›ฑค ๐›ฑฅ ๐›ฑฆ ๐›ฑง ๐›ฑจ ๐›ฑฉ ๐›ฑช ๐›ฑซ ๐›ฑฌ ๐›ฑญ ๐›ฑฎ ๐›ฑฏ ๐›ฑฐ ๐›ฑฑ ๐›ฑฒ ๐›ฑณ ๐›ฑด ๐›ฑต ๐›ฑถ ๐›ฑท ๐›ฑธ ๐›ฑน ๐›ฑบ ๐›ฑป ๐›ฑผ ๐›ฑฝ ๐›ฑพ ๐›ฑฟ ๐›ฒ€ ๐›ฒ ๐›ฒ‚ ๐›ฒƒ ๐›ฒ„ ๐›ฒ… ๐›ฒ† ๐›ฒ‡ ๐›ฒˆ ๐›ฒ‰ ๐›ฒŠ ๐›ฒ‹ ๐›ฒŒ ๐›ฒ ๐›ฒŽ ๐›ฒ ๐›ฒ ๐›ฒ‘ ๐›ฒ’ ๐›ฒ“ ๐›ฒ” ๐›ฒ• ๐›ฒ– ๐›ฒ— ๐›ฒ˜ ๐›ฒ™ ๐›ฒš ๐›ฒ› ๐›ฒœ ๐›ฒ ๐›ฒž ๐›ฒŸ +๐›ฐ€ ๐›ฐ ๐›ฐ‚ ๐›ฐƒ ๐›ฐ„ ๐›ฐ… ๐›ฐ† ๐›ฐ‡ ๐›ฐˆ ๐›ฐ‰ ๐›ฐŠ ๐›ฐ‹ ๐›ฐŒ ๐›ฐ ๐›ฐŽ ๐›ฐ ๐›ฐ ๐›ฐ‘ ๐›ฐ’ ๐›ฐ“ ๐›ฐ” ๐›ฐ• ๐›ฐ– ๐›ฐ— ๐›ฐ˜ ๐›ฐ™ ๐›ฐš ๐›ฐ› ๐›ฐœ ๐›ฐ ๐›ฐž ๐›ฐŸ ๐›ฐ  ๐›ฐก ๐›ฐข ๐›ฐฃ ๐›ฐค ๐›ฐฅ ๐›ฐฆ ๐›ฐง ๐›ฐจ ๐›ฐฉ ๐›ฐช ๐›ฐซ ๐›ฐฌ ๐›ฐญ ๐›ฐฎ ๐›ฐฏ ๐›ฐฐ ๐›ฐฑ ๐›ฐฒ ๐›ฐณ ๐›ฐด ๐›ฐต ๐›ฐถ ๐›ฐท ๐›ฐธ ๐›ฐน ๐›ฐบ ๐›ฐป ๐›ฐผ ๐›ฐฝ ๐›ฐพ ๐›ฐฟ ๐›ฑ€ ๐›ฑ ๐›ฑ‚ ๐›ฑƒ ๐›ฑ„ ๐›ฑ… ๐›ฑ† ๐›ฑ‡ ๐›ฑˆ ๐›ฑ‰ ๐›ฑŠ ๐›ฑ‹ ๐›ฑŒ ๐›ฑ ๐›ฑŽ ๐›ฑ ๐›ฑ ๐›ฑ‘ ๐›ฑ’ ๐›ฑ“ ๐›ฑ” ๐›ฑ• ๐›ฑ– ๐›ฑ— ๐›ฑ˜ ๐›ฑ™ ๐›ฑš ๐›ฑ› ๐›ฑœ ๐›ฑ ๐›ฑž ๐›ฑŸ ๐›ฑ  ๐›ฑก ๐›ฑข ๐›ฑฃ ๐›ฑค ๐›ฑฅ ๐›ฑฆ ๐›ฑง ๐›ฑจ ๐›ฑฉ ๐›ฑช ๐›ฑฐ ๐›ฑฑ ๐›ฑฒ ๐›ฑณ ๐›ฑด ๐›ฑต ๐›ฑถ ๐›ฑท ๐›ฑธ ๐›ฑน ๐›ฑบ ๐›ฑป ๐›ฑผ ๐›ฒ€ ๐›ฒ ๐›ฒ‚ ๐›ฒƒ ๐›ฒ„ ๐›ฒ… ๐›ฒ† ๐›ฒ‡ ๐›ฒˆ ๐›ฒ ๐›ฒ‘ ๐›ฒ’ ๐›ฒ“ ๐›ฒ” ๐›ฒ• ๐›ฒ– ๐›ฒ— ๐›ฒ˜ ๐›ฒ™ ๐›ฒœ ๐›ฒ ๐›ฒž ๐›ฒŸ diff --git a/sample_texts/und-Elba_chars.txt b/sample_texts/und-Elba_chars.txt index 4a3f7bd7..2e6adad9 100644 --- a/sample_texts/und-Elba_chars.txt +++ b/sample_texts/und-Elba_chars.txt @@ -1 +1 @@ -๐”€ ๐” ๐”‚ ๐”ƒ ๐”„ ๐”… ๐”† ๐”‡ ๐”ˆ ๐”‰ ๐”Š ๐”‹ ๐”Œ ๐” ๐”Ž ๐” ๐” ๐”‘ ๐”’ ๐”“ ๐”” ๐”• ๐”– ๐”— ๐”˜ ๐”™ ๐”š ๐”› ๐”œ ๐” ๐”ž ๐”Ÿ ๐”  ๐”ก ๐”ข ๐”ฃ ๐”ค ๐”ฅ ๐”ฆ ๐”ง ๐”จ ๐”ฉ ๐”ช ๐”ซ ๐”ฌ ๐”ญ ๐”ฎ ๐”ฏ +๐”€ ๐” ๐”‚ ๐”ƒ ๐”„ ๐”… ๐”† ๐”‡ ๐”ˆ ๐”‰ ๐”Š ๐”‹ ๐”Œ ๐” ๐”Ž ๐” ๐” ๐”‘ ๐”’ ๐”“ ๐”” ๐”• ๐”– ๐”— ๐”˜ ๐”™ ๐”š ๐”› ๐”œ ๐” ๐”ž ๐”Ÿ ๐”  ๐”ก ๐”ข ๐”ฃ ๐”ค ๐”ฅ ๐”ฆ ๐”ง diff --git a/sample_texts/und-Gran_chars.txt b/sample_texts/und-Gran_chars.txt index 1d373725..039f6a93 100644 --- a/sample_texts/und-Gran_chars.txt +++ b/sample_texts/und-Gran_chars.txt @@ -1 +1 @@ -๐‘Œ€ ๐‘Œ ๐‘Œ‚ ๐‘Œƒ ๐‘Œ„ ๐‘Œ… ๐‘Œ† ๐‘Œ‡ ๐‘Œˆ ๐‘Œ‰ ๐‘ŒŠ ๐‘Œ‹ ๐‘ŒŒ ๐‘Œ ๐‘ŒŽ ๐‘Œ ๐‘Œ ๐‘Œ‘ ๐‘Œ’ ๐‘Œ“ ๐‘Œ” ๐‘Œ• ๐‘Œ– ๐‘Œ— ๐‘Œ˜ ๐‘Œ™ ๐‘Œš ๐‘Œ› ๐‘Œœ ๐‘Œ ๐‘Œž ๐‘ŒŸ ๐‘Œ  ๐‘Œก ๐‘Œข ๐‘Œฃ ๐‘Œค ๐‘Œฅ ๐‘Œฆ ๐‘Œง ๐‘Œจ ๐‘Œฉ ๐‘Œช ๐‘Œซ ๐‘Œฌ ๐‘Œญ ๐‘Œฎ ๐‘Œฏ ๐‘Œฐ ๐‘Œฑ ๐‘Œฒ ๐‘Œณ ๐‘Œด ๐‘Œต ๐‘Œถ ๐‘Œท ๐‘Œธ ๐‘Œน ๐‘Œบ ๐‘Œป ๐‘Œผ ๐‘Œฝ ๐‘Œพ ๐‘Œฟ ๐‘€ ๐‘ ๐‘‚ ๐‘ƒ ๐‘„ ๐‘… ๐‘† ๐‘‡ ๐‘ˆ ๐‘‰ ๐‘Š ๐‘‹ ๐‘Œ ๐‘ ๐‘Ž ๐‘ ๐‘ ๐‘‘ ๐‘’ ๐‘“ ๐‘” ๐‘• ๐‘– ๐‘— ๐‘˜ ๐‘™ ๐‘š ๐‘› ๐‘œ ๐‘ ๐‘ž ๐‘Ÿ ๐‘  ๐‘ก ๐‘ข ๐‘ฃ ๐‘ค ๐‘ฅ ๐‘ฆ ๐‘ง ๐‘จ ๐‘ฉ ๐‘ช ๐‘ซ ๐‘ฌ ๐‘ญ ๐‘ฎ ๐‘ฏ ๐‘ฐ ๐‘ฑ ๐‘ฒ ๐‘ณ ๐‘ด ๐‘ต ๐‘ถ ๐‘ท ๐‘ธ ๐‘น ๐‘บ ๐‘ป ๐‘ผ ๐‘ฝ ๐‘พ ๐‘ฟ +๐‘Œ€ ๐‘Œ ๐‘Œ‚ ๐‘Œƒ ๐‘Œ… ๐‘Œ† ๐‘Œ‡ ๐‘Œˆ ๐‘Œ‰ ๐‘ŒŠ ๐‘Œ‹ ๐‘ŒŒ ๐‘Œ ๐‘Œ ๐‘Œ“ ๐‘Œ” ๐‘Œ• ๐‘Œ– ๐‘Œ— ๐‘Œ˜ ๐‘Œ™ ๐‘Œš ๐‘Œ› ๐‘Œœ ๐‘Œ ๐‘Œž ๐‘ŒŸ ๐‘Œ  ๐‘Œก ๐‘Œข ๐‘Œฃ ๐‘Œค ๐‘Œฅ ๐‘Œฆ ๐‘Œง ๐‘Œจ ๐‘Œฒ ๐‘Œณ ๐‘Œต ๐‘Œถ ๐‘Œท ๐‘Œธ ๐‘Œน ๐‘Œป ๐‘Œผ ๐‘Œฝ ๐‘Œพ ๐‘Œฟ ๐‘€ ๐‘ ๐‘‚ ๐‘ƒ ๐‘„ ๐‘‡ ๐‘ˆ ๐‘‹ ๐‘Œ ๐‘ ๐‘ ๐‘— ๐‘ ๐‘ž ๐‘Ÿ ๐‘  ๐‘ก ๐‘ข ๐‘ฃ ๐‘ฆ ๐‘ง ๐‘จ ๐‘ฉ ๐‘ช ๐‘ซ ๐‘ฌ ๐‘ฐ ๐‘ฑ ๐‘ฒ ๐‘ณ ๐‘ด diff --git a/sample_texts/und-Hatr_chars.txt b/sample_texts/und-Hatr_chars.txt index 1da514f2..9fca996e 100644 --- a/sample_texts/und-Hatr_chars.txt +++ b/sample_texts/und-Hatr_chars.txt @@ -1 +1 @@ -๐ฃ  ๐ฃก ๐ฃข ๐ฃฃ ๐ฃค ๐ฃฅ ๐ฃฆ ๐ฃง ๐ฃจ ๐ฃฉ ๐ฃช ๐ฃซ ๐ฃฌ ๐ฃญ ๐ฃฎ ๐ฃฏ ๐ฃฐ ๐ฃฑ ๐ฃฒ ๐ฃณ ๐ฃด ๐ฃต ๐ฃถ ๐ฃท ๐ฃธ ๐ฃน ๐ฃบ ๐ฃป ๐ฃผ ๐ฃฝ ๐ฃพ ๐ฃฟ +๐ฃ  ๐ฃก ๐ฃข ๐ฃฃ ๐ฃค ๐ฃฅ ๐ฃฆ ๐ฃง ๐ฃจ ๐ฃฉ ๐ฃช ๐ฃซ ๐ฃฌ ๐ฃญ ๐ฃฎ ๐ฃฏ ๐ฃฐ ๐ฃฑ ๐ฃฒ ๐ฃด ๐ฃต ๐ฃป ๐ฃผ ๐ฃฝ ๐ฃพ ๐ฃฟ diff --git a/sample_texts/und-Hmng_chars.txt b/sample_texts/und-Hmng_chars.txt index 7a9441f2..c60daccb 100644 --- a/sample_texts/und-Hmng_chars.txt +++ b/sample_texts/und-Hmng_chars.txt @@ -1 +1 @@ -๐–ฌ€ ๐–ฌ ๐–ฌ‚ ๐–ฌƒ ๐–ฌ„ ๐–ฌ… ๐–ฌ† ๐–ฌ‡ ๐–ฌˆ ๐–ฌ‰ ๐–ฌŠ ๐–ฌ‹ ๐–ฌŒ ๐–ฌ ๐–ฌŽ ๐–ฌ ๐–ฌ ๐–ฌ‘ ๐–ฌ’ ๐–ฌ“ ๐–ฌ” ๐–ฌ• ๐–ฌ– ๐–ฌ— ๐–ฌ˜ ๐–ฌ™ ๐–ฌš ๐–ฌ› ๐–ฌœ ๐–ฌ ๐–ฌž ๐–ฌŸ ๐–ฌ  ๐–ฌก ๐–ฌข ๐–ฌฃ ๐–ฌค ๐–ฌฅ ๐–ฌฆ ๐–ฌง ๐–ฌจ ๐–ฌฉ ๐–ฌช ๐–ฌซ ๐–ฌฌ ๐–ฌญ ๐–ฌฎ ๐–ฌฏ ๐–ฌฐ ๐–ฌฑ ๐–ฌฒ ๐–ฌณ ๐–ฌด ๐–ฌต ๐–ฌถ ๐–ฌท ๐–ฌธ ๐–ฌน ๐–ฌบ ๐–ฌป ๐–ฌผ ๐–ฌฝ ๐–ฌพ ๐–ฌฟ ๐–ญ€ ๐–ญ ๐–ญ‚ ๐–ญƒ ๐–ญ„ ๐–ญ… ๐–ญ† ๐–ญ‡ ๐–ญˆ ๐–ญ‰ ๐–ญŠ ๐–ญ‹ ๐–ญŒ ๐–ญ ๐–ญŽ ๐–ญ ๐–ญ ๐–ญ‘ ๐–ญ’ ๐–ญ“ ๐–ญ” ๐–ญ• ๐–ญ– ๐–ญ— ๐–ญ˜ ๐–ญ™ ๐–ญš ๐–ญ› ๐–ญœ ๐–ญ ๐–ญž ๐–ญŸ ๐–ญ  ๐–ญก ๐–ญข ๐–ญฃ ๐–ญค ๐–ญฅ ๐–ญฆ ๐–ญง ๐–ญจ ๐–ญฉ ๐–ญช ๐–ญซ ๐–ญฌ ๐–ญญ ๐–ญฎ ๐–ญฏ ๐–ญฐ ๐–ญฑ ๐–ญฒ ๐–ญณ ๐–ญด ๐–ญต ๐–ญถ ๐–ญท ๐–ญธ ๐–ญน ๐–ญบ ๐–ญป ๐–ญผ ๐–ญฝ ๐–ญพ ๐–ญฟ ๐–ฎ€ ๐–ฎ ๐–ฎ‚ ๐–ฎƒ ๐–ฎ„ ๐–ฎ… ๐–ฎ† ๐–ฎ‡ ๐–ฎˆ ๐–ฎ‰ ๐–ฎŠ ๐–ฎ‹ ๐–ฎŒ ๐–ฎ ๐–ฎŽ ๐–ฎ +๐–ฌ€ ๐–ฌ ๐–ฌ‚ ๐–ฌƒ ๐–ฌ„ ๐–ฌ… ๐–ฌ† ๐–ฌ‡ ๐–ฌˆ ๐–ฌ‰ ๐–ฌŠ ๐–ฌ‹ ๐–ฌŒ ๐–ฌ ๐–ฌŽ ๐–ฌ ๐–ฌ ๐–ฌ‘ ๐–ฌ’ ๐–ฌ“ ๐–ฌ” ๐–ฌ• ๐–ฌ– ๐–ฌ— ๐–ฌ˜ ๐–ฌ™ ๐–ฌš ๐–ฌ› ๐–ฌœ ๐–ฌ ๐–ฌž ๐–ฌŸ ๐–ฌ  ๐–ฌก ๐–ฌข ๐–ฌฃ ๐–ฌค ๐–ฌฅ ๐–ฌฆ ๐–ฌง ๐–ฌจ ๐–ฌฉ ๐–ฌช ๐–ฌซ ๐–ฌฌ ๐–ฌญ ๐–ฌฎ ๐–ฌฏ ๐–ฌฐ ๐–ฌฑ ๐–ฌฒ ๐–ฌณ ๐–ฌด ๐–ฌต ๐–ฌถ ๐–ฌท ๐–ฌธ ๐–ฌน ๐–ฌบ ๐–ฌป ๐–ฌผ ๐–ฌฝ ๐–ฌพ ๐–ฌฟ ๐–ญ€ ๐–ญ ๐–ญ‚ ๐–ญƒ ๐–ญ„ ๐–ญ… ๐–ญ ๐–ญ‘ ๐–ญ’ ๐–ญ“ ๐–ญ” ๐–ญ• ๐–ญ– ๐–ญ— ๐–ญ˜ ๐–ญ™ ๐–ญ› ๐–ญœ ๐–ญ ๐–ญž ๐–ญŸ ๐–ญ  ๐–ญก ๐–ญฃ ๐–ญค ๐–ญฅ ๐–ญฆ ๐–ญง ๐–ญจ ๐–ญฉ ๐–ญช ๐–ญซ ๐–ญฌ ๐–ญญ ๐–ญฎ ๐–ญฏ ๐–ญฐ ๐–ญฑ ๐–ญฒ ๐–ญณ ๐–ญด ๐–ญต ๐–ญถ ๐–ญท ๐–ญฝ ๐–ญพ ๐–ญฟ ๐–ฎ€ ๐–ฎ ๐–ฎ‚ ๐–ฎƒ ๐–ฎ„ ๐–ฎ… ๐–ฎ† ๐–ฎ‡ ๐–ฎˆ ๐–ฎ‰ ๐–ฎŠ ๐–ฎ‹ ๐–ฎŒ ๐–ฎ ๐–ฎŽ ๐–ฎ diff --git a/sample_texts/und-Hung_chars.txt b/sample_texts/und-Hung_chars.txt index 5c5e6227..f6be23e3 100644 --- a/sample_texts/und-Hung_chars.txt +++ b/sample_texts/und-Hung_chars.txt @@ -1 +1 @@ -๐ฒ€ ๐ฒ ๐ฒ‚ ๐ฒƒ ๐ฒ„ ๐ฒ… ๐ฒ† ๐ฒ‡ ๐ฒˆ ๐ฒ‰ ๐ฒŠ ๐ฒ‹ ๐ฒŒ ๐ฒ ๐ฒŽ ๐ฒ ๐ฒ ๐ฒ‘ ๐ฒ’ ๐ฒ“ ๐ฒ” ๐ฒ• ๐ฒ– ๐ฒ— ๐ฒ˜ ๐ฒ™ ๐ฒš ๐ฒ› ๐ฒœ ๐ฒ ๐ฒž ๐ฒŸ ๐ฒ  ๐ฒก ๐ฒข ๐ฒฃ ๐ฒค ๐ฒฅ ๐ฒฆ ๐ฒง ๐ฒจ ๐ฒฉ ๐ฒช ๐ฒซ ๐ฒฌ ๐ฒญ ๐ฒฎ ๐ฒฏ ๐ฒฐ ๐ฒฑ ๐ฒฒ ๐ฒณ ๐ฒด ๐ฒต ๐ฒถ ๐ฒท ๐ฒธ ๐ฒน ๐ฒบ ๐ฒป ๐ฒผ ๐ฒฝ ๐ฒพ ๐ฒฟ ๐ณ€ ๐ณ ๐ณ‚ ๐ณƒ ๐ณ„ ๐ณ… ๐ณ† ๐ณ‡ ๐ณˆ ๐ณ‰ ๐ณŠ ๐ณ‹ ๐ณŒ ๐ณ ๐ณŽ ๐ณ ๐ณ ๐ณ‘ ๐ณ’ ๐ณ“ ๐ณ” ๐ณ• ๐ณ– ๐ณ— ๐ณ˜ ๐ณ™ ๐ณš ๐ณ› ๐ณœ ๐ณ ๐ณž ๐ณŸ ๐ณ  ๐ณก ๐ณข ๐ณฃ ๐ณค ๐ณฅ ๐ณฆ ๐ณง ๐ณจ ๐ณฉ ๐ณช ๐ณซ ๐ณฌ ๐ณญ ๐ณฎ ๐ณฏ ๐ณฐ ๐ณฑ ๐ณฒ ๐ณณ ๐ณด ๐ณต ๐ณถ ๐ณท ๐ณธ ๐ณน ๐ณบ ๐ณป ๐ณผ ๐ณฝ ๐ณพ ๐ณฟ +๐ฒ€ ๐ฒ ๐ฒ‚ ๐ฒƒ ๐ฒ„ ๐ฒ… ๐ฒ† ๐ฒ‡ ๐ฒˆ ๐ฒ‰ ๐ฒŠ ๐ฒ‹ ๐ฒŒ ๐ฒ ๐ฒŽ ๐ฒ ๐ฒ ๐ฒ‘ ๐ฒ’ ๐ฒ“ ๐ฒ” ๐ฒ• ๐ฒ– ๐ฒ— ๐ฒ˜ ๐ฒ™ ๐ฒš ๐ฒ› ๐ฒœ ๐ฒ ๐ฒž ๐ฒŸ ๐ฒ  ๐ฒก ๐ฒข ๐ฒฃ ๐ฒค ๐ฒฅ ๐ฒฆ ๐ฒง ๐ฒจ ๐ฒฉ ๐ฒช ๐ฒซ ๐ฒฌ ๐ฒญ ๐ฒฎ ๐ฒฏ ๐ฒฐ ๐ฒฑ ๐ฒฒ ๐ณ€ ๐ณ ๐ณ‚ ๐ณƒ ๐ณ„ ๐ณ… ๐ณ† ๐ณ‡ ๐ณˆ ๐ณ‰ ๐ณŠ ๐ณ‹ ๐ณŒ ๐ณ ๐ณŽ ๐ณ ๐ณ ๐ณ‘ ๐ณ’ ๐ณ“ ๐ณ” ๐ณ• ๐ณ– ๐ณ— ๐ณ˜ ๐ณ™ ๐ณš ๐ณ› ๐ณœ ๐ณ ๐ณž ๐ณŸ ๐ณ  ๐ณก ๐ณข ๐ณฃ ๐ณค ๐ณฅ ๐ณฆ ๐ณง ๐ณจ ๐ณฉ ๐ณช ๐ณซ ๐ณฌ ๐ณญ ๐ณฎ ๐ณฏ ๐ณฐ ๐ณฑ ๐ณฒ ๐ณบ ๐ณป ๐ณผ ๐ณฝ ๐ณพ ๐ณฟ diff --git a/sample_texts/und-Lina_chars.txt b/sample_texts/und-Lina_chars.txt index efaf7803..b71390c5 100644 --- a/sample_texts/und-Lina_chars.txt +++ b/sample_texts/und-Lina_chars.txt @@ -1 +1 @@ -๐˜€ ๐˜ ๐˜‚ ๐˜ƒ ๐˜„ ๐˜… ๐˜† ๐˜‡ ๐˜ˆ ๐˜‰ ๐˜Š ๐˜‹ ๐˜Œ ๐˜ ๐˜Ž ๐˜ ๐˜ ๐˜‘ ๐˜’ ๐˜“ ๐˜” ๐˜• ๐˜– ๐˜— ๐˜˜ ๐˜™ ๐˜š ๐˜› ๐˜œ ๐˜ ๐˜ž ๐˜Ÿ ๐˜  ๐˜ก ๐˜ข ๐˜ฃ ๐˜ค ๐˜ฅ ๐˜ฆ ๐˜ง ๐˜จ ๐˜ฉ ๐˜ช ๐˜ซ ๐˜ฌ ๐˜ญ ๐˜ฎ ๐˜ฏ ๐˜ฐ ๐˜ฑ ๐˜ฒ ๐˜ณ ๐˜ด ๐˜ต ๐˜ถ ๐˜ท ๐˜ธ ๐˜น ๐˜บ ๐˜ป ๐˜ผ ๐˜ฝ ๐˜พ ๐˜ฟ ๐™€ ๐™ ๐™‚ ๐™ƒ ๐™„ ๐™… ๐™† ๐™‡ ๐™ˆ ๐™‰ ๐™Š ๐™‹ ๐™Œ ๐™ ๐™Ž ๐™ ๐™ ๐™‘ ๐™’ ๐™“ ๐™” ๐™• ๐™– ๐™— ๐™˜ ๐™™ ๐™š ๐™› ๐™œ ๐™ ๐™ž ๐™Ÿ ๐™  ๐™ก ๐™ข ๐™ฃ ๐™ค ๐™ฅ ๐™ฆ ๐™ง ๐™จ ๐™ฉ ๐™ช ๐™ซ ๐™ฌ ๐™ญ ๐™ฎ ๐™ฏ ๐™ฐ ๐™ฑ ๐™ฒ ๐™ณ ๐™ด ๐™ต ๐™ถ ๐™ท ๐™ธ ๐™น ๐™บ ๐™ป ๐™ผ ๐™ฝ ๐™พ ๐™ฟ ๐š€ ๐š ๐š‚ ๐šƒ ๐š„ ๐š… ๐š† ๐š‡ ๐šˆ ๐š‰ ๐šŠ ๐š‹ ๐šŒ ๐š ๐šŽ ๐š ๐š ๐š‘ ๐š’ ๐š“ ๐š” ๐š• ๐š– ๐š— ๐š˜ ๐š™ ๐šš ๐š› ๐šœ ๐š ๐šž ๐šŸ ๐š  ๐šก ๐šข ๐šฃ ๐šค ๐šฅ ๐šฆ ๐šง ๐šจ ๐šฉ ๐šช ๐šซ ๐šฌ ๐šญ ๐šฎ ๐šฏ ๐šฐ ๐šฑ ๐šฒ ๐šณ ๐šด ๐šต ๐šถ ๐šท ๐šธ ๐šน ๐šบ ๐šป ๐šผ ๐šฝ ๐šพ ๐šฟ ๐›€ ๐› ๐›‚ ๐›ƒ ๐›„ ๐›… ๐›† ๐›‡ ๐›ˆ ๐›‰ ๐›Š ๐›‹ ๐›Œ ๐› ๐›Ž ๐› ๐› ๐›‘ ๐›’ ๐›“ ๐›” ๐›• ๐›– ๐›— ๐›˜ ๐›™ ๐›š ๐›› ๐›œ ๐› ๐›ž ๐›Ÿ ๐›  ๐›ก ๐›ข ๐›ฃ ๐›ค ๐›ฅ ๐›ฆ ๐›ง ๐›จ ๐›ฉ ๐›ช ๐›ซ ๐›ฌ ๐›ญ ๐›ฎ ๐›ฏ ๐›ฐ ๐›ฑ ๐›ฒ ๐›ณ ๐›ด ๐›ต ๐›ถ ๐›ท ๐›ธ ๐›น ๐›บ ๐›ป ๐›ผ ๐›ฝ ๐›พ ๐›ฟ ๐œ€ ๐œ ๐œ‚ ๐œƒ ๐œ„ ๐œ… ๐œ† ๐œ‡ ๐œˆ ๐œ‰ ๐œŠ ๐œ‹ ๐œŒ ๐œ ๐œŽ ๐œ ๐œ ๐œ‘ ๐œ’ ๐œ“ ๐œ” ๐œ• ๐œ– ๐œ— ๐œ˜ ๐œ™ ๐œš ๐œ› ๐œœ ๐œ ๐œž ๐œŸ ๐œ  ๐œก ๐œข ๐œฃ ๐œค ๐œฅ ๐œฆ ๐œง ๐œจ ๐œฉ ๐œช ๐œซ ๐œฌ ๐œญ ๐œฎ ๐œฏ ๐œฐ ๐œฑ ๐œฒ ๐œณ ๐œด ๐œต ๐œถ ๐œท ๐œธ ๐œน ๐œบ ๐œป ๐œผ ๐œฝ ๐œพ ๐œฟ ๐€ ๐ ๐‚ ๐ƒ ๐„ ๐… ๐† ๐‡ ๐ˆ ๐‰ ๐Š ๐‹ ๐Œ ๐ ๐Ž ๐ ๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐– ๐— ๐˜ ๐™ ๐š ๐› ๐œ ๐ ๐ž ๐Ÿ ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐จ ๐ฉ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฎ ๐ฏ ๐ฐ ๐ฑ ๐ฒ ๐ณ ๐ด ๐ต ๐ถ ๐ท ๐ธ ๐น ๐บ ๐ป ๐ผ ๐ฝ ๐พ ๐ฟ +๐˜€ ๐˜ ๐˜‚ ๐˜ƒ ๐˜„ ๐˜… ๐˜† ๐˜‡ ๐˜ˆ ๐˜‰ ๐˜Š ๐˜‹ ๐˜Œ ๐˜ ๐˜Ž ๐˜ ๐˜ ๐˜‘ ๐˜’ ๐˜“ ๐˜” ๐˜• ๐˜– ๐˜— ๐˜˜ ๐˜™ ๐˜š ๐˜› ๐˜œ ๐˜ ๐˜ž ๐˜Ÿ ๐˜  ๐˜ก ๐˜ข ๐˜ฃ ๐˜ค ๐˜ฅ ๐˜ฆ ๐˜ง ๐˜จ ๐˜ฉ ๐˜ช ๐˜ซ ๐˜ฌ ๐˜ญ ๐˜ฎ ๐˜ฏ ๐˜ฐ ๐˜ฑ ๐˜ฒ ๐˜ณ ๐˜ด ๐˜ต ๐˜ถ ๐˜ท ๐˜ธ ๐˜น ๐˜บ ๐˜ป ๐˜ผ ๐˜ฝ ๐˜พ ๐˜ฟ ๐™€ ๐™ ๐™‚ ๐™ƒ ๐™„ ๐™… ๐™† ๐™‡ ๐™ˆ ๐™‰ ๐™Š ๐™‹ ๐™Œ ๐™ ๐™Ž ๐™ ๐™ ๐™‘ ๐™’ ๐™“ ๐™” ๐™• ๐™– ๐™— ๐™˜ ๐™™ ๐™š ๐™› ๐™œ ๐™ ๐™ž ๐™Ÿ ๐™  ๐™ก ๐™ข ๐™ฃ ๐™ค ๐™ฅ ๐™ฆ ๐™ง ๐™จ ๐™ฉ ๐™ช ๐™ซ ๐™ฌ ๐™ญ ๐™ฎ ๐™ฏ ๐™ฐ ๐™ฑ ๐™ฒ ๐™ณ ๐™ด ๐™ต ๐™ถ ๐™ท ๐™ธ ๐™น ๐™บ ๐™ป ๐™ผ ๐™ฝ ๐™พ ๐™ฟ ๐š€ ๐š ๐š‚ ๐šƒ ๐š„ ๐š… ๐š† ๐š‡ ๐šˆ ๐š‰ ๐šŠ ๐š‹ ๐šŒ ๐š ๐šŽ ๐š ๐š ๐š‘ ๐š’ ๐š“ ๐š” ๐š• ๐š– ๐š— ๐š˜ ๐š™ ๐šš ๐š› ๐šœ ๐š ๐šž ๐šŸ ๐š  ๐šก ๐šข ๐šฃ ๐šค ๐šฅ ๐šฆ ๐šง ๐šจ ๐šฉ ๐šช ๐šซ ๐šฌ ๐šญ ๐šฎ ๐šฏ ๐šฐ ๐šฑ ๐šฒ ๐šณ ๐šด ๐šต ๐šถ ๐šท ๐šธ ๐šน ๐šบ ๐šป ๐šผ ๐šฝ ๐šพ ๐šฟ ๐›€ ๐› ๐›‚ ๐›ƒ ๐›„ ๐›… ๐›† ๐›‡ ๐›ˆ ๐›‰ ๐›Š ๐›‹ ๐›Œ ๐› ๐›Ž ๐› ๐› ๐›‘ ๐›’ ๐›“ ๐›” ๐›• ๐›– ๐›— ๐›˜ ๐›™ ๐›š ๐›› ๐›œ ๐› ๐›ž ๐›Ÿ ๐›  ๐›ก ๐›ข ๐›ฃ ๐›ค ๐›ฅ ๐›ฆ ๐›ง ๐›จ ๐›ฉ ๐›ช ๐›ซ ๐›ฌ ๐›ญ ๐›ฎ ๐›ฏ ๐›ฐ ๐›ฑ ๐›ฒ ๐›ณ ๐›ด ๐›ต ๐›ถ ๐›ท ๐›ธ ๐›น ๐›บ ๐›ป ๐›ผ ๐›ฝ ๐›พ ๐›ฟ ๐œ€ ๐œ ๐œ‚ ๐œƒ ๐œ„ ๐œ… ๐œ† ๐œ‡ ๐œˆ ๐œ‰ ๐œŠ ๐œ‹ ๐œŒ ๐œ ๐œŽ ๐œ ๐œ ๐œ‘ ๐œ’ ๐œ“ ๐œ” ๐œ• ๐œ– ๐œ— ๐œ˜ ๐œ™ ๐œš ๐œ› ๐œœ ๐œ ๐œž ๐œŸ ๐œ  ๐œก ๐œข ๐œฃ ๐œค ๐œฅ ๐œฆ ๐œง ๐œจ ๐œฉ ๐œช ๐œซ ๐œฌ ๐œญ ๐œฎ ๐œฏ ๐œฐ ๐œฑ ๐œฒ ๐œณ ๐œด ๐œต ๐œถ ๐€ ๐ ๐‚ ๐ƒ ๐„ ๐… ๐† ๐‡ ๐ˆ ๐‰ ๐Š ๐‹ ๐Œ ๐ ๐Ž ๐ ๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง diff --git a/sample_texts/und-Mahj_chars.txt b/sample_texts/und-Mahj_chars.txt index 06f05d34..4e329030 100644 --- a/sample_texts/und-Mahj_chars.txt +++ b/sample_texts/und-Mahj_chars.txt @@ -1 +1 @@ -๐‘… ๐‘…‘ ๐‘…’ ๐‘…“ ๐‘…” ๐‘…• ๐‘…– ๐‘…— ๐‘…˜ ๐‘…™ ๐‘…š ๐‘…› ๐‘…œ ๐‘… ๐‘…ž ๐‘…Ÿ ๐‘…  ๐‘…ก ๐‘…ข ๐‘…ฃ ๐‘…ค ๐‘…ฅ ๐‘…ฆ ๐‘…ง ๐‘…จ ๐‘…ฉ ๐‘…ช ๐‘…ซ ๐‘…ฌ ๐‘…ญ ๐‘…ฎ ๐‘…ฏ ๐‘…ฐ ๐‘…ฑ ๐‘…ฒ ๐‘…ณ ๐‘…ด ๐‘…ต ๐‘…ถ ๐‘…ท ๐‘…ธ ๐‘…น ๐‘…บ ๐‘…ป ๐‘…ผ ๐‘…ฝ ๐‘…พ ๐‘…ฟ +๐‘… ๐‘…‘ ๐‘…’ ๐‘…“ ๐‘…” ๐‘…• ๐‘…– ๐‘…— ๐‘…˜ ๐‘…™ ๐‘…š ๐‘…› ๐‘…œ ๐‘… ๐‘…ž ๐‘…Ÿ ๐‘…  ๐‘…ก ๐‘…ข ๐‘…ฃ ๐‘…ค ๐‘…ฅ ๐‘…ฆ ๐‘…ง ๐‘…จ ๐‘…ฉ ๐‘…ช ๐‘…ซ ๐‘…ฌ ๐‘…ญ ๐‘…ฎ ๐‘…ฏ ๐‘…ฐ ๐‘…ฑ ๐‘…ฒ ๐‘…ณ ๐‘…ด ๐‘…ต ๐‘…ถ diff --git a/sample_texts/und-Mani_chars.txt b/sample_texts/und-Mani_chars.txt index 828cdc8a..88473d5d 100644 --- a/sample_texts/und-Mani_chars.txt +++ b/sample_texts/und-Mani_chars.txt @@ -1 +1 @@ -๐ซ€ ๐ซ ๐ซ‚ ๐ซƒ ๐ซ„ ๐ซ… ๐ซ† ๐ซ‡ ๐ซˆ ๐ซ‰ ๐ซŠ ๐ซ‹ ๐ซŒ ๐ซ ๐ซŽ ๐ซ ๐ซ ๐ซ‘ ๐ซ’ ๐ซ“ ๐ซ” ๐ซ• ๐ซ– ๐ซ— ๐ซ˜ ๐ซ™ ๐ซš ๐ซ› ๐ซœ ๐ซ ๐ซž ๐ซŸ ๐ซ  ๐ซก ๐ซข ๐ซฃ ๐ซค ๐ซฅ ๐ซฆ ๐ซง ๐ซจ ๐ซฉ ๐ซช ๐ซซ ๐ซฌ ๐ซญ ๐ซฎ ๐ซฏ ๐ซฐ ๐ซฑ ๐ซฒ ๐ซณ ๐ซด ๐ซต ๐ซถ ๐ซท ๐ซธ ๐ซน ๐ซบ ๐ซป ๐ซผ ๐ซฝ ๐ซพ ๐ซฟ +๐ซ€ ๐ซ ๐ซ‚ ๐ซƒ ๐ซ„ ๐ซ… ๐ซ† ๐ซ‡ ๐ซˆ ๐ซ‰ ๐ซŠ ๐ซ‹ ๐ซŒ ๐ซ ๐ซŽ ๐ซ ๐ซ ๐ซ‘ ๐ซ’ ๐ซ“ ๐ซ” ๐ซ• ๐ซ– ๐ซ— ๐ซ˜ ๐ซ™ ๐ซš ๐ซ› ๐ซœ ๐ซ ๐ซž ๐ซŸ ๐ซ  ๐ซก ๐ซข ๐ซฃ ๐ซค ๐ซฅ ๐ซฆ ๐ซซ ๐ซฌ ๐ซญ ๐ซฎ ๐ซฏ ๐ซฐ ๐ซฑ ๐ซฒ ๐ซณ ๐ซด ๐ซต ๐ซถ diff --git a/sample_texts/und-Marc_chars.txt b/sample_texts/und-Marc_chars.txt index 89f36901..814c7da4 100644 --- a/sample_texts/und-Marc_chars.txt +++ b/sample_texts/und-Marc_chars.txt @@ -1 +1 @@ -๐‘ฑฐ ๐‘ฑฑ ๐‘ฑฒ ๐‘ฑณ ๐‘ฑด ๐‘ฑต ๐‘ฑถ ๐‘ฑท ๐‘ฑธ ๐‘ฑน ๐‘ฑบ ๐‘ฑป ๐‘ฑผ ๐‘ฑฝ ๐‘ฑพ ๐‘ฑฟ ๐‘ฒ€ ๐‘ฒ ๐‘ฒ‚ ๐‘ฒƒ ๐‘ฒ„ ๐‘ฒ… ๐‘ฒ† ๐‘ฒ‡ ๐‘ฒˆ ๐‘ฒ‰ ๐‘ฒŠ ๐‘ฒ‹ ๐‘ฒŒ ๐‘ฒ ๐‘ฒŽ ๐‘ฒ ๐‘ฒ ๐‘ฒ‘ ๐‘ฒ’ ๐‘ฒ“ ๐‘ฒ” ๐‘ฒ• ๐‘ฒ– ๐‘ฒ— ๐‘ฒ˜ ๐‘ฒ™ ๐‘ฒš ๐‘ฒ› ๐‘ฒœ ๐‘ฒ ๐‘ฒž ๐‘ฒŸ ๐‘ฒ  ๐‘ฒก ๐‘ฒข ๐‘ฒฃ ๐‘ฒค ๐‘ฒฅ ๐‘ฒฆ ๐‘ฒง ๐‘ฒจ ๐‘ฒฉ ๐‘ฒช ๐‘ฒซ ๐‘ฒฌ ๐‘ฒญ ๐‘ฒฎ ๐‘ฒฏ ๐‘ฒฐ ๐‘ฒฑ ๐‘ฒฒ ๐‘ฒณ ๐‘ฒด ๐‘ฒต ๐‘ฒถ ๐‘ฒท ๐‘ฒธ ๐‘ฒน ๐‘ฒบ ๐‘ฒป ๐‘ฒผ ๐‘ฒฝ ๐‘ฒพ ๐‘ฒฟ +๐‘ฑฐ ๐‘ฑฑ ๐‘ฑฒ ๐‘ฑณ ๐‘ฑด ๐‘ฑต ๐‘ฑถ ๐‘ฑท ๐‘ฑธ ๐‘ฑน ๐‘ฑบ ๐‘ฑป ๐‘ฑผ ๐‘ฑฝ ๐‘ฑพ ๐‘ฑฟ ๐‘ฒ€ ๐‘ฒ ๐‘ฒ‚ ๐‘ฒƒ ๐‘ฒ„ ๐‘ฒ… ๐‘ฒ† ๐‘ฒ‡ ๐‘ฒˆ ๐‘ฒ‰ ๐‘ฒŠ ๐‘ฒ‹ ๐‘ฒŒ ๐‘ฒ ๐‘ฒŽ ๐‘ฒ ๐‘ฒ’ ๐‘ฒ“ ๐‘ฒ” ๐‘ฒ• ๐‘ฒ– ๐‘ฒ— ๐‘ฒ˜ ๐‘ฒ™ ๐‘ฒš ๐‘ฒ› ๐‘ฒœ ๐‘ฒ ๐‘ฒž ๐‘ฒŸ ๐‘ฒ  ๐‘ฒก ๐‘ฒข ๐‘ฒฃ ๐‘ฒค ๐‘ฒฅ ๐‘ฒฆ ๐‘ฒง ๐‘ฒฉ ๐‘ฒช ๐‘ฒซ ๐‘ฒฌ ๐‘ฒญ ๐‘ฒฎ ๐‘ฒฏ ๐‘ฒฐ ๐‘ฒฑ ๐‘ฒฒ ๐‘ฒณ ๐‘ฒด ๐‘ฒต ๐‘ฒถ diff --git a/sample_texts/und-Mend_chars.txt b/sample_texts/und-Mend_chars.txt index f939f11e..7e8b0f89 100644 --- a/sample_texts/und-Mend_chars.txt +++ b/sample_texts/und-Mend_chars.txt @@ -1 +1 @@ -๐ž € ๐ž  ๐ž ‚ ๐ž ƒ ๐ž „ ๐ž … ๐ž † ๐ž ‡ ๐ž ˆ ๐ž ‰ ๐ž Š ๐ž ‹ ๐ž Œ ๐ž  ๐ž Ž ๐ž  ๐ž  ๐ž ‘ ๐ž ’ ๐ž “ ๐ž ” ๐ž • ๐ž – ๐ž — ๐ž ˜ ๐ž ™ ๐ž š ๐ž › ๐ž œ ๐ž  ๐ž ž ๐ž Ÿ ๐ž   ๐ž ก ๐ž ข ๐ž ฃ ๐ž ค ๐ž ฅ ๐ž ฆ ๐ž ง ๐ž จ ๐ž ฉ ๐ž ช ๐ž ซ ๐ž ฌ ๐ž ญ ๐ž ฎ ๐ž ฏ ๐ž ฐ ๐ž ฑ ๐ž ฒ ๐ž ณ ๐ž ด ๐ž ต ๐ž ถ ๐ž ท ๐ž ธ ๐ž น ๐ž บ ๐ž ป ๐ž ผ ๐ž ฝ ๐ž พ ๐ž ฟ ๐žก€ ๐žก ๐žก‚ ๐žกƒ ๐žก„ ๐žก… ๐žก† ๐žก‡ ๐žกˆ ๐žก‰ ๐žกŠ ๐žก‹ ๐žกŒ ๐žก ๐žกŽ ๐žก ๐žก ๐žก‘ ๐žก’ ๐žก“ ๐žก” ๐žก• ๐žก– ๐žก— ๐žก˜ ๐žก™ ๐žกš ๐žก› ๐žกœ ๐žก ๐žกž ๐žกŸ ๐žก  ๐žกก ๐žกข ๐žกฃ ๐žกค ๐žกฅ ๐žกฆ ๐žกง ๐žกจ ๐žกฉ ๐žกช ๐žกซ ๐žกฌ ๐žกญ ๐žกฎ ๐žกฏ ๐žกฐ ๐žกฑ ๐žกฒ ๐žกณ ๐žกด ๐žกต ๐žกถ ๐žกท ๐žกธ ๐žกน ๐žกบ ๐žกป ๐žกผ ๐žกฝ ๐žกพ ๐žกฟ ๐žข€ ๐žข ๐žข‚ ๐žขƒ ๐žข„ ๐žข… ๐žข† ๐žข‡ ๐žขˆ ๐žข‰ ๐žขŠ ๐žข‹ ๐žขŒ ๐žข ๐žขŽ ๐žข ๐žข ๐žข‘ ๐žข’ ๐žข“ ๐žข” ๐žข• ๐žข– ๐žข— ๐žข˜ ๐žข™ ๐žขš ๐žข› ๐žขœ ๐žข ๐žขž ๐žขŸ ๐žข  ๐žขก ๐žขข ๐žขฃ ๐žขค ๐žขฅ ๐žขฆ ๐žขง ๐žขจ ๐žขฉ ๐žขช ๐žขซ ๐žขฌ ๐žขญ ๐žขฎ ๐žขฏ ๐žขฐ ๐žขฑ ๐žขฒ ๐žขณ ๐žขด ๐žขต ๐žขถ ๐žขท ๐žขธ ๐žขน ๐žขบ ๐žขป ๐žขผ ๐žขฝ ๐žขพ ๐žขฟ ๐žฃ€ ๐žฃ ๐žฃ‚ ๐žฃƒ ๐žฃ„ ๐žฃ… ๐žฃ† ๐žฃ‡ ๐žฃˆ ๐žฃ‰ ๐žฃŠ ๐žฃ‹ ๐žฃŒ ๐žฃ ๐žฃŽ ๐žฃ ๐žฃ ๐žฃ‘ ๐žฃ’ ๐žฃ“ ๐žฃ” ๐žฃ• ๐žฃ– ๐žฃ— ๐žฃ˜ ๐žฃ™ ๐žฃš ๐žฃ› ๐žฃœ ๐žฃ ๐žฃž ๐žฃŸ +๐ž € ๐ž  ๐ž ‚ ๐ž ƒ ๐ž „ ๐ž … ๐ž † ๐ž ‡ ๐ž ˆ ๐ž ‰ ๐ž Š ๐ž ‹ ๐ž Œ ๐ž  ๐ž Ž ๐ž  ๐ž  ๐ž ‘ ๐ž ’ ๐ž “ ๐ž ” ๐ž • ๐ž – ๐ž — ๐ž ˜ ๐ž ™ ๐ž š ๐ž › ๐ž œ ๐ž  ๐ž ž ๐ž Ÿ ๐ž   ๐ž ก ๐ž ข ๐ž ฃ ๐ž ค ๐ž ฅ ๐ž ฆ ๐ž ง ๐ž จ ๐ž ฉ ๐ž ช ๐ž ซ ๐ž ฌ ๐ž ญ ๐ž ฎ ๐ž ฏ ๐ž ฐ ๐ž ฑ ๐ž ฒ ๐ž ณ ๐ž ด ๐ž ต ๐ž ถ ๐ž ท ๐ž ธ ๐ž น ๐ž บ ๐ž ป ๐ž ผ ๐ž ฝ ๐ž พ ๐ž ฟ ๐žก€ ๐žก ๐žก‚ ๐žกƒ ๐žก„ ๐žก… ๐žก† ๐žก‡ ๐žกˆ ๐žก‰ ๐žกŠ ๐žก‹ ๐žกŒ ๐žก ๐žกŽ ๐žก ๐žก ๐žก‘ ๐žก’ ๐žก“ ๐žก” ๐žก• ๐žก– ๐žก— ๐žก˜ ๐žก™ ๐žกš ๐žก› ๐žกœ ๐žก ๐žกž ๐žกŸ ๐žก  ๐žกก ๐žกข ๐žกฃ ๐žกค ๐žกฅ ๐žกฆ ๐žกง ๐žกจ ๐žกฉ ๐žกช ๐žกซ ๐žกฌ ๐žกญ ๐žกฎ ๐žกฏ ๐žกฐ ๐žกฑ ๐žกฒ ๐žกณ ๐žกด ๐žกต ๐žกถ ๐žกท ๐žกธ ๐žกน ๐žกบ ๐žกป ๐žกผ ๐žกฝ ๐žกพ ๐žกฟ ๐žข€ ๐žข ๐žข‚ ๐žขƒ ๐žข„ ๐žข… ๐žข† ๐žข‡ ๐žขˆ ๐žข‰ ๐žขŠ ๐žข‹ ๐žขŒ ๐žข ๐žขŽ ๐žข ๐žข ๐žข‘ ๐žข’ ๐žข“ ๐žข” ๐žข• ๐žข– ๐žข— ๐žข˜ ๐žข™ ๐žขš ๐žข› ๐žขœ ๐žข ๐žขž ๐žขŸ ๐žข  ๐žขก ๐žขข ๐žขฃ ๐žขค ๐žขฅ ๐žขฆ ๐žขง ๐žขจ ๐žขฉ ๐žขช ๐žขซ ๐žขฌ ๐žขญ ๐žขฎ ๐žขฏ ๐žขฐ ๐žขฑ ๐žขฒ ๐žขณ ๐žขด ๐žขต ๐žขถ ๐žขท ๐žขธ ๐žขน ๐žขบ ๐žขป ๐žขผ ๐žขฝ ๐žขพ ๐žขฟ ๐žฃ€ ๐žฃ ๐žฃ‚ ๐žฃƒ ๐žฃ„ ๐žฃ‡ ๐žฃˆ ๐žฃ‰ ๐žฃŠ ๐žฃ‹ ๐žฃŒ ๐žฃ ๐žฃŽ ๐žฃ ๐žฃ ๐žฃ‘ ๐žฃ’ ๐žฃ“ ๐žฃ” ๐žฃ• ๐žฃ– diff --git a/sample_texts/und-Modi_chars.txt b/sample_texts/und-Modi_chars.txt index f747a4ff..58460e18 100644 --- a/sample_texts/und-Modi_chars.txt +++ b/sample_texts/und-Modi_chars.txt @@ -1 +1 @@ -๐‘˜€ ๐‘˜ ๐‘˜‚ ๐‘˜ƒ ๐‘˜„ ๐‘˜… ๐‘˜† ๐‘˜‡ ๐‘˜ˆ ๐‘˜‰ ๐‘˜Š ๐‘˜‹ ๐‘˜Œ ๐‘˜ ๐‘˜Ž ๐‘˜ ๐‘˜ ๐‘˜‘ ๐‘˜’ ๐‘˜“ ๐‘˜” ๐‘˜• ๐‘˜– ๐‘˜— ๐‘˜˜ ๐‘˜™ ๐‘˜š ๐‘˜› ๐‘˜œ ๐‘˜ ๐‘˜ž ๐‘˜Ÿ ๐‘˜  ๐‘˜ก ๐‘˜ข ๐‘˜ฃ ๐‘˜ค ๐‘˜ฅ ๐‘˜ฆ ๐‘˜ง ๐‘˜จ ๐‘˜ฉ ๐‘˜ช ๐‘˜ซ ๐‘˜ฌ ๐‘˜ญ ๐‘˜ฎ ๐‘˜ฏ ๐‘˜ฐ ๐‘˜ฑ ๐‘˜ฒ ๐‘˜ณ ๐‘˜ด ๐‘˜ต ๐‘˜ถ ๐‘˜ท ๐‘˜ธ ๐‘˜น ๐‘˜บ ๐‘˜ป ๐‘˜ผ ๐‘˜ฝ ๐‘˜พ ๐‘˜ฟ ๐‘™€ ๐‘™ ๐‘™‚ ๐‘™ƒ ๐‘™„ ๐‘™… ๐‘™† ๐‘™‡ ๐‘™ˆ ๐‘™‰ ๐‘™Š ๐‘™‹ ๐‘™Œ ๐‘™ ๐‘™Ž ๐‘™ ๐‘™ ๐‘™‘ ๐‘™’ ๐‘™“ ๐‘™” ๐‘™• ๐‘™– ๐‘™— ๐‘™˜ ๐‘™™ ๐‘™š ๐‘™› ๐‘™œ ๐‘™ ๐‘™ž ๐‘™Ÿ +๐‘˜€ ๐‘˜ ๐‘˜‚ ๐‘˜ƒ ๐‘˜„ ๐‘˜… ๐‘˜† ๐‘˜‡ ๐‘˜ˆ ๐‘˜‰ ๐‘˜Š ๐‘˜‹ ๐‘˜Œ ๐‘˜ ๐‘˜Ž ๐‘˜ ๐‘˜ ๐‘˜‘ ๐‘˜’ ๐‘˜“ ๐‘˜” ๐‘˜• ๐‘˜– ๐‘˜— ๐‘˜˜ ๐‘˜™ ๐‘˜š ๐‘˜› ๐‘˜œ ๐‘˜ ๐‘˜ž ๐‘˜Ÿ ๐‘˜  ๐‘˜ก ๐‘˜ข ๐‘˜ฃ ๐‘˜ค ๐‘˜ฅ ๐‘˜ฆ ๐‘˜ง ๐‘˜จ ๐‘˜ฉ ๐‘˜ช ๐‘˜ซ ๐‘˜ฌ ๐‘˜ญ ๐‘˜ฎ ๐‘˜ฏ ๐‘˜ฐ ๐‘˜ฑ ๐‘˜ฒ ๐‘˜ณ ๐‘˜ด ๐‘˜ต ๐‘˜ถ ๐‘˜ท ๐‘˜ธ ๐‘˜น ๐‘˜บ ๐‘˜ป ๐‘˜ผ ๐‘˜ฝ ๐‘˜พ ๐‘˜ฟ ๐‘™€ ๐‘™ ๐‘™‚ ๐‘™ƒ ๐‘™„ ๐‘™ ๐‘™‘ ๐‘™’ ๐‘™“ ๐‘™” ๐‘™• ๐‘™– ๐‘™— ๐‘™˜ ๐‘™™ diff --git a/sample_texts/und-Mroo_chars.txt b/sample_texts/und-Mroo_chars.txt index 82d19023..dc5871a9 100644 --- a/sample_texts/und-Mroo_chars.txt +++ b/sample_texts/und-Mroo_chars.txt @@ -1 +1 @@ -๐–ฉ€ ๐–ฉ ๐–ฉ‚ ๐–ฉƒ ๐–ฉ„ ๐–ฉ… ๐–ฉ† ๐–ฉ‡ ๐–ฉˆ ๐–ฉ‰ ๐–ฉŠ ๐–ฉ‹ ๐–ฉŒ ๐–ฉ ๐–ฉŽ ๐–ฉ ๐–ฉ ๐–ฉ‘ ๐–ฉ’ ๐–ฉ“ ๐–ฉ” ๐–ฉ• ๐–ฉ– ๐–ฉ— ๐–ฉ˜ ๐–ฉ™ ๐–ฉš ๐–ฉ› ๐–ฉœ ๐–ฉ ๐–ฉž ๐–ฉŸ ๐–ฉ  ๐–ฉก ๐–ฉข ๐–ฉฃ ๐–ฉค ๐–ฉฅ ๐–ฉฆ ๐–ฉง ๐–ฉจ ๐–ฉฉ ๐–ฉช ๐–ฉซ ๐–ฉฌ ๐–ฉญ ๐–ฉฎ ๐–ฉฏ +๐–ฉ€ ๐–ฉ ๐–ฉ‚ ๐–ฉƒ ๐–ฉ„ ๐–ฉ… ๐–ฉ† ๐–ฉ‡ ๐–ฉˆ ๐–ฉ‰ ๐–ฉŠ ๐–ฉ‹ ๐–ฉŒ ๐–ฉ ๐–ฉŽ ๐–ฉ ๐–ฉ ๐–ฉ‘ ๐–ฉ’ ๐–ฉ“ ๐–ฉ” ๐–ฉ• ๐–ฉ– ๐–ฉ— ๐–ฉ˜ ๐–ฉ™ ๐–ฉš ๐–ฉ› ๐–ฉœ ๐–ฉ ๐–ฉž ๐–ฉ  ๐–ฉก ๐–ฉข ๐–ฉฃ ๐–ฉค ๐–ฉฅ ๐–ฉฆ ๐–ฉง ๐–ฉจ ๐–ฉฉ diff --git a/sample_texts/und-Mult_chars.txt b/sample_texts/und-Mult_chars.txt index 7d4619f4..8e06902b 100644 --- a/sample_texts/und-Mult_chars.txt +++ b/sample_texts/und-Mult_chars.txt @@ -1 +1 @@ -๐‘Š€ ๐‘Š ๐‘Š‚ ๐‘Šƒ ๐‘Š„ ๐‘Š… ๐‘Š† ๐‘Š‡ ๐‘Šˆ ๐‘Š‰ ๐‘ŠŠ ๐‘Š‹ ๐‘ŠŒ ๐‘Š ๐‘ŠŽ ๐‘Š ๐‘Š ๐‘Š‘ ๐‘Š’ ๐‘Š“ ๐‘Š” ๐‘Š• ๐‘Š– ๐‘Š— ๐‘Š˜ ๐‘Š™ ๐‘Šš ๐‘Š› ๐‘Šœ ๐‘Š ๐‘Šž ๐‘ŠŸ ๐‘Š  ๐‘Šก ๐‘Šข ๐‘Šฃ ๐‘Šค ๐‘Šฅ ๐‘Šฆ ๐‘Šง ๐‘Šจ ๐‘Šฉ ๐‘Šช ๐‘Šซ ๐‘Šฌ ๐‘Šญ ๐‘Šฎ ๐‘Šฏ +๐‘Š€ ๐‘Š ๐‘Š‚ ๐‘Šƒ ๐‘Š„ ๐‘Š… ๐‘Š† ๐‘Šˆ ๐‘ŠŠ ๐‘Š‹ ๐‘ŠŒ ๐‘Š ๐‘Š ๐‘Š ๐‘Š‘ ๐‘Š’ ๐‘Š“ ๐‘Š” ๐‘Š• ๐‘Š– ๐‘Š— ๐‘Š˜ ๐‘Š™ ๐‘Šš ๐‘Š› ๐‘Šœ ๐‘Š ๐‘ŠŸ ๐‘Š  ๐‘Šก ๐‘Šข ๐‘Šฃ ๐‘Šค ๐‘Šฅ ๐‘Šฆ ๐‘Šง ๐‘Šจ ๐‘Šฉ diff --git a/sample_texts/und-Nbat_chars.txt b/sample_texts/und-Nbat_chars.txt index f917d99a..3c222831 100644 --- a/sample_texts/und-Nbat_chars.txt +++ b/sample_texts/und-Nbat_chars.txt @@ -1 +1 @@ -๐ข€ ๐ข ๐ข‚ ๐ขƒ ๐ข„ ๐ข… ๐ข† ๐ข‡ ๐ขˆ ๐ข‰ ๐ขŠ ๐ข‹ ๐ขŒ ๐ข ๐ขŽ ๐ข ๐ข ๐ข‘ ๐ข’ ๐ข“ ๐ข” ๐ข• ๐ข– ๐ข— ๐ข˜ ๐ข™ ๐ขš ๐ข› ๐ขœ ๐ข ๐ขž ๐ขŸ ๐ข  ๐ขก ๐ขข ๐ขฃ ๐ขค ๐ขฅ ๐ขฆ ๐ขง ๐ขจ ๐ขฉ ๐ขช ๐ขซ ๐ขฌ ๐ขญ ๐ขฎ ๐ขฏ +๐ข€ ๐ข ๐ข‚ ๐ขƒ ๐ข„ ๐ข… ๐ข† ๐ข‡ ๐ขˆ ๐ข‰ ๐ขŠ ๐ข‹ ๐ขŒ ๐ข ๐ขŽ ๐ข ๐ข ๐ข‘ ๐ข’ ๐ข“ ๐ข” ๐ข• ๐ข– ๐ข— ๐ข˜ ๐ข™ ๐ขš ๐ข› ๐ขœ ๐ข ๐ขž ๐ขง ๐ขจ ๐ขฉ ๐ขช ๐ขซ ๐ขฌ ๐ขญ ๐ขฎ ๐ขฏ diff --git a/sample_texts/und-Newa_chars.txt b/sample_texts/und-Newa_chars.txt index e81353fa..f2af0f94 100644 --- a/sample_texts/und-Newa_chars.txt +++ b/sample_texts/und-Newa_chars.txt @@ -1 +1 @@ -๐‘€ ๐‘ ๐‘‚ ๐‘ƒ ๐‘„ ๐‘… ๐‘† ๐‘‡ ๐‘ˆ ๐‘‰ ๐‘Š ๐‘‹ ๐‘Œ ๐‘ ๐‘Ž ๐‘ ๐‘ ๐‘‘ ๐‘’ ๐‘“ ๐‘” ๐‘• ๐‘– ๐‘— ๐‘˜ ๐‘™ ๐‘š ๐‘› ๐‘œ ๐‘ ๐‘ž ๐‘Ÿ ๐‘  ๐‘ก ๐‘ข ๐‘ฃ ๐‘ค ๐‘ฅ ๐‘ฆ ๐‘ง ๐‘จ ๐‘ฉ ๐‘ช ๐‘ซ ๐‘ฌ ๐‘ญ ๐‘ฎ ๐‘ฏ ๐‘ฐ ๐‘ฑ ๐‘ฒ ๐‘ณ ๐‘ด ๐‘ต ๐‘ถ ๐‘ท ๐‘ธ ๐‘น ๐‘บ ๐‘ป ๐‘ผ ๐‘ฝ ๐‘พ ๐‘ฟ ๐‘‘€ ๐‘‘ ๐‘‘‚ ๐‘‘ƒ ๐‘‘„ ๐‘‘… ๐‘‘† ๐‘‘‡ ๐‘‘ˆ ๐‘‘‰ ๐‘‘Š ๐‘‘‹ ๐‘‘Œ ๐‘‘ ๐‘‘Ž ๐‘‘ ๐‘‘ ๐‘‘‘ ๐‘‘’ ๐‘‘“ ๐‘‘” ๐‘‘• ๐‘‘– ๐‘‘— ๐‘‘˜ ๐‘‘™ ๐‘‘š ๐‘‘› ๐‘‘œ ๐‘‘ ๐‘‘ž ๐‘‘Ÿ ๐‘‘  ๐‘‘ก ๐‘‘ข ๐‘‘ฃ ๐‘‘ค ๐‘‘ฅ ๐‘‘ฆ ๐‘‘ง ๐‘‘จ ๐‘‘ฉ ๐‘‘ช ๐‘‘ซ ๐‘‘ฌ ๐‘‘ญ ๐‘‘ฎ ๐‘‘ฏ ๐‘‘ฐ ๐‘‘ฑ ๐‘‘ฒ ๐‘‘ณ ๐‘‘ด ๐‘‘ต ๐‘‘ถ ๐‘‘ท ๐‘‘ธ ๐‘‘น ๐‘‘บ ๐‘‘ป ๐‘‘ผ ๐‘‘ฝ ๐‘‘พ ๐‘‘ฟ +๐‘€ ๐‘ ๐‘‚ ๐‘ƒ ๐‘„ ๐‘… ๐‘† ๐‘‡ ๐‘ˆ ๐‘‰ ๐‘Š ๐‘‹ ๐‘Œ ๐‘ ๐‘Ž ๐‘ ๐‘ ๐‘‘ ๐‘’ ๐‘“ ๐‘” ๐‘• ๐‘– ๐‘— ๐‘˜ ๐‘™ ๐‘š ๐‘› ๐‘œ ๐‘ ๐‘ž ๐‘Ÿ ๐‘  ๐‘ก ๐‘ข ๐‘ฃ ๐‘ค ๐‘ฅ ๐‘ฆ ๐‘ง ๐‘จ ๐‘ฉ ๐‘ช ๐‘ซ ๐‘ฌ ๐‘ญ ๐‘ฎ ๐‘ฏ ๐‘ฐ ๐‘ฑ ๐‘ฒ ๐‘ณ ๐‘ด ๐‘ต ๐‘ถ ๐‘ท ๐‘ธ ๐‘น ๐‘บ ๐‘ป ๐‘ผ ๐‘ฝ ๐‘พ ๐‘ฟ ๐‘‘€ ๐‘‘ ๐‘‘‚ ๐‘‘ƒ ๐‘‘„ ๐‘‘… ๐‘‘† ๐‘‘‡ ๐‘‘ˆ ๐‘‘‰ ๐‘‘Š ๐‘‘‹ ๐‘‘Œ ๐‘‘ ๐‘‘Ž ๐‘‘ ๐‘‘ ๐‘‘‘ ๐‘‘’ ๐‘‘“ ๐‘‘” ๐‘‘• ๐‘‘– ๐‘‘— ๐‘‘˜ ๐‘‘™ diff --git a/sample_texts/und-Palm_chars.txt b/sample_texts/und-Palm_chars.txt index 4a5e65e4..57313940 100644 --- a/sample_texts/und-Palm_chars.txt +++ b/sample_texts/und-Palm_chars.txt @@ -1 +1 @@ -๐ŸŒ€ ๐ŸŒ ๐ŸŒ‚ ๐ŸŒƒ ๐ŸŒ„ ๐ŸŒ… ๐ŸŒ† ๐ŸŒ‡ ๐ŸŒˆ ๐ŸŒ‰ ๐ŸŒŠ ๐ŸŒ‹ ๐ŸŒŒ ๐ŸŒ ๐ŸŒŽ ๐ŸŒ ๐ŸŒ ๐ŸŒ‘ ๐ŸŒ’ ๐ŸŒ“ ๐ŸŒ” ๐ŸŒ• ๐ŸŒ– ๐ŸŒ— ๐ŸŒ˜ ๐ŸŒ™ ๐ŸŒš ๐ŸŒ› ๐ŸŒœ ๐ŸŒ ๐ŸŒž ๐ŸŒŸ ๐ŸŒ  ๐ŸŒก ๐ŸŒข ๐ŸŒฃ ๐ŸŒค ๐ŸŒฅ ๐ŸŒฆ ๐ŸŒง ๐ŸŒจ ๐ŸŒฉ ๐ŸŒช ๐ŸŒซ ๐ŸŒฌ ๐ŸŒญ ๐ŸŒฎ ๐ŸŒฏ ๐ŸŒฐ ๐ŸŒฑ ๐ŸŒฒ ๐ŸŒณ ๐ŸŒด ๐ŸŒต ๐ŸŒถ ๐ŸŒท ๐ŸŒธ ๐ŸŒน ๐ŸŒบ ๐ŸŒป ๐ŸŒผ ๐ŸŒฝ ๐ŸŒพ ๐ŸŒฟ ๐Ÿ€ ๐Ÿ ๐Ÿ‚ ๐Ÿƒ ๐Ÿ„ ๐Ÿ… ๐Ÿ† ๐Ÿ‡ ๐Ÿˆ ๐Ÿ‰ ๐ŸŠ ๐Ÿ‹ ๐ŸŒ ๐Ÿ ๐ŸŽ ๐Ÿ ๐Ÿ ๐Ÿ‘ ๐Ÿ’ ๐Ÿ“ ๐Ÿ” ๐Ÿ• ๐Ÿ– ๐Ÿ— ๐Ÿ˜ ๐Ÿ™ ๐Ÿš ๐Ÿ› ๐Ÿœ ๐Ÿ ๐Ÿž ๐ŸŸ ๐Ÿ  ๐Ÿก ๐Ÿข ๐Ÿฃ ๐Ÿค ๐Ÿฅ ๐Ÿฆ ๐Ÿง ๐Ÿจ ๐Ÿฉ ๐Ÿช ๐Ÿซ ๐Ÿฌ ๐Ÿญ ๐Ÿฎ ๐Ÿฏ ๐Ÿฐ ๐Ÿฑ ๐Ÿฒ ๐Ÿณ ๐Ÿด ๐Ÿต ๐Ÿถ ๐Ÿท ๐Ÿธ ๐Ÿน ๐Ÿบ ๐Ÿป ๐Ÿผ ๐Ÿฝ ๐Ÿพ ๐Ÿฟ ๐ŸŽ€ ๐ŸŽ ๐ŸŽ‚ ๐ŸŽƒ ๐ŸŽ„ ๐ŸŽ… ๐ŸŽ† ๐ŸŽ‡ ๐ŸŽˆ ๐ŸŽ‰ ๐ŸŽŠ ๐ŸŽ‹ ๐ŸŽŒ ๐ŸŽ ๐ŸŽŽ ๐ŸŽ ๐ŸŽ ๐ŸŽ‘ ๐ŸŽ’ ๐ŸŽ“ ๐ŸŽ” ๐ŸŽ• ๐ŸŽ– ๐ŸŽ— ๐ŸŽ˜ ๐ŸŽ™ ๐ŸŽš ๐ŸŽ› ๐ŸŽœ ๐ŸŽ ๐ŸŽž ๐ŸŽŸ ๐ŸŽ  ๐ŸŽก ๐ŸŽข ๐ŸŽฃ ๐ŸŽค ๐ŸŽฅ ๐ŸŽฆ ๐ŸŽง ๐ŸŽจ ๐ŸŽฉ ๐ŸŽช ๐ŸŽซ ๐ŸŽฌ ๐ŸŽญ ๐ŸŽฎ ๐ŸŽฏ ๐ŸŽฐ ๐ŸŽฑ ๐ŸŽฒ ๐ŸŽณ ๐ŸŽด ๐ŸŽต ๐ŸŽถ ๐ŸŽท ๐ŸŽธ ๐ŸŽน ๐ŸŽบ ๐ŸŽป ๐ŸŽผ ๐ŸŽฝ ๐ŸŽพ ๐ŸŽฟ ๐Ÿ€ ๐Ÿ ๐Ÿ‚ ๐Ÿƒ ๐Ÿ„ ๐Ÿ… ๐Ÿ† ๐Ÿ‡ ๐Ÿˆ ๐Ÿ‰ ๐ŸŠ ๐Ÿ‹ ๐ŸŒ ๐Ÿ ๐ŸŽ ๐Ÿ ๐Ÿ ๐Ÿ‘ ๐Ÿ’ ๐Ÿ“ ๐Ÿ” ๐Ÿ• ๐Ÿ– ๐Ÿ— ๐Ÿ˜ ๐Ÿ™ ๐Ÿš ๐Ÿ› ๐Ÿœ ๐Ÿ ๐Ÿž ๐ŸŸ ๐Ÿ  ๐Ÿก ๐Ÿข ๐Ÿฃ ๐Ÿค ๐Ÿฅ ๐Ÿฆ ๐Ÿง ๐Ÿจ ๐Ÿฉ ๐Ÿช ๐Ÿซ ๐Ÿฌ ๐Ÿญ ๐Ÿฎ ๐Ÿฏ ๐Ÿฐ ๐Ÿฑ ๐Ÿฒ ๐Ÿณ ๐Ÿด ๐Ÿต ๐Ÿถ ๐Ÿท ๐Ÿธ ๐Ÿน ๐Ÿบ ๐Ÿป ๐Ÿผ ๐Ÿฝ ๐Ÿพ ๐Ÿฟ ๐Ÿ€ ๐Ÿ ๐Ÿ‚ ๐Ÿƒ ๐Ÿ„ ๐Ÿ… ๐Ÿ† ๐Ÿ‡ ๐Ÿˆ ๐Ÿ‰ ๐ŸŠ ๐Ÿ‹ ๐ŸŒ ๐Ÿ ๐ŸŽ ๐Ÿ ๐Ÿ ๐Ÿ‘ ๐Ÿ’ ๐Ÿ“ ๐Ÿ” ๐Ÿ• ๐Ÿ– ๐Ÿ— ๐Ÿ˜ ๐Ÿ™ ๐Ÿš ๐Ÿ› ๐Ÿœ ๐Ÿ ๐Ÿž ๐ŸŸ ๐Ÿ  ๐Ÿก ๐Ÿข ๐Ÿฃ ๐Ÿค ๐Ÿฅ ๐Ÿฆ ๐Ÿง ๐Ÿจ ๐Ÿฉ ๐Ÿช ๐Ÿซ ๐Ÿฌ ๐Ÿญ ๐Ÿฎ ๐Ÿฏ ๐Ÿฐ ๐Ÿฑ ๐Ÿฒ ๐Ÿณ ๐Ÿด ๐Ÿต ๐Ÿถ ๐Ÿท ๐Ÿธ ๐Ÿน ๐Ÿบ ๐Ÿป ๐Ÿผ ๐Ÿฝ ๐Ÿพ ๐Ÿฟ ๐Ÿ‘€ ๐Ÿ‘ ๐Ÿ‘‚ ๐Ÿ‘ƒ ๐Ÿ‘„ ๐Ÿ‘… ๐Ÿ‘† ๐Ÿ‘‡ ๐Ÿ‘ˆ ๐Ÿ‘‰ ๐Ÿ‘Š ๐Ÿ‘‹ ๐Ÿ‘Œ ๐Ÿ‘ ๐Ÿ‘Ž ๐Ÿ‘ ๐Ÿ‘ ๐Ÿ‘‘ ๐Ÿ‘’ ๐Ÿ‘“ ๐Ÿ‘” ๐Ÿ‘• ๐Ÿ‘– ๐Ÿ‘— ๐Ÿ‘˜ ๐Ÿ‘™ ๐Ÿ‘š ๐Ÿ‘› ๐Ÿ‘œ ๐Ÿ‘ ๐Ÿ‘ž ๐Ÿ‘Ÿ ๐Ÿ‘  ๐Ÿ‘ก ๐Ÿ‘ข ๐Ÿ‘ฃ ๐Ÿ‘ค ๐Ÿ‘ฅ ๐Ÿ‘ฆ ๐Ÿ‘ง ๐Ÿ‘จ ๐Ÿ‘ฉ ๐Ÿ‘ช ๐Ÿ‘ซ ๐Ÿ‘ฌ ๐Ÿ‘ญ ๐Ÿ‘ฎ ๐Ÿ‘ฏ ๐Ÿ‘ฐ ๐Ÿ‘ฑ ๐Ÿ‘ฒ ๐Ÿ‘ณ ๐Ÿ‘ด ๐Ÿ‘ต ๐Ÿ‘ถ ๐Ÿ‘ท ๐Ÿ‘ธ ๐Ÿ‘น ๐Ÿ‘บ ๐Ÿ‘ป ๐Ÿ‘ผ ๐Ÿ‘ฝ ๐Ÿ‘พ ๐Ÿ‘ฟ ๐Ÿ’€ ๐Ÿ’ ๐Ÿ’‚ ๐Ÿ’ƒ ๐Ÿ’„ ๐Ÿ’… ๐Ÿ’† ๐Ÿ’‡ ๐Ÿ’ˆ ๐Ÿ’‰ ๐Ÿ’Š ๐Ÿ’‹ ๐Ÿ’Œ ๐Ÿ’ ๐Ÿ’Ž ๐Ÿ’ ๐Ÿ’ ๐Ÿ’‘ ๐Ÿ’’ ๐Ÿ’“ ๐Ÿ’” ๐Ÿ’• ๐Ÿ’– ๐Ÿ’— ๐Ÿ’˜ ๐Ÿ’™ ๐Ÿ’š ๐Ÿ’› ๐Ÿ’œ ๐Ÿ’ ๐Ÿ’ž ๐Ÿ’Ÿ ๐Ÿ’  ๐Ÿ’ก ๐Ÿ’ข ๐Ÿ’ฃ ๐Ÿ’ค ๐Ÿ’ฅ ๐Ÿ’ฆ ๐Ÿ’ง ๐Ÿ’จ ๐Ÿ’ฉ ๐Ÿ’ช ๐Ÿ’ซ ๐Ÿ’ฌ ๐Ÿ’ญ ๐Ÿ’ฎ ๐Ÿ’ฏ ๐Ÿ’ฐ ๐Ÿ’ฑ ๐Ÿ’ฒ ๐Ÿ’ณ ๐Ÿ’ด ๐Ÿ’ต ๐Ÿ’ถ ๐Ÿ’ท ๐Ÿ’ธ ๐Ÿ’น ๐Ÿ’บ ๐Ÿ’ป ๐Ÿ’ผ ๐Ÿ’ฝ ๐Ÿ’พ ๐Ÿ’ฟ ๐Ÿ“€ ๐Ÿ“ ๐Ÿ“‚ ๐Ÿ“ƒ ๐Ÿ“„ ๐Ÿ“… ๐Ÿ“† ๐Ÿ“‡ ๐Ÿ“ˆ ๐Ÿ“‰ ๐Ÿ“Š ๐Ÿ“‹ ๐Ÿ“Œ ๐Ÿ“ ๐Ÿ“Ž ๐Ÿ“ ๐Ÿ“ ๐Ÿ“‘ ๐Ÿ“’ ๐Ÿ““ ๐Ÿ“” ๐Ÿ“• ๐Ÿ“– ๐Ÿ“— ๐Ÿ“˜ ๐Ÿ“™ ๐Ÿ“š ๐Ÿ“› ๐Ÿ“œ ๐Ÿ“ ๐Ÿ“ž ๐Ÿ“Ÿ ๐Ÿ“  ๐Ÿ“ก ๐Ÿ“ข ๐Ÿ“ฃ ๐Ÿ“ค ๐Ÿ“ฅ ๐Ÿ“ฆ ๐Ÿ“ง ๐Ÿ“จ ๐Ÿ“ฉ ๐Ÿ“ช ๐Ÿ“ซ ๐Ÿ“ฌ ๐Ÿ“ญ ๐Ÿ“ฎ ๐Ÿ“ฏ ๐Ÿ“ฐ ๐Ÿ“ฑ ๐Ÿ“ฒ ๐Ÿ“ณ ๐Ÿ“ด ๐Ÿ“ต ๐Ÿ“ถ ๐Ÿ“ท ๐Ÿ“ธ ๐Ÿ“น ๐Ÿ“บ ๐Ÿ“ป ๐Ÿ“ผ ๐Ÿ“ฝ ๐Ÿ“พ ๐Ÿ“ฟ ๐Ÿ”€ ๐Ÿ” ๐Ÿ”‚ ๐Ÿ”ƒ ๐Ÿ”„ ๐Ÿ”… ๐Ÿ”† ๐Ÿ”‡ ๐Ÿ”ˆ ๐Ÿ”‰ ๐Ÿ”Š ๐Ÿ”‹ ๐Ÿ”Œ ๐Ÿ” ๐Ÿ”Ž ๐Ÿ” ๐Ÿ” ๐Ÿ”‘ ๐Ÿ”’ ๐Ÿ”“ ๐Ÿ”” ๐Ÿ”• ๐Ÿ”– ๐Ÿ”— ๐Ÿ”˜ ๐Ÿ”™ ๐Ÿ”š ๐Ÿ”› ๐Ÿ”œ ๐Ÿ” ๐Ÿ”ž ๐Ÿ”Ÿ ๐Ÿ”  ๐Ÿ”ก ๐Ÿ”ข ๐Ÿ”ฃ ๐Ÿ”ค ๐Ÿ”ฅ ๐Ÿ”ฆ ๐Ÿ”ง ๐Ÿ”จ ๐Ÿ”ฉ ๐Ÿ”ช ๐Ÿ”ซ ๐Ÿ”ฌ ๐Ÿ”ญ ๐Ÿ”ฎ ๐Ÿ”ฏ ๐Ÿ”ฐ ๐Ÿ”ฑ ๐Ÿ”ฒ ๐Ÿ”ณ ๐Ÿ”ด ๐Ÿ”ต ๐Ÿ”ถ ๐Ÿ”ท ๐Ÿ”ธ ๐Ÿ”น ๐Ÿ”บ ๐Ÿ”ป ๐Ÿ”ผ ๐Ÿ”ฝ ๐Ÿ”พ ๐Ÿ”ฟ ๐Ÿ•€ ๐Ÿ• ๐Ÿ•‚ ๐Ÿ•ƒ ๐Ÿ•„ ๐Ÿ•… ๐Ÿ•† ๐Ÿ•‡ ๐Ÿ•ˆ ๐Ÿ•‰ ๐Ÿ•Š ๐Ÿ•‹ ๐Ÿ•Œ ๐Ÿ• ๐Ÿ•Ž ๐Ÿ• ๐Ÿ• ๐Ÿ•‘ ๐Ÿ•’ ๐Ÿ•“ ๐Ÿ•” ๐Ÿ•• ๐Ÿ•– ๐Ÿ•— ๐Ÿ•˜ ๐Ÿ•™ ๐Ÿ•š ๐Ÿ•› ๐Ÿ•œ ๐Ÿ• ๐Ÿ•ž ๐Ÿ•Ÿ ๐Ÿ•  ๐Ÿ•ก ๐Ÿ•ข ๐Ÿ•ฃ ๐Ÿ•ค ๐Ÿ•ฅ ๐Ÿ•ฆ ๐Ÿ•ง ๐Ÿ•จ ๐Ÿ•ฉ ๐Ÿ•ช ๐Ÿ•ซ ๐Ÿ•ฌ ๐Ÿ•ญ ๐Ÿ•ฎ ๐Ÿ•ฏ ๐Ÿ•ฐ ๐Ÿ•ฑ ๐Ÿ•ฒ ๐Ÿ•ณ ๐Ÿ•ด ๐Ÿ•ต ๐Ÿ•ถ ๐Ÿ•ท ๐Ÿ•ธ ๐Ÿ•น ๐Ÿ•บ ๐Ÿ•ป ๐Ÿ•ผ ๐Ÿ•ฝ ๐Ÿ•พ ๐Ÿ•ฟ ๐Ÿ–€ ๐Ÿ– ๐Ÿ–‚ ๐Ÿ–ƒ ๐Ÿ–„ ๐Ÿ–… ๐Ÿ–† ๐Ÿ–‡ ๐Ÿ–ˆ ๐Ÿ–‰ ๐Ÿ–Š ๐Ÿ–‹ ๐Ÿ–Œ ๐Ÿ– ๐Ÿ–Ž ๐Ÿ– ๐Ÿ– ๐Ÿ–‘ ๐Ÿ–’ ๐Ÿ–“ ๐Ÿ–” ๐Ÿ–• ๐Ÿ–– ๐Ÿ–— ๐Ÿ–˜ ๐Ÿ–™ ๐Ÿ–š ๐Ÿ–› ๐Ÿ–œ ๐Ÿ– ๐Ÿ–ž ๐Ÿ–Ÿ ๐Ÿ–  ๐Ÿ–ก ๐Ÿ–ข ๐Ÿ–ฃ ๐Ÿ–ค ๐Ÿ–ฅ ๐Ÿ–ฆ ๐Ÿ–ง ๐Ÿ–จ ๐Ÿ–ฉ ๐Ÿ–ช ๐Ÿ–ซ ๐Ÿ–ฌ ๐Ÿ–ญ ๐Ÿ–ฎ ๐Ÿ–ฏ ๐Ÿ–ฐ ๐Ÿ–ฑ ๐Ÿ–ฒ ๐Ÿ–ณ ๐Ÿ–ด ๐Ÿ–ต ๐Ÿ–ถ ๐Ÿ–ท ๐Ÿ–ธ ๐Ÿ–น ๐Ÿ–บ ๐Ÿ–ป ๐Ÿ–ผ ๐Ÿ–ฝ ๐Ÿ–พ ๐Ÿ–ฟ ๐Ÿ—€ ๐Ÿ— ๐Ÿ—‚ ๐Ÿ—ƒ ๐Ÿ—„ ๐Ÿ—… ๐Ÿ—† ๐Ÿ—‡ ๐Ÿ—ˆ ๐Ÿ—‰ ๐Ÿ—Š ๐Ÿ—‹ ๐Ÿ—Œ ๐Ÿ— ๐Ÿ—Ž ๐Ÿ— ๐Ÿ— ๐Ÿ—‘ ๐Ÿ—’ ๐Ÿ—“ ๐Ÿ—” ๐Ÿ—• ๐Ÿ—– ๐Ÿ—— ๐Ÿ—˜ ๐Ÿ—™ ๐Ÿ—š ๐Ÿ—› ๐Ÿ—œ ๐Ÿ— ๐Ÿ—ž ๐Ÿ—Ÿ ๐Ÿ—  ๐Ÿ—ก ๐Ÿ—ข ๐Ÿ—ฃ ๐Ÿ—ค ๐Ÿ—ฅ ๐Ÿ—ฆ ๐Ÿ—ง ๐Ÿ—จ ๐Ÿ—ฉ ๐Ÿ—ช ๐Ÿ—ซ ๐Ÿ—ฌ ๐Ÿ—ญ ๐Ÿ—ฎ ๐Ÿ—ฏ ๐Ÿ—ฐ ๐Ÿ—ฑ ๐Ÿ—ฒ ๐Ÿ—ณ ๐Ÿ—ด ๐Ÿ—ต ๐Ÿ—ถ ๐Ÿ—ท ๐Ÿ—ธ ๐Ÿ—น ๐Ÿ—บ ๐Ÿ—ป ๐Ÿ—ผ ๐Ÿ—ฝ ๐Ÿ—พ ๐Ÿ—ฟ +๐ก  ๐กก ๐กข ๐กฃ ๐กค ๐กฅ ๐กฆ ๐กง ๐กจ ๐กฉ ๐กช ๐กซ ๐กฌ ๐กญ ๐กฎ ๐กฏ ๐กฐ ๐กฑ ๐กฒ ๐กณ ๐กด ๐กต ๐กถ ๐กท ๐กธ ๐กน ๐กบ ๐กป ๐กผ ๐กฝ ๐กพ ๐กฟ diff --git a/sample_texts/und-Pauc_chars.txt b/sample_texts/und-Pauc_chars.txt index 3d7ee67b..b9e760a2 100644 --- a/sample_texts/und-Pauc_chars.txt +++ b/sample_texts/und-Pauc_chars.txt @@ -1 +1 @@ -๐‘ซ€ ๐‘ซ ๐‘ซ‚ ๐‘ซƒ ๐‘ซ„ ๐‘ซ… ๐‘ซ† ๐‘ซ‡ ๐‘ซˆ ๐‘ซ‰ ๐‘ซŠ ๐‘ซ‹ ๐‘ซŒ ๐‘ซ ๐‘ซŽ ๐‘ซ ๐‘ซ ๐‘ซ‘ ๐‘ซ’ ๐‘ซ“ ๐‘ซ” ๐‘ซ• ๐‘ซ– ๐‘ซ— ๐‘ซ˜ ๐‘ซ™ ๐‘ซš ๐‘ซ› ๐‘ซœ ๐‘ซ ๐‘ซž ๐‘ซŸ ๐‘ซ  ๐‘ซก ๐‘ซข ๐‘ซฃ ๐‘ซค ๐‘ซฅ ๐‘ซฆ ๐‘ซง ๐‘ซจ ๐‘ซฉ ๐‘ซช ๐‘ซซ ๐‘ซฌ ๐‘ซญ ๐‘ซฎ ๐‘ซฏ ๐‘ซฐ ๐‘ซฑ ๐‘ซฒ ๐‘ซณ ๐‘ซด ๐‘ซต ๐‘ซถ ๐‘ซท ๐‘ซธ ๐‘ซน ๐‘ซบ ๐‘ซป ๐‘ซผ ๐‘ซฝ ๐‘ซพ ๐‘ซฟ +๐‘ซ€ ๐‘ซ ๐‘ซ‚ ๐‘ซƒ ๐‘ซ„ ๐‘ซ… ๐‘ซ† ๐‘ซ‡ ๐‘ซˆ ๐‘ซ‰ ๐‘ซŠ ๐‘ซ‹ ๐‘ซŒ ๐‘ซ ๐‘ซŽ ๐‘ซ ๐‘ซ ๐‘ซ‘ ๐‘ซ’ ๐‘ซ“ ๐‘ซ” ๐‘ซ• ๐‘ซ– ๐‘ซ— ๐‘ซ˜ ๐‘ซ™ ๐‘ซš ๐‘ซ› ๐‘ซœ ๐‘ซ ๐‘ซž ๐‘ซŸ ๐‘ซ  ๐‘ซก ๐‘ซข ๐‘ซฃ ๐‘ซค ๐‘ซฅ ๐‘ซฆ ๐‘ซง ๐‘ซจ ๐‘ซฉ ๐‘ซช ๐‘ซซ ๐‘ซฌ ๐‘ซญ ๐‘ซฎ ๐‘ซฏ ๐‘ซฐ ๐‘ซฑ ๐‘ซฒ ๐‘ซณ ๐‘ซด ๐‘ซต ๐‘ซถ ๐‘ซท ๐‘ซธ diff --git a/sample_texts/und-Perm_chars.txt b/sample_texts/und-Perm_chars.txt index ae7ea02d..a7d37bbe 100644 --- a/sample_texts/und-Perm_chars.txt +++ b/sample_texts/und-Perm_chars.txt @@ -1 +1 @@ -๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐– ๐— ๐˜ ๐™ ๐š ๐› ๐œ ๐ ๐ž ๐Ÿ ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐จ ๐ฉ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฎ ๐ฏ ๐ฐ ๐ฑ ๐ฒ ๐ณ ๐ด ๐ต ๐ถ ๐ท ๐ธ ๐น ๐บ ๐ป ๐ผ ๐ฝ ๐พ ๐ฟ +๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐– ๐— ๐˜ ๐™ ๐š ๐› ๐œ ๐ ๐ž ๐Ÿ ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐จ ๐ฉ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฎ ๐ฏ ๐ฐ ๐ฑ ๐ฒ ๐ณ ๐ด ๐ต ๐ถ ๐ท ๐ธ ๐น ๐บ diff --git a/sample_texts/und-Phlp_chars.txt b/sample_texts/und-Phlp_chars.txt index d3dfce21..27bd9d8f 100644 --- a/sample_texts/und-Phlp_chars.txt +++ b/sample_texts/und-Phlp_chars.txt @@ -1 +1 @@ -๐ฎ€ ๐ฎ ๐ฎ‚ ๐ฎƒ ๐ฎ„ ๐ฎ… ๐ฎ† ๐ฎ‡ ๐ฎˆ ๐ฎ‰ ๐ฎŠ ๐ฎ‹ ๐ฎŒ ๐ฎ ๐ฎŽ ๐ฎ ๐ฎ ๐ฎ‘ ๐ฎ’ ๐ฎ“ ๐ฎ” ๐ฎ• ๐ฎ– ๐ฎ— ๐ฎ˜ ๐ฎ™ ๐ฎš ๐ฎ› ๐ฎœ ๐ฎ ๐ฎž ๐ฎŸ ๐ฎ  ๐ฎก ๐ฎข ๐ฎฃ ๐ฎค ๐ฎฅ ๐ฎฆ ๐ฎง ๐ฎจ ๐ฎฉ ๐ฎช ๐ฎซ ๐ฎฌ ๐ฎญ ๐ฎฎ ๐ฎฏ +๐ฎ€ ๐ฎ ๐ฎ‚ ๐ฎƒ ๐ฎ„ ๐ฎ… ๐ฎ† ๐ฎ‡ ๐ฎˆ ๐ฎ‰ ๐ฎŠ ๐ฎ‹ ๐ฎŒ ๐ฎ ๐ฎŽ ๐ฎ ๐ฎ ๐ฎ‘ ๐ฎฉ ๐ฎช ๐ฎซ ๐ฎฌ ๐ฎญ ๐ฎฎ ๐ฎฏ diff --git a/sample_texts/und-Plrd_chars.txt b/sample_texts/und-Plrd_chars.txt index 5d565877..fef96ab4 100644 --- a/sample_texts/und-Plrd_chars.txt +++ b/sample_texts/und-Plrd_chars.txt @@ -1 +1 @@ -๐–ผ€ ๐–ผ ๐–ผ‚ ๐–ผƒ ๐–ผ„ ๐–ผ… ๐–ผ† ๐–ผ‡ ๐–ผˆ ๐–ผ‰ ๐–ผŠ ๐–ผ‹ ๐–ผŒ ๐–ผ ๐–ผŽ ๐–ผ ๐–ผ ๐–ผ‘ ๐–ผ’ ๐–ผ“ ๐–ผ” ๐–ผ• ๐–ผ– ๐–ผ— ๐–ผ˜ ๐–ผ™ ๐–ผš ๐–ผ› ๐–ผœ ๐–ผ ๐–ผž ๐–ผŸ ๐–ผ  ๐–ผก ๐–ผข ๐–ผฃ ๐–ผค ๐–ผฅ ๐–ผฆ ๐–ผง ๐–ผจ ๐–ผฉ ๐–ผช ๐–ผซ ๐–ผฌ ๐–ผญ ๐–ผฎ ๐–ผฏ ๐–ผฐ ๐–ผฑ ๐–ผฒ ๐–ผณ ๐–ผด ๐–ผต ๐–ผถ ๐–ผท ๐–ผธ ๐–ผน ๐–ผบ ๐–ผป ๐–ผผ ๐–ผฝ ๐–ผพ ๐–ผฟ ๐–ฝ€ ๐–ฝ ๐–ฝ‚ ๐–ฝƒ ๐–ฝ„ ๐–ฝ… ๐–ฝ† ๐–ฝ‡ ๐–ฝˆ ๐–ฝ‰ ๐–ฝŠ ๐–ฝ‹ ๐–ฝŒ ๐–ฝ ๐–ฝŽ ๐–ฝ ๐–ฝ ๐–ฝ‘ ๐–ฝ’ ๐–ฝ“ ๐–ฝ” ๐–ฝ• ๐–ฝ– ๐–ฝ— ๐–ฝ˜ ๐–ฝ™ ๐–ฝš ๐–ฝ› ๐–ฝœ ๐–ฝ ๐–ฝž ๐–ฝŸ ๐–ฝ  ๐–ฝก ๐–ฝข ๐–ฝฃ ๐–ฝค ๐–ฝฅ ๐–ฝฆ ๐–ฝง ๐–ฝจ ๐–ฝฉ ๐–ฝช ๐–ฝซ ๐–ฝฌ ๐–ฝญ ๐–ฝฎ ๐–ฝฏ ๐–ฝฐ ๐–ฝฑ ๐–ฝฒ ๐–ฝณ ๐–ฝด ๐–ฝต ๐–ฝถ ๐–ฝท ๐–ฝธ ๐–ฝน ๐–ฝบ ๐–ฝป ๐–ฝผ ๐–ฝฝ ๐–ฝพ ๐–ฝฟ ๐–พ€ ๐–พ ๐–พ‚ ๐–พƒ ๐–พ„ ๐–พ… ๐–พ† ๐–พ‡ ๐–พˆ ๐–พ‰ ๐–พŠ ๐–พ‹ ๐–พŒ ๐–พ ๐–พŽ ๐–พ ๐–พ ๐–พ‘ ๐–พ’ ๐–พ“ ๐–พ” ๐–พ• ๐–พ– ๐–พ— ๐–พ˜ ๐–พ™ ๐–พš ๐–พ› ๐–พœ ๐–พ ๐–พž ๐–พŸ +๐–ผ€ ๐–ผ ๐–ผ‚ ๐–ผƒ ๐–ผ„ ๐–ผ… ๐–ผ† ๐–ผ‡ ๐–ผˆ ๐–ผ‰ ๐–ผŠ ๐–ผ‹ ๐–ผŒ ๐–ผ ๐–ผŽ ๐–ผ ๐–ผ ๐–ผ‘ ๐–ผ’ ๐–ผ“ ๐–ผ” ๐–ผ• ๐–ผ– ๐–ผ— ๐–ผ˜ ๐–ผ™ ๐–ผš ๐–ผ› ๐–ผœ ๐–ผ ๐–ผž ๐–ผŸ ๐–ผ  ๐–ผก ๐–ผข ๐–ผฃ ๐–ผค ๐–ผฅ ๐–ผฆ ๐–ผง ๐–ผจ ๐–ผฉ ๐–ผช ๐–ผซ ๐–ผฌ ๐–ผญ ๐–ผฎ ๐–ผฏ ๐–ผฐ ๐–ผฑ ๐–ผฒ ๐–ผณ ๐–ผด ๐–ผต ๐–ผถ ๐–ผท ๐–ผธ ๐–ผน ๐–ผบ ๐–ผป ๐–ผผ ๐–ผฝ ๐–ผพ ๐–ผฟ ๐–ฝ€ ๐–ฝ ๐–ฝ‚ ๐–ฝƒ ๐–ฝ„ ๐–พ ๐–พ ๐–พ‘ ๐–พ’ ๐–พ“ ๐–พ” ๐–พ• ๐–พ– ๐–พ— ๐–พ˜ ๐–พ™ ๐–พš ๐–พ› ๐–พœ ๐–พ ๐–พž ๐–พŸ diff --git a/sample_texts/und-Shrd_chars.txt b/sample_texts/und-Shrd_chars.txt index e14ca4a5..c1f2b496 100644 --- a/sample_texts/und-Shrd_chars.txt +++ b/sample_texts/und-Shrd_chars.txt @@ -1 +1 @@ -๐‘†€ ๐‘† ๐‘†‚ ๐‘†ƒ ๐‘†„ ๐‘†… ๐‘†† ๐‘†‡ ๐‘†ˆ ๐‘†‰ ๐‘†Š ๐‘†‹ ๐‘†Œ ๐‘† ๐‘†Ž ๐‘† ๐‘† ๐‘†‘ ๐‘†’ ๐‘†“ ๐‘†” ๐‘†• ๐‘†– ๐‘†— ๐‘†˜ ๐‘†™ ๐‘†š ๐‘†› ๐‘†œ ๐‘† ๐‘†ž ๐‘†Ÿ ๐‘†  ๐‘†ก ๐‘†ข ๐‘†ฃ ๐‘†ค ๐‘†ฅ ๐‘†ฆ ๐‘†ง ๐‘†จ ๐‘†ฉ ๐‘†ช ๐‘†ซ ๐‘†ฌ ๐‘†ญ ๐‘†ฎ ๐‘†ฏ ๐‘†ฐ ๐‘†ฑ ๐‘†ฒ ๐‘†ณ ๐‘†ด ๐‘†ต ๐‘†ถ ๐‘†ท ๐‘†ธ ๐‘†น ๐‘†บ ๐‘†ป ๐‘†ผ ๐‘†ฝ ๐‘†พ ๐‘†ฟ ๐‘‡€ ๐‘‡ ๐‘‡‚ ๐‘‡ƒ ๐‘‡„ ๐‘‡… ๐‘‡† ๐‘‡‡ ๐‘‡ˆ ๐‘‡‰ ๐‘‡Š ๐‘‡‹ ๐‘‡Œ ๐‘‡ ๐‘‡Ž ๐‘‡ ๐‘‡ ๐‘‡‘ ๐‘‡’ ๐‘‡“ ๐‘‡” ๐‘‡• ๐‘‡– ๐‘‡— ๐‘‡˜ ๐‘‡™ ๐‘‡š ๐‘‡› ๐‘‡œ ๐‘‡ ๐‘‡ž ๐‘‡Ÿ +๐‘†€ ๐‘† ๐‘†‚ ๐‘†ƒ ๐‘†„ ๐‘†… ๐‘†† ๐‘†‡ ๐‘†ˆ ๐‘†‰ ๐‘†Š ๐‘†‹ ๐‘†Œ ๐‘† ๐‘†Ž ๐‘† ๐‘† ๐‘†‘ ๐‘†’ ๐‘†“ ๐‘†” ๐‘†• ๐‘†– ๐‘†— ๐‘†˜ ๐‘†™ ๐‘†š ๐‘†› ๐‘†œ ๐‘† ๐‘†ž ๐‘†Ÿ ๐‘†  ๐‘†ก ๐‘†ข ๐‘†ฃ ๐‘†ค ๐‘†ฅ ๐‘†ฆ ๐‘†ง ๐‘†จ ๐‘†ฉ ๐‘†ช ๐‘†ซ ๐‘†ฌ ๐‘†ญ ๐‘†ฎ ๐‘†ฏ ๐‘†ฐ ๐‘†ฑ ๐‘†ฒ ๐‘†ณ ๐‘†ด ๐‘†ต ๐‘†ถ ๐‘†ท ๐‘†ธ ๐‘†น ๐‘†บ ๐‘†ป ๐‘†ผ ๐‘†ฝ ๐‘†พ ๐‘†ฟ ๐‘‡€ ๐‘‡ ๐‘‡‚ ๐‘‡ƒ ๐‘‡„ ๐‘‡… ๐‘‡† ๐‘‡‡ ๐‘‡ˆ ๐‘‡‰ ๐‘‡Š ๐‘‡‹ ๐‘‡Œ ๐‘‡ ๐‘‡ ๐‘‡‘ ๐‘‡’ ๐‘‡“ ๐‘‡” ๐‘‡• ๐‘‡– ๐‘‡— ๐‘‡˜ ๐‘‡™ ๐‘‡š ๐‘‡› ๐‘‡œ ๐‘‡ ๐‘‡ž ๐‘‡Ÿ diff --git a/sample_texts/und-Sind_chars.txt b/sample_texts/und-Sind_chars.txt index 588456e4..cc1cc4f5 100644 --- a/sample_texts/und-Sind_chars.txt +++ b/sample_texts/und-Sind_chars.txt @@ -1 +1 @@ -๐‘Šฐ ๐‘Šฑ ๐‘Šฒ ๐‘Šณ ๐‘Šด ๐‘Šต ๐‘Šถ ๐‘Šท ๐‘Šธ ๐‘Šน ๐‘Šบ ๐‘Šป ๐‘Šผ ๐‘Šฝ ๐‘Šพ ๐‘Šฟ ๐‘‹€ ๐‘‹ ๐‘‹‚ ๐‘‹ƒ ๐‘‹„ ๐‘‹… ๐‘‹† ๐‘‹‡ ๐‘‹ˆ ๐‘‹‰ ๐‘‹Š ๐‘‹‹ ๐‘‹Œ ๐‘‹ ๐‘‹Ž ๐‘‹ ๐‘‹ ๐‘‹‘ ๐‘‹’ ๐‘‹“ ๐‘‹” ๐‘‹• ๐‘‹– ๐‘‹— ๐‘‹˜ ๐‘‹™ ๐‘‹š ๐‘‹› ๐‘‹œ ๐‘‹ ๐‘‹ž ๐‘‹Ÿ ๐‘‹  ๐‘‹ก ๐‘‹ข ๐‘‹ฃ ๐‘‹ค ๐‘‹ฅ ๐‘‹ฆ ๐‘‹ง ๐‘‹จ ๐‘‹ฉ ๐‘‹ช ๐‘‹ซ ๐‘‹ฌ ๐‘‹ญ ๐‘‹ฎ ๐‘‹ฏ ๐‘‹ฐ ๐‘‹ฑ ๐‘‹ฒ ๐‘‹ณ ๐‘‹ด ๐‘‹ต ๐‘‹ถ ๐‘‹ท ๐‘‹ธ ๐‘‹น ๐‘‹บ ๐‘‹ป ๐‘‹ผ ๐‘‹ฝ ๐‘‹พ ๐‘‹ฟ +๐‘Šฐ ๐‘Šฑ ๐‘Šฒ ๐‘Šณ ๐‘Šด ๐‘Šต ๐‘Šถ ๐‘Šท ๐‘Šธ ๐‘Šน ๐‘Šบ ๐‘Šป ๐‘Šผ ๐‘Šฝ ๐‘Šพ ๐‘Šฟ ๐‘‹€ ๐‘‹ ๐‘‹‚ ๐‘‹ƒ ๐‘‹„ ๐‘‹… ๐‘‹† ๐‘‹‡ ๐‘‹ˆ ๐‘‹‰ ๐‘‹Š ๐‘‹‹ ๐‘‹Œ ๐‘‹ ๐‘‹Ž ๐‘‹ ๐‘‹ ๐‘‹‘ ๐‘‹’ ๐‘‹“ ๐‘‹” ๐‘‹• ๐‘‹– ๐‘‹— ๐‘‹˜ ๐‘‹™ ๐‘‹š ๐‘‹› ๐‘‹œ ๐‘‹ ๐‘‹ž ๐‘‹Ÿ ๐‘‹  ๐‘‹ก ๐‘‹ข ๐‘‹ฃ ๐‘‹ค ๐‘‹ฅ ๐‘‹ฆ ๐‘‹ง ๐‘‹จ ๐‘‹ฉ ๐‘‹ช diff --git a/sample_texts/und-Sora_chars.txt b/sample_texts/und-Sora_chars.txt index 0c56e619..098d9259 100644 --- a/sample_texts/und-Sora_chars.txt +++ b/sample_texts/und-Sora_chars.txt @@ -1 +1 @@ -๐‘ƒ ๐‘ƒ‘ ๐‘ƒ’ ๐‘ƒ“ ๐‘ƒ” ๐‘ƒ• ๐‘ƒ– ๐‘ƒ— ๐‘ƒ˜ ๐‘ƒ™ ๐‘ƒš ๐‘ƒ› ๐‘ƒœ ๐‘ƒ ๐‘ƒž ๐‘ƒŸ ๐‘ƒ  ๐‘ƒก ๐‘ƒข ๐‘ƒฃ ๐‘ƒค ๐‘ƒฅ ๐‘ƒฆ ๐‘ƒง ๐‘ƒจ ๐‘ƒฉ ๐‘ƒช ๐‘ƒซ ๐‘ƒฌ ๐‘ƒญ ๐‘ƒฎ ๐‘ƒฏ ๐‘ƒฐ ๐‘ƒฑ ๐‘ƒฒ ๐‘ƒณ ๐‘ƒด ๐‘ƒต ๐‘ƒถ ๐‘ƒท ๐‘ƒธ ๐‘ƒน ๐‘ƒบ ๐‘ƒป ๐‘ƒผ ๐‘ƒฝ ๐‘ƒพ ๐‘ƒฟ +๐‘ƒ ๐‘ƒ‘ ๐‘ƒ’ ๐‘ƒ“ ๐‘ƒ” ๐‘ƒ• ๐‘ƒ– ๐‘ƒ— ๐‘ƒ˜ ๐‘ƒ™ ๐‘ƒš ๐‘ƒ› ๐‘ƒœ ๐‘ƒ ๐‘ƒž ๐‘ƒŸ ๐‘ƒ  ๐‘ƒก ๐‘ƒข ๐‘ƒฃ ๐‘ƒค ๐‘ƒฅ ๐‘ƒฆ ๐‘ƒง ๐‘ƒจ diff --git a/sample_texts/und-Takr_chars.txt b/sample_texts/und-Takr_chars.txt index d97c0fc0..aa0afc6b 100644 --- a/sample_texts/und-Takr_chars.txt +++ b/sample_texts/und-Takr_chars.txt @@ -1 +1 @@ -๐‘š€ ๐‘š ๐‘š‚ ๐‘šƒ ๐‘š„ ๐‘š… ๐‘š† ๐‘š‡ ๐‘šˆ ๐‘š‰ ๐‘šŠ ๐‘š‹ ๐‘šŒ ๐‘š ๐‘šŽ ๐‘š ๐‘š ๐‘š‘ ๐‘š’ ๐‘š“ ๐‘š” ๐‘š• ๐‘š– ๐‘š— ๐‘š˜ ๐‘š™ ๐‘šš ๐‘š› ๐‘šœ ๐‘š ๐‘šž ๐‘šŸ ๐‘š  ๐‘šก ๐‘šข ๐‘šฃ ๐‘šค ๐‘šฅ ๐‘šฆ ๐‘šง ๐‘šจ ๐‘šฉ ๐‘šช ๐‘šซ ๐‘šฌ ๐‘šญ ๐‘šฎ ๐‘šฏ ๐‘šฐ ๐‘šฑ ๐‘šฒ ๐‘šณ ๐‘šด ๐‘šต ๐‘šถ ๐‘šท ๐‘šธ ๐‘šน ๐‘šบ ๐‘šป ๐‘šผ ๐‘šฝ ๐‘šพ ๐‘šฟ ๐‘›€ ๐‘› ๐‘›‚ ๐‘›ƒ ๐‘›„ ๐‘›… ๐‘›† ๐‘›‡ ๐‘›ˆ ๐‘›‰ ๐‘›Š ๐‘›‹ ๐‘›Œ ๐‘› ๐‘›Ž ๐‘› +๐‘š€ ๐‘š ๐‘š‚ ๐‘šƒ ๐‘š„ ๐‘š… ๐‘š† ๐‘š‡ ๐‘šˆ ๐‘š‰ ๐‘šŠ ๐‘š‹ ๐‘šŒ ๐‘š ๐‘šŽ ๐‘š ๐‘š ๐‘š‘ ๐‘š’ ๐‘š“ ๐‘š” ๐‘š• ๐‘š– ๐‘š— ๐‘š˜ ๐‘š™ ๐‘šš ๐‘š› ๐‘šœ ๐‘š ๐‘šž ๐‘šŸ ๐‘š  ๐‘šก ๐‘šข ๐‘šฃ ๐‘šค ๐‘šฅ ๐‘šฆ ๐‘šง ๐‘šจ ๐‘šฉ ๐‘šช ๐‘šซ ๐‘šฌ ๐‘šญ ๐‘šฎ ๐‘šฏ ๐‘šฐ ๐‘šฑ ๐‘šฒ ๐‘šณ ๐‘šด ๐‘šต ๐‘šถ ๐‘šท diff --git a/sample_texts/und-Tirh_chars.txt b/sample_texts/und-Tirh_chars.txt index d0341602..e3d8483b 100644 --- a/sample_texts/und-Tirh_chars.txt +++ b/sample_texts/und-Tirh_chars.txt @@ -1 +1 @@ -๐‘’€ ๐‘’ ๐‘’‚ ๐‘’ƒ ๐‘’„ ๐‘’… ๐‘’† ๐‘’‡ ๐‘’ˆ ๐‘’‰ ๐‘’Š ๐‘’‹ ๐‘’Œ ๐‘’ ๐‘’Ž ๐‘’ ๐‘’ ๐‘’‘ ๐‘’’ ๐‘’“ ๐‘’” ๐‘’• ๐‘’– ๐‘’— ๐‘’˜ ๐‘’™ ๐‘’š ๐‘’› ๐‘’œ ๐‘’ ๐‘’ž ๐‘’Ÿ ๐‘’  ๐‘’ก ๐‘’ข ๐‘’ฃ ๐‘’ค ๐‘’ฅ ๐‘’ฆ ๐‘’ง ๐‘’จ ๐‘’ฉ ๐‘’ช ๐‘’ซ ๐‘’ฌ ๐‘’ญ ๐‘’ฎ ๐‘’ฏ ๐‘’ฐ ๐‘’ฑ ๐‘’ฒ ๐‘’ณ ๐‘’ด ๐‘’ต ๐‘’ถ ๐‘’ท ๐‘’ธ ๐‘’น ๐‘’บ ๐‘’ป ๐‘’ผ ๐‘’ฝ ๐‘’พ ๐‘’ฟ ๐‘“€ ๐‘“ ๐‘“‚ ๐‘“ƒ ๐‘“„ ๐‘“… ๐‘“† ๐‘“‡ ๐‘“ˆ ๐‘“‰ ๐‘“Š ๐‘“‹ ๐‘“Œ ๐‘“ ๐‘“Ž ๐‘“ ๐‘“ ๐‘“‘ ๐‘“’ ๐‘““ ๐‘“” ๐‘“• ๐‘“– ๐‘“— ๐‘“˜ ๐‘“™ ๐‘“š ๐‘“› ๐‘“œ ๐‘“ ๐‘“ž ๐‘“Ÿ +๐‘’€ ๐‘’ ๐‘’‚ ๐‘’ƒ ๐‘’„ ๐‘’… ๐‘’† ๐‘’‡ ๐‘’ˆ ๐‘’‰ ๐‘’Š ๐‘’‹ ๐‘’Œ ๐‘’ ๐‘’Ž ๐‘’ ๐‘’ ๐‘’‘ ๐‘’’ ๐‘’“ ๐‘’” ๐‘’• ๐‘’– ๐‘’— ๐‘’˜ ๐‘’™ ๐‘’š ๐‘’› ๐‘’œ ๐‘’ ๐‘’ž ๐‘’Ÿ ๐‘’  ๐‘’ก ๐‘’ข ๐‘’ฃ ๐‘’ค ๐‘’ฅ ๐‘’ฆ ๐‘’ง ๐‘’จ ๐‘’ฉ ๐‘’ช ๐‘’ซ ๐‘’ฌ ๐‘’ญ ๐‘’ฎ ๐‘’ฏ ๐‘’ฐ ๐‘’ฑ ๐‘’ฒ ๐‘’ณ ๐‘’ด ๐‘’ต ๐‘’ถ ๐‘’ท ๐‘’ธ ๐‘’น ๐‘’บ ๐‘’ป ๐‘’ผ ๐‘’ฝ ๐‘’พ ๐‘’ฟ ๐‘“€ ๐‘“ ๐‘“‚ ๐‘“ƒ ๐‘“„ ๐‘“… ๐‘“† ๐‘“‡ diff --git a/sample_texts/und-Wara_chars.txt b/sample_texts/und-Wara_chars.txt index 99ddfe34..bdcf78de 100644 --- a/sample_texts/und-Wara_chars.txt +++ b/sample_texts/und-Wara_chars.txt @@ -1 +1 @@ -๐‘ข  ๐‘ขก ๐‘ขข ๐‘ขฃ ๐‘ขค ๐‘ขฅ ๐‘ขฆ ๐‘ขง ๐‘ขจ ๐‘ขฉ ๐‘ขช ๐‘ขซ ๐‘ขฌ ๐‘ขญ ๐‘ขฎ ๐‘ขฏ ๐‘ขฐ ๐‘ขฑ ๐‘ขฒ ๐‘ขณ ๐‘ขด ๐‘ขต ๐‘ขถ ๐‘ขท ๐‘ขธ ๐‘ขน ๐‘ขบ ๐‘ขป ๐‘ขผ ๐‘ขฝ ๐‘ขพ ๐‘ขฟ ๐‘ฃ€ ๐‘ฃ ๐‘ฃ‚ ๐‘ฃƒ ๐‘ฃ„ ๐‘ฃ… ๐‘ฃ† ๐‘ฃ‡ ๐‘ฃˆ ๐‘ฃ‰ ๐‘ฃŠ ๐‘ฃ‹ ๐‘ฃŒ ๐‘ฃ ๐‘ฃŽ ๐‘ฃ ๐‘ฃ ๐‘ฃ‘ ๐‘ฃ’ ๐‘ฃ“ ๐‘ฃ” ๐‘ฃ• ๐‘ฃ– ๐‘ฃ— ๐‘ฃ˜ ๐‘ฃ™ ๐‘ฃš ๐‘ฃ› ๐‘ฃœ ๐‘ฃ ๐‘ฃž ๐‘ฃŸ ๐‘ฃ  ๐‘ฃก ๐‘ฃข ๐‘ฃฃ ๐‘ฃค ๐‘ฃฅ ๐‘ฃฆ ๐‘ฃง ๐‘ฃจ ๐‘ฃฉ ๐‘ฃช ๐‘ฃซ ๐‘ฃฌ ๐‘ฃญ ๐‘ฃฎ ๐‘ฃฏ ๐‘ฃฐ ๐‘ฃฑ ๐‘ฃฒ ๐‘ฃณ ๐‘ฃด ๐‘ฃต ๐‘ฃถ ๐‘ฃท ๐‘ฃธ ๐‘ฃน ๐‘ฃบ ๐‘ฃป ๐‘ฃผ ๐‘ฃฝ ๐‘ฃพ ๐‘ฃฟ +๐‘ข  ๐‘ขก ๐‘ขข ๐‘ขฃ ๐‘ขค ๐‘ขฅ ๐‘ขฆ ๐‘ขง ๐‘ขจ ๐‘ขฉ ๐‘ขช ๐‘ขซ ๐‘ขฌ ๐‘ขญ ๐‘ขฎ ๐‘ขฏ ๐‘ขฐ ๐‘ขฑ ๐‘ขฒ ๐‘ขณ ๐‘ขด ๐‘ขต ๐‘ขถ ๐‘ขท ๐‘ขธ ๐‘ขน ๐‘ขบ ๐‘ขป ๐‘ขผ ๐‘ขฝ ๐‘ขพ ๐‘ขฟ ๐‘ฃ€ ๐‘ฃ ๐‘ฃ‚ ๐‘ฃƒ ๐‘ฃ„ ๐‘ฃ… ๐‘ฃ† ๐‘ฃ‡ ๐‘ฃˆ ๐‘ฃ‰ ๐‘ฃŠ ๐‘ฃ‹ ๐‘ฃŒ ๐‘ฃ ๐‘ฃŽ ๐‘ฃ ๐‘ฃ ๐‘ฃ‘ ๐‘ฃ’ ๐‘ฃ“ ๐‘ฃ” ๐‘ฃ• ๐‘ฃ– ๐‘ฃ— ๐‘ฃ˜ ๐‘ฃ™ ๐‘ฃš ๐‘ฃ› ๐‘ฃœ ๐‘ฃ ๐‘ฃž ๐‘ฃŸ ๐‘ฃ  ๐‘ฃก ๐‘ฃข ๐‘ฃฃ ๐‘ฃค ๐‘ฃฅ ๐‘ฃฆ ๐‘ฃง ๐‘ฃจ ๐‘ฃฉ ๐‘ฃช ๐‘ฃซ ๐‘ฃฌ ๐‘ฃญ ๐‘ฃฎ ๐‘ฃฏ ๐‘ฃฐ ๐‘ฃฑ ๐‘ฃฒ ๐‘ฃฟ diff --git a/sample_texts/und-Zsym-muse_chars.txt b/sample_texts/und-Zsym-muse_chars.txt new file mode 100644 index 00000000..f3c5e59c --- /dev/null +++ b/sample_texts/und-Zsym-muse_chars.txt @@ -0,0 +1 @@ +๐€€ ๐€ ๐€‚ ๐€ƒ ๐€„ ๐€… ๐€† ๐€‡ ๐€ˆ ๐€‰ ๐€Š ๐€‹ ๐€Œ ๐€ ๐€Ž ๐€ ๐€ ๐€‘ ๐€’ ๐€“ ๐€” ๐€• ๐€– ๐€— ๐€˜ ๐€™ ๐€š ๐€› ๐€œ ๐€ ๐€ž ๐€Ÿ ๐€  ๐€ก ๐€ข ๐€ฃ ๐€ค ๐€ฅ ๐€ฆ ๐€ง ๐€จ ๐€ฉ ๐€ช ๐€ซ ๐€ฌ ๐€ญ ๐€ฎ ๐€ฏ ๐€ฐ ๐€ฑ ๐€ฒ ๐€ณ ๐€ด ๐€ต ๐€ถ ๐€ท ๐€ธ ๐€น ๐€บ ๐€ป ๐€ผ ๐€ฝ ๐€พ ๐€ฟ ๐€ ๐ ๐‚ ๐ƒ ๐„ ๐… ๐† ๐‡ ๐ˆ ๐‰ ๐Š ๐‹ ๐Œ ๐ ๐Ž ๐ ๐ ๐‘ ๐’ ๐“ ๐” ๐• ๐– ๐— ๐˜ ๐™ ๐š ๐› ๐œ ๐ ๐ž ๐Ÿ ๐  ๐ก ๐ข ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐จ ๐ฉ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฎ ๐ฏ ๐ฐ ๐ฑ ๐ฒ ๐ณ ๐ด ๐ต ๐ถ ๐ท ๐ธ ๐น ๐บ ๐ป ๐ผ ๐ฝ ๐พ ๐ฟ ๐‚€ ๐‚ ๐‚‚ ๐‚ƒ ๐‚„ ๐‚… ๐‚† ๐‚‡ ๐‚ˆ ๐‚‰ ๐‚Š ๐‚‹ ๐‚Œ ๐‚ ๐‚Ž ๐‚ ๐‚ ๐‚‘ ๐‚’ ๐‚“ ๐‚” ๐‚• ๐‚– ๐‚— ๐‚˜ ๐‚™ ๐‚š ๐‚› ๐‚œ ๐‚ ๐‚ž ๐‚Ÿ ๐‚  ๐‚ก ๐‚ข ๐‚ฃ ๐‚ค ๐‚ฅ ๐‚ฆ ๐‚ง ๐‚จ ๐‚ฉ ๐‚ช ๐‚ซ ๐‚ฌ ๐‚ญ ๐‚ฎ ๐‚ฏ ๐‚ฐ ๐‚ฑ ๐‚ฒ ๐‚ณ ๐‚ด ๐‚ต ๐‚ถ ๐‚ท ๐‚ธ ๐‚น ๐‚บ ๐‚ป ๐‚ผ ๐‚ฝ ๐‚พ ๐‚ฟ ๐ƒ€ ๐ƒ ๐ƒ‚ ๐ƒƒ ๐ƒ„ ๐ƒ… ๐ƒ† ๐ƒ‡ ๐ƒˆ ๐ƒ‰ ๐ƒŠ ๐ƒ‹ ๐ƒŒ ๐ƒ ๐ƒŽ ๐ƒ ๐ƒ ๐ƒ‘ ๐ƒ’ ๐ƒ“ ๐ƒ” ๐ƒ• ๐ƒ– ๐ƒ— ๐ƒ˜ ๐ƒ™ ๐ƒš ๐ƒ› ๐ƒœ ๐ƒ ๐ƒž ๐ƒŸ ๐ƒ  ๐ƒก ๐ƒข ๐ƒฃ ๐ƒค ๐ƒฅ ๐ƒฆ ๐ƒง ๐ƒจ ๐ƒฉ ๐ƒช ๐ƒซ ๐ƒฌ ๐ƒญ ๐ƒฎ ๐ƒฏ ๐ƒฐ ๐ƒฑ ๐ƒฒ ๐ƒณ ๐ƒด ๐ƒต ๐„€ ๐„ ๐„‚ ๐„ƒ ๐„„ ๐„… ๐„† ๐„‡ ๐„ˆ ๐„‰ ๐„Š ๐„‹ ๐„Œ ๐„ ๐„Ž ๐„ ๐„ ๐„‘ ๐„’ ๐„“ ๐„” ๐„• ๐„– ๐„— ๐„˜ ๐„™ ๐„š ๐„› ๐„œ ๐„ ๐„ž ๐„Ÿ ๐„  ๐„ก ๐„ข ๐„ฃ ๐„ค ๐„ฅ ๐„ฆ ๐„ฉ ๐„ช ๐„ซ ๐„ฌ ๐„ญ ๐„ฎ ๐„ฏ ๐„ฐ ๐„ฑ ๐„ฒ ๐„ณ ๐„ด ๐„ต ๐„ถ ๐„ท ๐„ธ ๐„น ๐„บ ๐„ป ๐„ผ ๐„ฝ ๐„พ ๐„ฟ ๐…€ ๐… ๐…‚ ๐…ƒ ๐…„ ๐…… ๐…† ๐…‡ ๐…ˆ ๐…‰ ๐…Š ๐…‹ ๐…Œ ๐… ๐…Ž ๐… ๐… ๐…‘ ๐…’ ๐…“ ๐…” ๐…• ๐…– ๐…— ๐…˜ ๐…™ ๐…š ๐…› ๐…œ ๐… ๐…ž ๐…Ÿ ๐…  ๐…ก ๐…ข ๐…ฃ ๐…ค ๐…ฅ ๐…ฆ ๐…ง ๐…จ ๐…ฉ ๐…ช ๐…ซ ๐…ฌ ๐…ญ ๐…ฎ ๐…ฏ ๐…ฐ ๐…ฑ ๐…ฒ ๐…ณ ๐…ด ๐…ต ๐…ถ ๐…ท ๐…ธ ๐…น ๐…บ ๐…ป ๐…ผ ๐…ฝ ๐…พ ๐…ฟ ๐†€ ๐† ๐†‚ ๐†ƒ ๐†„ ๐†… ๐†† ๐†‡ ๐†ˆ ๐†‰ ๐†Š ๐†‹ ๐†Œ ๐† ๐†Ž ๐† ๐† ๐†‘ ๐†’ ๐†“ ๐†” ๐†• ๐†– ๐†— ๐†˜ ๐†™ ๐†š ๐†› ๐†œ ๐† ๐†ž ๐†Ÿ ๐†  ๐†ก ๐†ข ๐†ฃ ๐†ค ๐†ฅ ๐†ฆ ๐†ง ๐†จ ๐†ฉ ๐†ช ๐†ซ ๐†ฌ ๐†ญ ๐†ฎ ๐†ฏ ๐†ฐ ๐†ฑ ๐†ฒ ๐†ณ ๐†ด ๐†ต ๐†ถ ๐†ท ๐†ธ ๐†น ๐†บ ๐†ป ๐†ผ ๐†ฝ ๐†พ ๐†ฟ ๐‡€ ๐‡ ๐‡‚ ๐‡ƒ ๐‡„ ๐‡… ๐‡† ๐‡‡ ๐‡ˆ ๐‡‰ ๐‡Š ๐‡‹ ๐‡Œ ๐‡ ๐‡Ž ๐‡ ๐‡ ๐‡‘ ๐‡’ ๐‡“ ๐‡” ๐‡• ๐‡– ๐‡— ๐‡˜ ๐‡™ ๐‡š ๐‡› ๐‡œ ๐‡ ๐‡ž ๐‡Ÿ ๐‡  ๐‡ก ๐‡ข ๐‡ฃ ๐‡ค ๐‡ฅ ๐‡ฆ ๐‡ง ๐‡จ From 5ed24f8820b2a48044582bf52672addd1833084f Mon Sep 17 00:00:00 2001 From: punchcutter Date: Tue, 27 Nov 2018 12:01:42 -0800 Subject: [PATCH 07/14] Add Hanifi Rohingya to Noto lint --- nototools/data/family_name_info_p3.xml | 1 + nototools/data/noto_cmap_phase3.xml | 1 + nototools/noto_names.py | 1 + 3 files changed, 3 insertions(+) diff --git a/nototools/data/family_name_info_p3.xml b/nototools/data/family_name_info_p3.xml index 3066c9af..52ac3b77 100644 --- a/nototools/data/family_name_info_p3.xml +++ b/nototools/data/family_name_info_p3.xml @@ -131,6 +131,7 @@ + diff --git a/nototools/data/noto_cmap_phase3.xml b/nototools/data/noto_cmap_phase3.xml index 4d72819c..76479b47 100644 --- a/nototools/data/noto_cmap_phase3.xml +++ b/nototools/data/noto_cmap_phase3.xml @@ -105,6 +105,7 @@ Plrd,Miao,138,0000 000d 0020 00a0 25cc 16f00-16f44 16f50-16f7e 16f8f-16f9f,-1, Prti,Inscriptional Parthian,34,0000 000d 0020 00a0 10b40-10b55 10b58-10b5f,0, Rjng,Rejang,45,0000 000d 0020 00a0 200b-200d 25cc a930-a953 a95f,13,002c 002e 003a 00d7 2012-2015 2022 25fb-25fe + Rohg,Hanifi Rohingya,66,0000 000d 0020 00a0 002c 002e 00ab 00bb 200c-200d 25cc 061b 061f 0640 06D4 0660 10d00-10d27 10d30-10d39,-1, Runr,Runic,93,0000 000d 0020 00a0 16a0-16f8,0, SYM2,Symbols2,2184,0000-0020 0023 002a 0030-0039 007f-00a0 2022 20e2-20e3 21af 21e6-21f0 21f3 2218-2219 2299 22c4-22c6 2316 2318 231a-231b 2324-2328 232b 237b 237d-237f 2394 23ce-23cf 23e9-23ea 23ed-23ef 23f1-23fe 2400-2426 2440-244a 25a0-2609 260e-2612 2614-2623 2630-2637 263c 2654-2668 267f-268f 269e-26a1 26aa-26ac 26bd-26cd 26cf-26e1 2700-2704 2706-2709 270b-271c 2722-2727 2729-274b 274d 274f-2753 2756-2775 2794 2798-27af 27b1-27be 2800-28ff 2981 29bf 29eb 2b00-2b0d 2b12-2b2f 2b4d-2b73 2b76-2b95 2b98-2bb9 2bbd-2bc8 2bca-2bd1 2bec-2bef 4dc0-4dff fff9-fffb 10140-1018e 10190-1019b 101a0 101d0-101fd 102e0-102fb 10e60-10e7e 1d300-1d356 1d360-1d371 1f000-1f02b 1f030-1f093 1f0a0-1f0ae 1f0b1-1f0bf 1f0c1-1f0cf 1f0d1-1f0f5 1f30d-1f30f 1f315 1f31c 1f321-1f32c 1f336 1f378 1f37d 1f393-1f39f 1f3a7 1f3ac-1f3ae 1f3c2 1f3c4 1f3c6 1f3ca-1f3ce 1f3d4-1f3e0 1f3ed 1f3f1-1f3f3 1f3f5-1f3f7 1f408 1f415 1f41f 1f426 1f43f 1f441-1f442 1f446-1f449 1f44c-1f44e 1f453 1f46a 1f47d 1f4a3 1f4b0 1f4b3 1f4b9 1f4bb 1f4bf 1f4c8-1f4cb 1f4da 1f4df 1f4e4-1f4e6 1f4ea-1f4ed 1f4f7 1f4f9-1f4fb 1f4fd-1f4fe 1f503 1f507-1f50a 1f50d 1f512-1f513 1f53e-1f545 1f54a 1f550-1f579 1f57b-1f594 1f597-1f5a3 1f5a5-1f5fa 1f650-1f67f 1f687 1f68d 1f691 1f694 1f698 1f6ad 1f6b2 1f6b9-1f6ba 1f6bc 1f6c6-1f6cb 1f6cd-1f6cf 1f6e0-1f6ea 1f6f0-1f6f3 1f780-1f7d4 1f800-1f80b 1f810-1f847 1f850-1f859 1f860-1f887 1f890-1f8ad 1f93b 1f946,-1, Samr,Samaritan,67,0000 000d 0020 00a0 0800-082d 0830-083e 25cc 2e31,0, diff --git a/nototools/noto_names.py b/nototools/noto_names.py index 819fbc6e..40067be7 100755 --- a/nototools/noto_names.py +++ b/nototools/noto_names.py @@ -323,6 +323,7 @@ def _original_parts(family_parts, subfamily_parts, no_style_linking=False): 'Old South Arabian': 'OldSouArab', # Sarb 'Psalter Pahlavi': 'PsaPahlavi', # Phlp 'Meetei Mayek': 'MeetMayek', # Mtei + 'Hanifi Rohingya': 'HanifiRohg', # Rohg 'Sora Sompeng': 'SoraSomp', # Sora 'Inscriptional Parthian': 'InsParthi', # Prti 'Pau Cin Hau': 'PauCinHau', # Pauc From d4dccc91a4a516bf43abaed7e7436ed67ffaf927 Mon Sep 17 00:00:00 2001 From: Marek Jeziorek Date: Fri, 30 Nov 2018 14:50:56 -0800 Subject: [PATCH 08/14] added more sample texts --- sample_texts/und-Aghb.txt | 0 sample_texts/und-Ahom.txt | 0 sample_texts/und-Bhks.txt | 0 sample_texts/und-Dupl.txt | 0 sample_texts/und-Elba.txt | 0 sample_texts/und-Gran.txt | 0 sample_texts/und-Hatr.txt | 0 sample_texts/und-Hmng.txt | 0 sample_texts/und-Hung.txt | 0 sample_texts/und-Khoj_chars.txt | 1 + sample_texts/und-Lina.txt | 0 sample_texts/und-Mahj.txt | 0 sample_texts/und-Mani.txt | 0 sample_texts/und-Marc.txt | 0 sample_texts/und-Mend.txt | 0 sample_texts/und-Mero_chars.txt | 1 + sample_texts/und-Modi.txt | 0 sample_texts/und-Mroo.txt | 0 sample_texts/und-Mult.txt | 0 sample_texts/und-Narb.txt | 0 sample_texts/und-Nbat.txt | 0 sample_texts/und-Newa.txt | 0 sample_texts/und-Palm.txt | 0 sample_texts/und-Pauc.txt | 0 sample_texts/und-Perm.txt | 0 sample_texts/und-Phlp.txt | 0 sample_texts/und-Plrd.txt | 0 sample_texts/und-Shrd.txt | 0 sample_texts/und-Sind.txt | 0 sample_texts/und-Sora.txt | 0 sample_texts/und-Takr.txt | 0 sample_texts/und-Tirh.txt | 0 sample_texts/und-Wara.txt | 0 33 files changed, 2 insertions(+) create mode 100644 sample_texts/und-Aghb.txt create mode 100644 sample_texts/und-Ahom.txt create mode 100644 sample_texts/und-Bhks.txt create mode 100644 sample_texts/und-Dupl.txt create mode 100644 sample_texts/und-Elba.txt create mode 100644 sample_texts/und-Gran.txt create mode 100644 sample_texts/und-Hatr.txt create mode 100644 sample_texts/und-Hmng.txt create mode 100644 sample_texts/und-Hung.txt create mode 100644 sample_texts/und-Khoj_chars.txt create mode 100644 sample_texts/und-Lina.txt create mode 100644 sample_texts/und-Mahj.txt create mode 100644 sample_texts/und-Mani.txt create mode 100644 sample_texts/und-Marc.txt create mode 100644 sample_texts/und-Mend.txt create mode 100644 sample_texts/und-Mero_chars.txt create mode 100644 sample_texts/und-Modi.txt create mode 100644 sample_texts/und-Mroo.txt create mode 100644 sample_texts/und-Mult.txt create mode 100644 sample_texts/und-Narb.txt create mode 100644 sample_texts/und-Nbat.txt create mode 100644 sample_texts/und-Newa.txt create mode 100644 sample_texts/und-Palm.txt create mode 100644 sample_texts/und-Pauc.txt create mode 100644 sample_texts/und-Perm.txt create mode 100644 sample_texts/und-Phlp.txt create mode 100644 sample_texts/und-Plrd.txt create mode 100644 sample_texts/und-Shrd.txt create mode 100644 sample_texts/und-Sind.txt create mode 100644 sample_texts/und-Sora.txt create mode 100644 sample_texts/und-Takr.txt create mode 100644 sample_texts/und-Tirh.txt create mode 100644 sample_texts/und-Wara.txt diff --git a/sample_texts/und-Aghb.txt b/sample_texts/und-Aghb.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Ahom.txt b/sample_texts/und-Ahom.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Bhks.txt b/sample_texts/und-Bhks.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Dupl.txt b/sample_texts/und-Dupl.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Elba.txt b/sample_texts/und-Elba.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Gran.txt b/sample_texts/und-Gran.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Hatr.txt b/sample_texts/und-Hatr.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Hmng.txt b/sample_texts/und-Hmng.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Hung.txt b/sample_texts/und-Hung.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Khoj_chars.txt b/sample_texts/und-Khoj_chars.txt new file mode 100644 index 00000000..88951ee2 --- /dev/null +++ b/sample_texts/und-Khoj_chars.txt @@ -0,0 +1 @@ + ๐‘ˆ€ ๐‘ˆ ๐‘ˆ‚ ๐‘ˆƒ ๐‘ˆ„ ๐‘ˆ… ๐‘ˆ† ๐‘ˆ‡ ๐‘ˆˆ ๐‘ˆ‰ ๐‘ˆŠ ๐‘ˆ‹ ๐‘ˆŒ ๐‘ˆ ๐‘ˆŽ ๐‘ˆ ๐‘ˆ ๐‘ˆ‘ ๐‘ˆ“ ๐‘ˆ” ๐‘ˆ• ๐‘ˆ– ๐‘ˆ— ๐‘ˆ˜ ๐‘ˆ™ ๐‘ˆš ๐‘ˆ› ๐‘ˆœ ๐‘ˆ ๐‘ˆž ๐‘ˆŸ ๐‘ˆ  ๐‘ˆก ๐‘ˆข ๐‘ˆฃ ๐‘ˆค ๐‘ˆฅ ๐‘ˆฆ ๐‘ˆง ๐‘ˆจ ๐‘ˆฉ ๐‘ˆช ๐‘ˆซ ๐‘ˆฌ ๐‘ˆญ ๐‘ˆฎ diff --git a/sample_texts/und-Lina.txt b/sample_texts/und-Lina.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Mahj.txt b/sample_texts/und-Mahj.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Mani.txt b/sample_texts/und-Mani.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Marc.txt b/sample_texts/und-Marc.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Mend.txt b/sample_texts/und-Mend.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Mero_chars.txt b/sample_texts/und-Mero_chars.txt new file mode 100644 index 00000000..78314e5b --- /dev/null +++ b/sample_texts/und-Mero_chars.txt @@ -0,0 +1 @@ +๐ฆ€ ๐ฆ ๐ฆ‚ ๐ฆƒ ๐ฆ„ ๐ฆ… ๐ฆ† ๐ฆ‡ ๐ฆˆ ๐ฆ‰ ๐ฆŠ ๐ฆ‹ ๐ฆŒ ๐ฆ ๐ฆŽ ๐ฆ ๐ฆ ๐ฆ‘ ๐ฆ’ ๐ฆ“ ๐ฆ” ๐ฆ• ๐ฆ– ๐ฆ— ๐ฆ˜ ๐ฆ™ ๐ฆš ๐ฆ› ๐ฆœ ๐ฆ ๐ฆž ๐ฆŸ diff --git a/sample_texts/und-Modi.txt b/sample_texts/und-Modi.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Mroo.txt b/sample_texts/und-Mroo.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Mult.txt b/sample_texts/und-Mult.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Narb.txt b/sample_texts/und-Narb.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Nbat.txt b/sample_texts/und-Nbat.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Newa.txt b/sample_texts/und-Newa.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Palm.txt b/sample_texts/und-Palm.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Pauc.txt b/sample_texts/und-Pauc.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Perm.txt b/sample_texts/und-Perm.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Phlp.txt b/sample_texts/und-Phlp.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Plrd.txt b/sample_texts/und-Plrd.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Shrd.txt b/sample_texts/und-Shrd.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Sind.txt b/sample_texts/und-Sind.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Sora.txt b/sample_texts/und-Sora.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Takr.txt b/sample_texts/und-Takr.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Tirh.txt b/sample_texts/und-Tirh.txt new file mode 100644 index 00000000..e69de29b diff --git a/sample_texts/und-Wara.txt b/sample_texts/und-Wara.txt new file mode 100644 index 00000000..e69de29b From d59043005ba77dbeccbadf0d7e7425b4d01e65bc Mon Sep 17 00:00:00 2001 From: Marek Jeziorek Date: Fri, 30 Nov 2018 15:20:23 -0800 Subject: [PATCH 09/14] removing 0 length samples --- sample_texts/und-Aghb.txt | 0 sample_texts/und-Ahom.txt | 0 sample_texts/und-Bhks.txt | 0 sample_texts/und-Dupl.txt | 0 sample_texts/und-Elba.txt | 0 sample_texts/und-Gran.txt | 0 sample_texts/und-Hatr.txt | 0 sample_texts/und-Hmng.txt | 0 sample_texts/und-Hung.txt | 0 sample_texts/und-Lina.txt | 0 sample_texts/und-Mahj.txt | 0 sample_texts/und-Mani.txt | 0 sample_texts/und-Marc.txt | 0 sample_texts/und-Mend.txt | 0 sample_texts/und-Modi.txt | 0 sample_texts/und-Mroo.txt | 0 sample_texts/und-Mult.txt | 0 sample_texts/und-Narb.txt | 0 sample_texts/und-Nbat.txt | 0 sample_texts/und-Newa.txt | 0 sample_texts/und-Palm.txt | 0 sample_texts/und-Pauc.txt | 0 sample_texts/und-Perm.txt | 0 sample_texts/und-Phlp.txt | 0 sample_texts/und-Plrd.txt | 0 sample_texts/und-Shrd.txt | 0 sample_texts/und-Sind.txt | 0 sample_texts/und-Sora.txt | 0 sample_texts/und-Takr.txt | 0 sample_texts/und-Tirh.txt | 0 sample_texts/und-Wara.txt | 0 31 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 sample_texts/und-Aghb.txt delete mode 100644 sample_texts/und-Ahom.txt delete mode 100644 sample_texts/und-Bhks.txt delete mode 100644 sample_texts/und-Dupl.txt delete mode 100644 sample_texts/und-Elba.txt delete mode 100644 sample_texts/und-Gran.txt delete mode 100644 sample_texts/und-Hatr.txt delete mode 100644 sample_texts/und-Hmng.txt delete mode 100644 sample_texts/und-Hung.txt delete mode 100644 sample_texts/und-Lina.txt delete mode 100644 sample_texts/und-Mahj.txt delete mode 100644 sample_texts/und-Mani.txt delete mode 100644 sample_texts/und-Marc.txt delete mode 100644 sample_texts/und-Mend.txt delete mode 100644 sample_texts/und-Modi.txt delete mode 100644 sample_texts/und-Mroo.txt delete mode 100644 sample_texts/und-Mult.txt delete mode 100644 sample_texts/und-Narb.txt delete mode 100644 sample_texts/und-Nbat.txt delete mode 100644 sample_texts/und-Newa.txt delete mode 100644 sample_texts/und-Palm.txt delete mode 100644 sample_texts/und-Pauc.txt delete mode 100644 sample_texts/und-Perm.txt delete mode 100644 sample_texts/und-Phlp.txt delete mode 100644 sample_texts/und-Plrd.txt delete mode 100644 sample_texts/und-Shrd.txt delete mode 100644 sample_texts/und-Sind.txt delete mode 100644 sample_texts/und-Sora.txt delete mode 100644 sample_texts/und-Takr.txt delete mode 100644 sample_texts/und-Tirh.txt delete mode 100644 sample_texts/und-Wara.txt diff --git a/sample_texts/und-Aghb.txt b/sample_texts/und-Aghb.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Ahom.txt b/sample_texts/und-Ahom.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Bhks.txt b/sample_texts/und-Bhks.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Dupl.txt b/sample_texts/und-Dupl.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Elba.txt b/sample_texts/und-Elba.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Gran.txt b/sample_texts/und-Gran.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Hatr.txt b/sample_texts/und-Hatr.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Hmng.txt b/sample_texts/und-Hmng.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Hung.txt b/sample_texts/und-Hung.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Lina.txt b/sample_texts/und-Lina.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Mahj.txt b/sample_texts/und-Mahj.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Mani.txt b/sample_texts/und-Mani.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Marc.txt b/sample_texts/und-Marc.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Mend.txt b/sample_texts/und-Mend.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Modi.txt b/sample_texts/und-Modi.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Mroo.txt b/sample_texts/und-Mroo.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Mult.txt b/sample_texts/und-Mult.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Narb.txt b/sample_texts/und-Narb.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Nbat.txt b/sample_texts/und-Nbat.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Newa.txt b/sample_texts/und-Newa.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Palm.txt b/sample_texts/und-Palm.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Pauc.txt b/sample_texts/und-Pauc.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Perm.txt b/sample_texts/und-Perm.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Phlp.txt b/sample_texts/und-Phlp.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Plrd.txt b/sample_texts/und-Plrd.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Shrd.txt b/sample_texts/und-Shrd.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Sind.txt b/sample_texts/und-Sind.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Sora.txt b/sample_texts/und-Sora.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Takr.txt b/sample_texts/und-Takr.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Tirh.txt b/sample_texts/und-Tirh.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/sample_texts/und-Wara.txt b/sample_texts/und-Wara.txt deleted file mode 100644 index e69de29b..00000000 From 07421818f26bb7a15720b6ff918c4f6aedb99115 Mon Sep 17 00:00:00 2001 From: "Marek Z. Jeziorek" Date: Fri, 30 Nov 2018 19:00:41 -0500 Subject: [PATCH 10/14] changes required to support new languages/font updates --- nototools/generate_lang_font_table.py | 3 ++- nototools/generate_website_2_data.py | 9 +++++++-- nototools/noto_fonts.py | 5 ++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/nototools/generate_lang_font_table.py b/nototools/generate_lang_font_table.py index f7a40a17..2bef60c8 100755 --- a/nototools/generate_lang_font_table.py +++ b/nototools/generate_lang_font_table.py @@ -45,7 +45,8 @@ def accept_font(f): f.family == 'Noto' and # exclude Arimo, Tinos, Cousine f.style != 'Nastaliq' and # exclude Nastaliq, not suitable for maps f.script != 'HST' and # exclude Historic, tool limitation - f.weight == 'Regular' and # to limit members of fonts, we don't + f.weight == 'Regular' and # to limit members of fonts + f.width == 'Regular' and # to limit members of fonts, we don't not f.slope and # care about weights f.fmt in ['ttf', 'otf'] and # only support these formats (not f.is_cjk or f.subset)) # 'small' language-specific CJK subsets diff --git a/nototools/generate_website_2_data.py b/nototools/generate_website_2_data.py index 97eccc45..3cfd187e 100755 --- a/nototools/generate_website_2_data.py +++ b/nototools/generate_website_2_data.py @@ -507,6 +507,9 @@ def get_family_id_to_default_lang_scr(family_id_to_lang_scrs, families): if script_key == 'Aran': # patch for Nastaliq lang = 'ur' + elif script_key == 'Hatr': + # patch for Hatran + lang = 'und' else: lang = lang_data.script_to_default_lang(primary_script) lang_scr = lang + '-' + primary_script @@ -862,7 +865,7 @@ def build_data_json(self, family_id_to_lang_scr_to_sample_key, num_fonts = sum( 1 for f in (family.hinted_members or family.unhinted_members) if not f.is_UI) - if num_fonts not in [1, 2, 4, 9, 36, 72]: + if num_fonts not in [1, 2, 3, 4, 9, 12, 36, 72]: print 'family %s (%s) has %d fonts' % (k, family.name, num_fonts) print '\n'.join(f.filepath for f in sorted(family.hinted_members or family.unhinted_members)) fail = True @@ -1180,7 +1183,9 @@ def build_subset_zips(self): readme_path = self.get_readme_path('cjk') readme_pair = (readme_path, path.basename(readme_path)) for style in ['Sans', 'Serif']: - for subset in ['KR', 'JP', 'SC', 'TC']: + for subset in ['KR', 'JP', 'SC', 'TC', 'HK']: + if style == 'Serif' and subset == 'HK': + continue base_name = 'Noto%s%s' % (style, subset) zip_name = '%s.zip' % base_name zip_path = path.join(self.pkgs, zip_name) diff --git a/nototools/noto_fonts.py b/nototools/noto_fonts.py index c4c4245a..20606f56 100644 --- a/nototools/noto_fonts.py +++ b/nototools/noto_fonts.py @@ -44,11 +44,14 @@ 'CJKkr': 'Kore', 'CJKsc': 'Hans', 'CJKtc': 'Hant', + 'CJKhk': 'Hant', 'JP': 'Jpan', 'KR': 'Kore', 'SC': 'Hans', 'TC': 'Hant', + 'HK': 'Hant', 'NKo': 'Nkoo', + 'Meroitic': 'Mero', 'SumeroAkkadianCuneiform': 'Xsux', 'Symbols': 'Zsym', 'Emoji': 'Zsye', @@ -173,7 +176,7 @@ def get_noto_font(filepath, family_name='Arimo|Cousine|Tinos|Noto', license_type = 'sil' - if script in ['JP', 'KR', 'TC', 'SC']: + if script in ['JP', 'KR', 'TC', 'SC', 'HK']: subset = script else: subset = None From e6be66d6a80658bb77f33b8dbad1027c3f25d388 Mon Sep 17 00:00:00 2001 From: "Marek Z. Jeziorek" Date: Sat, 22 Dec 2018 23:35:33 -0500 Subject: [PATCH 11/14] adding Math sample --- sample_texts/und-Zmth_chars.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 sample_texts/und-Zmth_chars.txt diff --git a/sample_texts/und-Zmth_chars.txt b/sample_texts/und-Zmth_chars.txt new file mode 100644 index 00000000..9cba37e4 --- /dev/null +++ b/sample_texts/und-Zmth_chars.txt @@ -0,0 +1 @@ +โˆƒโˆ‘โˆšโˆโˆŸโˆ โˆซโ‰…โ‰ˆโ‰ โŒ โŒก๐››๐ž‰โฆŸโ‰Œโฉฏโ‰Šโฆžโฆคโจ‘โˆณโฉฐโ‰†โง‚โงƒโจโˆฒโˆฑโˆโฉญโˆฎโˆ›โˆฌโซฎโˆคโŠฎโŠฌโฉดโชฃโซฝโงจโงฉโ‹ตโ‹นโ‹ธโฉณโงฃโงคโˆนโจโซœโˆœโชโชŽโชŠโชˆโช†โฉผโˆปโงฅโงœโงกโจ•โจโจŽโจ™โจ—โจ›โจ˜โจœโจšโˆพโจžโŸœโชโชโช‰โช‡โช…โฉปโจ”โจ’โจ“โซฆโŸŒโŸ“โฆ›โฆซโฆชโฆฏโฆญโฆฎโฆฌโฆฉโฆจโฆโˆกโจฉโ‰‚โŠงโจŠโŠธโŠŒโŠฏโ‰‡โ‰‰โ‰„โ‰ขโˆฆโ‰โŠญโฆงโฆฆโซณโจคโจฆโชทโชนโชตโชฑโจŒโจ–โ‰ŸโงทโฆฃโฆฅโฆœโงŽโŠพโŠฟโงดโˆ–โช โชŸโฉฌโชžโชโˆฟโฉ˜โฉ—โงถโฆ โˆขโซ‰โˆ‚๐œ•๐๐Ÿƒโซ‹โซ‡โชธโชบโชถโชฒโจ‹โซŠโซŒโซˆโˆฏโˆ„โŸ€โฉชโฉซโซปโˆญโ‰‹โŠชโฆขโง‰โŸ”โซขโˆฐโ‰€โจพโจŸโจ โจกโ‹ฟโŸโŸ‹ From 82bdd2a1026a8d4029d4f79a549fd488f8266a67 Mon Sep 17 00:00:00 2001 From: "Marek Z. Jeziorek" Date: Wed, 2 Jan 2019 20:44:43 -0500 Subject: [PATCH 12/14] adding zmth handling --- nototools/noto_lint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nototools/noto_lint.py b/nototools/noto_lint.py index 94d7a8ef..245bce3e 100755 --- a/nototools/noto_lint.py +++ b/nototools/noto_lint.py @@ -400,6 +400,7 @@ def _get_cmap_data_for_phase(phase): "NotoSansEmoji-Regular.ttf": ("Sans", "Zsye", None, "Regular"), "NotoSansKufiArabic-Regular.ttf": ("Kufi", "Arab", None, "Regular"), "NotoSansKufiArabic-Bold.ttf": ("Kufi", "Arab", None, "Bold"), + "NotoSansMath-Regular.ttf": ("Sans", "Zmth", None, "Regular"), "NotoSansSymbols-Regular.ttf": ("Sans", "Zsym", None, "Regular"), "NotoNastaliqUrduDraft.ttf": ("Nastaliq", "Urdu", None, "Regular"), "NotoNastaliq-Regular.ttf": ("Nastaliq", "Urdu", None, "Regular") From 5ed2f7cb54ccf816d75168be13642dcdbf1ddf0a Mon Sep 17 00:00:00 2001 From: "Marek Z. Jeziorek" Date: Wed, 2 Jan 2019 20:52:50 -0500 Subject: [PATCH 13/14] Added handling of math --- nototools/cldr_data.py | 2 +- nototools/extra_locale_data.py | 1 + nototools/lint_cmap_reqs.py | 9 +++++++++ nototools/noto_data.py | 1 + 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/nototools/cldr_data.py b/nototools/cldr_data.py index 2ea57b9f..f98db561 100755 --- a/nototools/cldr_data.py +++ b/nototools/cldr_data.py @@ -317,7 +317,7 @@ def is_script_rtl(script): # updated. Also special case locale-script codes, we have some. if script == 'Adlm': return True - if script in ['Zsym', 'Zsye', 'Hrkt', 'Jpan']: + if script in ['Zsym', 'Zsye', 'Zmth', 'Hrkt', 'Jpan']: return False # we really should throw an exception if _DEBUG: diff --git a/nototools/extra_locale_data.py b/nototools/extra_locale_data.py index d3b7a60c..bff15024 100755 --- a/nototools/extra_locale_data.py +++ b/nototools/extra_locale_data.py @@ -501,6 +501,7 @@ 'und-Tglg': r'[\u1700-\u170c \u170e-\u1711]', 'und-Ugar': r'[\U010380-\U01039d \U01039f]', 'und-Xsux': r'[\U012000-\U01202f]', + 'und-Zmth': r'[\U010AC0-\U010AE6 \U010AEB-\U010AF6]', 'und-Zsye': r'[\u2049\u231a\u231b\u2600\u260e\u2614\u2615\u26fa\u2708\u2709' r'\u270f\u3297\U01f004\U01f170\U01f193\U01f197\U01f30d\U01f318' r'\U01f332\U01f334\U01f335\U01f344\U01f346\U01f352\U01f381' diff --git a/nototools/lint_cmap_reqs.py b/nototools/lint_cmap_reqs.py index b021e8a6..93b3b2b0 100755 --- a/nototools/lint_cmap_reqs.py +++ b/nototools/lint_cmap_reqs.py @@ -51,6 +51,12 @@ def _symbol_set(): return _code_range_to_set(ranges) +def _math_set(): + """Returns set of characters that should be supported in Noto Math.""" + ranges = unicode_data._parse_code_ranges(noto_data.MATH_RANGES_TXT) + return _code_range_to_set(ranges) + + def _cjk_set(): """Returns set of characters that will be provided in CJK fonts.""" ranges = unicode_data._parse_code_ranges(noto_data.CJK_RANGES_TXT) @@ -69,6 +75,9 @@ def _get_script_required( # TODO: Check emoji coverage if not unicode_only: needed_chars = _emoji_pua_set() # legacy PUA for android emoji + elif script == 'Zmth': # Math + if not unicode_only: + needed_chars = _math_set() elif script == 'Zsym': # Symbols if not unicode_only: needed_chars = _symbol_set() diff --git a/nototools/noto_data.py b/nototools/noto_data.py index 6ce66537..c504d2a5 100755 --- a/nototools/noto_data.py +++ b/nototools/noto_data.py @@ -114,6 +114,7 @@ 'Geor', # Georgian 'Hebr', # Hebrew 'Sinh', # Sinhala + 'Zmth', # Math 'Zsye', # Emoji }) From 995b8953bc6c31d7cfd2f88c761f7626151ddec1 Mon Sep 17 00:00:00 2001 From: punchcutter Date: Mon, 18 Feb 2019 19:50:50 -0800 Subject: [PATCH 14/14] Add newer Unicode scripts to Noto lint Update Sans Balinese and Sans Ahom to Serif Balinese and Serif Ahom Update fonttools requirements to fix noto_lint failure when calling subset --- nototools/data/family_name_info_p3.xml | 14 +- nototools/data/familyname_and_styles.txt | 15 +- nototools/data/noto_cmap_phase3.xml | 10 + nototools/noto_lint.py | 483 +++++++++++------------ nototools/noto_names.py | 59 +-- nototools/notoconfig.py | 6 +- requirements.txt | 2 +- 7 files changed, 306 insertions(+), 283 deletions(-) diff --git a/nototools/data/family_name_info_p3.xml b/nototools/data/family_name_info_p3.xml index 52ac3b77..2a546aea 100644 --- a/nototools/data/family_name_info_p3.xml +++ b/nototools/data/family_name_info_p3.xml @@ -13,13 +13,13 @@ - + - + @@ -42,6 +42,7 @@ + @@ -49,6 +50,8 @@ + + @@ -94,9 +97,11 @@ + + @@ -114,6 +119,7 @@ + @@ -143,7 +149,10 @@ + + + @@ -175,6 +184,7 @@ + diff --git a/nototools/data/familyname_and_styles.txt b/nototools/data/familyname_and_styles.txt index a0919000..5c924104 100644 --- a/nototools/data/familyname_and_styles.txt +++ b/nototools/data/familyname_and_styles.txt @@ -115,10 +115,10 @@ NotoSansSignwriting NotoSansTakri NotoSansTirhuta NotoSansWarangCiti -NotoSansAhom +NotoSerifAhom NotoSansAnatolianHieroglyphs NotoSansAvestan -NotoSansBalinese +NotoSerifBalinese NotoSansBamum NotoSansBassaVah NotoSansBatak @@ -131,12 +131,14 @@ NotoSansCaucasianAlbanian NotoSansCoptic NotoSansCypriot NotoSansDeseret +NotoSansDogra NotoSansDuployan NotoSansElbasan NotoSansEgyptianHieroglyphs NotoSansGlagolitic NotoSansGothic NotoSansGrantha +NotoSansGunjalaGondi NotoSansHanunoo NotoSansHatran NotoSansImperialAramaic @@ -156,8 +158,11 @@ NotoSansLisu NotoSansLycian NotoSansLydian NotoSansMahajani +NotoSansMakasar NotoSansMandaic NotoSansManichaean +NotoSansMasaramGondi +NotoSansMedefaidrin NotoSansMeeteiMayek NotoSansMeroiticCursive NotoSansMeroiticHieroglyphs @@ -167,6 +172,7 @@ NotoSansMultani NotoSansNKo NotoSansNabataean NotoSansNewTaiLue +NotoSansNushu NotoSansOgham NotoSansOlChiki NotoSansOldHungarian @@ -174,6 +180,7 @@ NotoSansOldItalic NotoSansOldNorthArabian NotoSansOldPermic NotoSansOldPersian +NotoSansOldSogdian NotoSansOldSouthArabian NotoSansOldTurkic NotoSansOsmanya @@ -188,7 +195,9 @@ NotoSansSamaritan NotoSansSaurashtra NotoSansShavian # NotoSansShorthandFormatControls (spreadsheet had this, it's Duployan) +NotoSansSogdian NotoSansSoraSompeng +NotoSansSoyombo NotoSansSundanese NotoSansSumeroAkkadianCuneiform NotoSansSylotiNagri @@ -202,7 +211,7 @@ NotoSansTifinagh NotoSansUgaritic NotoSansVai NotoSansYi -# NotoSansZanabazarSquare (not in Unicode 9) +NotoSansZanabazarSquare #extras -- RB/R/RI -- diff --git a/nototools/data/noto_cmap_phase3.xml b/nototools/data/noto_cmap_phase3.xml index 76479b47..b7d9b3f9 100644 --- a/nototools/data/noto_cmap_phase3.xml +++ b/nototools/data/noto_cmap_phase3.xml @@ -33,6 +33,7 @@ Copt,Coptic,188,0000 000d 0020 002d 00a0 0300-0302 0304-0305 0307-0308 0323 033f 0361 0374-0375 03e2-03ef 1dcd 2010 25cc 2c80-2cf3 2cf9-2cff fe24-fe26 102e0-102fb,6,002e 003a-003b 00b7 2019 2e17 Cprt,Cypriot,59,0000 000d 0020 00a0 10800-10805 10808 1080a-10835 10837-10838 1083c 1083f,0, Deva,Devanagari,270,0000 000d 0020-0023 0025 0027-003f 005b-005f 007b-007e 00a0 00ad 00d7 00f7 02bc 0900-097f 1cd0-1cf6 1cf8-1cf9 200b-200d 2010 2013-2014 2018-2019 201c-201d 2026 20b9 20f0 2212 25cc a830-a839 a8e0-a8fd,-1, + Dogr,Dogra,66,0000 000d 0020 0964-0965 25cc 11800-1183b,-1, Dsrt,Deseret,84,0000 000d 0020 00a0 10400-1044f,0, Dupl,Duployan,154,0000 000d 0020 00a0 200c-200d 25cc 1bc00-1bc6a 1bc70-1bc7c 1bc80-1bc88 1bc90-1bc99 1bc9c-1bca3,-1, EXCL,EXCL,137996,0000-001f 007f-009f 332c e000-f8ff fa70-fad9 fe00-fe0f feff 1b000-1b001 e0001 e0020-e007f e0100-e01ef f0000-ffffd 100000-10fffd,-1, @@ -41,6 +42,8 @@ Ethi,Ethiopic,505,0000 000d 0020 002d 00a0 0308 030e 1200-1248 124a-124d 1250-1256 1258 125a-125d 1260-1288 128a-128d 1290-12b0 12b2-12b5 12b8-12be 12c0 12c2-12c5 12c8-12d6 12d8-1310 1312-1315 1318-135a 135d-137c 1380-1399 2010 22ee 25cc 2d80-2d96 2da0-2da6 2da8-2dae 2db0-2db6 2db8-2dbe 2dc0-2dc6 2dc8-2dce 2dd0-2dd6 2dd8-2dde ab01-ab06 ab09-ab0e ab11-ab16 ab20-ab26 ab28-ab2e,20,0021-0022 0027-0029 002b-002c 002e-002f 003d 003f 00ab 00bb 2018-2019 201c-201d 2026 2039-203a Geor,Georgian,136,0000 000d 0020 002d 00a0 0589 10a0-10c5 10c7 10cd 10d0-10ff 2010 20be 2d00-2d25 2d27 2d2d,31,0021-0022 0025 0028-0029 002c 002e 003a-003b 003f 00ab 00b7 00bb 2014 201c 201e 2026 2056-205e 2e2a-2e2d 2e31 Glag,Glagolitic,142,0000 000d 0020 00a0 0303 0305 0484 0487 25cc 2c00-2c2e 2c30-2c5e a66f 1e000-1e006 1e008-1e018 1e01b-1e021 1e023-1e024 1e026-1e02a,11,0022 002c 002e 003b 00b7 201c-201d 2056 2058-2059 2e43 + Gong,Gunjala Gondi,70,0000 000d 0020 00a0 0964-0965 25cc 11d60-11d65 11d67-11d68 11d6a-11d8e 11d90-11d91 11d93-11d98 11da0-11da9,-1, + Gonm,Masaram Gondi,82,0000 000d 0020 00a0 0964-0965 25cc 11d00-11d06 11d08-11d09 11d0b-11d36 11d3a 11d3c-11d3d 11d3f-11d47 11d50-11d59,-1, Goth,Gothic,36,0000 000d 0020 00a0 0304-0305 0308 0331 25cc 10330-1034a,2,003a 00b7 Gran,Grantha,120,0000 000d 0020 00a0 0951-0952 0964-0965 0baa 0bb5 0be6-0bf2 1cd0 1cd2-1cd3 1cf2-1cf4 1cf8-1cf9 200c-200d 20f0 25cc 11300-11303 11305-1130c 1130f-11310 11313-11328 1132a-11330 11332-11333 11335-11339 1133c-11344 11347-11348 1134b-1134d 11350 11357 1135d-11363 11366-1136c 11370-11374,-1, Gujr,Gujarati,158,0000 000d 0020-0023 0025 0027-003f 005b-005f 007b-007e 00a0 00ad 00d7 00f7 0951-0952 0964-0965 0a81-0a83 0a85-0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3 0ab5-0ab9 0abc-0ac5 0ac7-0ac9 0acb-0acd 0ad0 0ae0-0ae3 0ae6-0af1 0af9 200b-200d 2010 2013-2014 2018-2019 201c-201d 2026 20b9 2212 25cc a830-a839,-1, @@ -72,9 +75,11 @@ MONO,MONO,3324,0000 000d 0020-007e 00a0-0377 037a-037f 0384-038a 038c 038e-03a1 03a3-03e1 03f0-052f 1ab0-1abe 1c80-1c88 1d00-1df5 1dfb-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b 1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fc4 1fc6-1fd3 1fd6-1fdb 1fdd-1fef 1ff2-1ff4 1ff6-1ffe 2000-2064 2066-2071 2074-208e 2090-209c 20a0-20be 20f0 2100-215f 2184 2189 2190-2195 219c-219e 21a0 21a2-21a4 21a6 21d0-21d4 21da-21db 21e6 21e8 2200-220e 2210 2212 2218-221a 221e 2220 2223 2227-222a 2234-2238 223c-223d 2241 2243 2245 2247-224b 2254-2255 2257 225f-2262 2264-2265 226c 226e-2275 227a-227b 2282-2289 228e 2291-229c 22a2-22a5 22b4-22b5 22b8 22c2-22c4 22c6 22c8-22ca 22cd-22ce 22d0-22d1 22e2-22e3 2308-230b 2310 2319 2320-2321 2336-237a 2395 239b-23ae 23b0-23bd 23dc-23e1 2474-2475 2500-25ff 266d-266f 2736 2758-275a 27d5-27d7 27e6-27eb 27f5-27f6 2987-2988 29b8 2a00 2a05-2a06 2c60-2c7f 2de0-2e44 a640-a69f a700-a7ae a7b0-a7b7 a7f7-a7ff a92e ab30-ab65 fb00-fb06 fe00 fe20-fe2f feff ff5b ff5d fffc-fffd 1f67c-1f67f,-1, MUSE,Music,559,0000 000d 0020 00a0 25cc 2669-266f 1d000-1d0f5 1d100-1d126 1d129-1d1e8 1d200-1d245,-1, Mahj,Mahajani,72,0000 000d 0020 002d 003a 00a0 00b7 0964-096f 200c-200d 2013 25cc a830-a839 11150-11176,-1, + Maka,Makasar,29,0000 000d 0020 25cc 11ee0-11ef8,-1, Mand,Mandaic,37,0000 000d 0020 00a0 0640 0840-085b 085e 200c-200d 25cc,0, Mani,Manichaean,59,0000 000d 0020 00a0 0640 200c-200d 25cc 10ac0-10ae6 10aeb-10af6,-1, Marc,Marchen,73,0000 000d 0020 00a0 25cc 11c70-11c8f 11c92-11ca7 11ca9-11cb6,-1, + Medf,Medefaidrin,96,0000 000d 0020 00a0 25cc 16e40-16e9a,-1, Mend,Mende Kikakui,218,0000 000d 0020 00a0 25cc 1e800-1e8c4 1e8c7-1e8d6,-1, Merc,Meroitic Cursive,97,0000 000d 0020 003a 00a0 2026 205d 109a0-109b7 109bc-109cf 109d2-109ff,-1, Mero,Meroitic Hieroglyphs,36,0000 000d 0020 00a0 10980-1099f,-1, @@ -89,6 +94,7 @@ Nbat,Nabataean,44,0000 000d 0020 00a0 10880-1089e 108a7-108af,-1, Newa,Newa,97,0000 000d 0020 00a0 25cc 11400-11459 1145b 1145d,-1, Nkoo,N'Ko,76,0000 000d 0020 00a0 060c 061b 061f 066a 07c0-07fa 200c-200f 25cc 2e1c-2e1d fd3e-fd3f,0, + Nshu,Nushu,401,0000 000d 0020 00a0 1b170-1b2fb,-1, Ogam,Ogham,33,0000 000d 0020 00a0 1680-169c,0, Olck,Ol Chiki,52,0000 000d 0020 00a0 1c50-1c7f,8,0021 002c 003f 2014 2018-2019 201c-201d Orkh,Old Turkic,77,0000 000d 0020 00a0 10c00-10c48,2,205a 2e30 @@ -117,7 +123,10 @@ Sidd,Siddham,99,0000 000d 0020 00a0 200c-200d 25cc 11580-115b5 115b8-115dd,-1, Sind,Khudawadi,93,0000 000d 0020 002e 003a-003b 00a0 0964-0965 200c-200d 2013-2014 25cc a830-a839 112b0-112ea 112f0-112f9,-1, Sinh,Sinhala,169,0000 000d 0020-0023 0025 0027-003f 005b-005f 007b-007e 00a0 00ad 00d7 00f7 0964-0965 0d82-0d83 0d85-0d96 0d9a-0db1 0db3-0dbb 0dbd 0dc0-0dc6 0dca 0dcf-0dd4 0dd6 0dd8-0ddf 0de6-0def 0df2-0df4 200b-200d 2013-2014 2018-2019 201c-201d 2026 2212 25cc 111e1-111f4,-1, + Sogd,Sogdian,47,0000 000d 0020 00a0 25cc 10f30-10f59,-1, + Sogo,Old Sogdian,44,0000 000d 0020 00a0 10f00-10f27,-1, Sora,Sora Sompeng,47,0000 000d 0020-0021 0028-0029 002c-002e 003b 00a0 2010 110d0-110e8 110f0-110f9,-1, + Soyo,Soyombo,81,0000 000d 0020 00a0 11a50-11a83 11a86-11aa2,-1, Sund,Sundanese,82,0000 000d 0020 002d 00a0 1b80-1bbf 1cc0-1cc7 200b-200d 2010 25cc,16,0022 003c 003e-003f 00d7 2012-2015 201c-201d 2022 25fb-25fe Sylo,Syloti Nagri,67,0000 000d 0020 00a0 0964-0965 09e6-09ef 200b-200d 2010-2011 2055 25cc a800-a82b,14,002c 002e 003a-003b 00d7 2012-2015 2022 25fb-25fe Syrc,Syriac,151,0000 000d 0020-0021 0028-002b 002d-002f 003a 003d 005b-005d 00a0 00ab 00b0 00bb 0303-0304 0307-0308 030a 0320 0323-0325 032d-032e 0330-0331 060c 061b 061f 0621 0640 064b-0655 0660-066c 0670-0671 0700-070d 070f-074a 074d-074f 200c-200f 2026 2044 2212 25cc 2670-2671,-1, @@ -141,6 +150,7 @@ Xpeo,Old Persian,54,0000 000d 0020 00a0 103a0-103c3 103c8-103d5,0, Xsux,Cuneiform,1238,0000 000d 0020 00a0 12000-12399 12400-1246e 12470-12474 12480-12543,0, Yiii,Yi,1252,0000 000d 0020 002c 00a0 3001-3002 3008-3011 3014-301b 30fb a000-a48c a490-a4c6 ff0c ff61-ff65,0, + Zanb,Zanabazar Square,72,0000 000d 0020 00a0 25cc 11a00-11a47 ,-1, Zmth,Math,2471,0000 000d 0020-007e 00a0 00a7 00ac 00b1 00d7 00f7 0302-0303 0305 0307-0308 0330 0391-03a1 03a3-03a9 03b1-03c9 03d1 03d5-03d6 03f0-03f1 03f4-03f5 2032-2037 2057 20d0-20dc 20e1 20e5-20ef 2102 210a-210e 2110-2112 2115 2119-211d 2124 2128 212c-212d 212f-2131 2133-2138 213c-2140 2145-2149 2190-21ae 21b0-21e5 21f1-21f2 21f4-22ff 2308-230b 2310 2319 231c-2321 2336-237a 237c 2395 239b-23b6 23d0 23dc-23e1 2474-2475 25af 25b3 25b7 25bd 25c1 25ca 25cc 25fb 266d-266f 27c0-27ff 2900-2aff 2b0e-2b11 2b30-2b4c ff5b ff5d 1d400-1d454 1d456-1d49c 1d49e-1d49f 1d4a2 1d4a5-1d4a6 1d4a9-1d4ac 1d4ae-1d4b9 1d4bb 1d4bd-1d4c3 1d4c5-1d505 1d507-1d50a 1d50d-1d514 1d516-1d51c 1d51e-1d539 1d53b-1d53e 1d540-1d544 1d546 1d54a-1d550 1d552-1d6a5 1d6a8-1d7cb 1d7ce-1d7ff 1ee00-1ee03 1ee05-1ee1f 1ee21-1ee22 1ee24 1ee27 1ee29-1ee32 1ee34-1ee37 1ee39 1ee3b 1ee42 1ee47 1ee49 1ee4b 1ee4d-1ee4f 1ee51-1ee52 1ee54 1ee57 1ee59 1ee5b 1ee5d 1ee5f 1ee61-1ee62 1ee64 1ee67-1ee6a 1ee6c-1ee72 1ee74-1ee77 1ee79-1ee7c 1ee7e 1ee80-1ee89 1ee8b-1ee9b 1eea1-1eea3 1eea5-1eea9 1eeab-1eebb 1eef0-1eef1,-1, Zsye,Emoji,1151,0000 000d 0020 0023 002a 0030-0039 00a0 00a9 00ae 203c 2049 2122 2139 2194-2199 21a9-21aa 231a-231b 2328 23cf 23e9-23f3 23f8-23fa 24c2 25aa-25ab 25b6 25c0 25fb-25fe 2600-2604 260e 2611 2614-2615 2618 261d 2620 2622-2623 2626 262a 262e-262f 2638-263a 2640 2642 2648-2653 2660 2663 2665-2666 2668 267b 267f 2692-2697 2699 269b-269c 26a0-26a1 26aa-26ab 26b0-26b1 26bd-26be 26c4-26c5 26c8 26ce-26cf 26d1 26d3-26d4 26e9-26ea 26f0-26f5 26f7-26fa 26fd 2702 2705 2708-270d 270f 2712 2714 2716 271d 2721 2728 2733-2734 2744 2747 274c 274e 2753-2755 2757 2763-2764 2795-2797 27a1 27b0 27bf 2934-2935 2b05-2b07 2b1b-2b1c 2b50 2b55 3030 303d 3297 3299 1f004 1f0cf 1f170-1f171 1f17e-1f17f 1f18e 1f191-1f19a 1f1e6-1f1ff 1f201-1f202 1f21a 1f22f 1f232-1f23a 1f250-1f251 1f300-1f321 1f324-1f393 1f396-1f397 1f399-1f39b 1f39e-1f3f0 1f3f3-1f3f5 1f3f7-1f4fd 1f4ff-1f53d 1f549-1f54e 1f550-1f567 1f56f-1f570 1f573-1f57a 1f587 1f58a-1f58d 1f590 1f595-1f596 1f5a4-1f5a5 1f5a8 1f5b1-1f5b2 1f5bc 1f5c2-1f5c4 1f5d1-1f5d3 1f5dc-1f5de 1f5e1 1f5e3 1f5e8 1f5ef 1f5f3 1f5fa-1f64f 1f680-1f6c5 1f6cb-1f6d2 1f6e0-1f6e5 1f6e9 1f6eb-1f6ec 1f6f0 1f6f3-1f6f6 1f910-1f91e 1f920-1f927 1f930 1f933-1f93a 1f93c-1f93e 1f940-1f945 1f947-1f94b 1f950-1f95e 1f980-1f991 1f9c0 fe4e5-fe4ee fe82c fe82e-fe837,-1, Zsym,Symbols,838,0000 000d 0020 0030-0039 0041-005a 0061-007a 00a0 20dd-20e0 20e2-20e4 2160-2183 2185-2188 218a-218b 2190-2199 2300-230f 2311-2315 2317 231c-231f 2322-2323 2329-232a 232c-2335 237c 2380-2394 2396-239a 23af 23be-23cd 23d0-23db 23e2-23e8 2460-24ff 25cc 260a-260d 2613 2624-262f 2638-263b 263d-2653 2669-267e 2690-269d 26a2-26a9 26ad-26bc 26ce 26e2-26ff 271d-2721 2776-2793 2921-2922 1f100-1f10c 1f110-1f12e 1f130-1f16b 1f170-1f190 1f19b-1f1ac 1f546-1f549 1f54f 1f610 1f700-1f773,-1, diff --git a/nototools/noto_lint.py b/nototools/noto_lint.py index 245bce3e..f668f305 100755 --- a/nototools/noto_lint.py +++ b/nototools/noto_lint.py @@ -21,7 +21,6 @@ "behdad@google.com (Behdad Esfahbod), and " "stuartg@google.com (Stuart Gill)") - import argparse import collections import itertools @@ -59,6 +58,7 @@ '0000-007f 00A0-00ff 20ac 201a 0192 201e 2026 2020 2021 02c6 2030 0160 2039 0152 017d' '2018 2019 201c 201d 2022 2013 2014 02dc 2122 0161 203a 0153 017e 0178') + def all_scripts(): """Extends unicode scripts with pseudo-script 'Urdu'.""" result = set(unicode_data.all_scripts()) @@ -67,7 +67,7 @@ def all_scripts(): def printable_unicode_range(input_char_set): - char_set = set(input_char_set) # copy + char_set = set(input_char_set) # copy parts_list = [] while char_set: last = first = min(char_set) @@ -77,7 +77,7 @@ def printable_unicode_range(input_char_set): if last == first + 1: part = "%04X" % first else: - part = "%04X..%04X" % (first, last-1) + part = "%04X..%04X" % (first, last - 1) parts_list.append(part) return ", ".join(parts_list) @@ -90,7 +90,7 @@ def next_circular_point(current_point, start_of_range, end_of_range): def curve_between( - coordinates, start_at, end_at, start_of_contour, end_of_contour): + coordinates, start_at, end_at, start_of_contour, end_of_contour): """Returns indices of a part of a contour between start and end of a curve. The contour is the cycle between start_of_contour and end_of_contour, @@ -107,13 +107,13 @@ def curve_between( the contour if necessary. """ if end_at > start_at: - return list(coordinates[start_at:end_at+1]) + return list(coordinates[start_at:end_at + 1]) elif start_of_contour == end_of_contour: # single-point contour assert start_at == end_at == start_of_contour return [coordinates[start_at]] else: # the curve goes around the range - return (list(coordinates[start_at:end_of_contour+1]) + - list(coordinates[start_of_contour:end_at+1])) + return (list(coordinates[start_at:end_of_contour + 1]) + + list(coordinates[start_of_contour:end_at + 1])) def curve_has_off_curve_extrema(curve): @@ -148,10 +148,10 @@ def curve_has_off_curve_extrema(curve): # these set to +pi and then with all set to -pi. If the curve is proper in # at least one case, we assume the curve has no missing extrema. - ninety_deg = math.pi/2 + ninety_deg = math.pi / 2 score = 0 for sign in [-1, +1]: - angles = [sign*math.pi if math.fabs(angle) == math.pi else angle + angles = [sign * math.pi if math.fabs(angle) == math.pi else angle for angle in angles] min_quarter = math.floor(min(angles) / ninety_deg) max_quarter = math.ceil(max(angles) / ninety_deg) @@ -163,6 +163,7 @@ def curve_has_off_curve_extrema(curve): return 0 + # Finds out the how far away the off-curve extrema lies from the on-curve # points. This is done by comparing the bounding box of the endpoints with that # of the bezier curve. If there are implicit on-curve points, the curve is @@ -178,7 +179,7 @@ def out_of_box_size(curve): ax, ay = curve[1] bx, by = curve[2] # Implicit point is the mid point of first two off-curve points. - implicit_point = ((ax + bx)/2, (ay + by)/2) + implicit_point = ((ax + bx) / 2, (ay + by) / 2) first_curve = curve[:2] + [implicit_point] remaining_curve = [implicit_point] + curve[2:] else: @@ -198,7 +199,7 @@ def out_of_box_size(curve): # The out-of-box size for the entire curve will be maximum of the deviation # for the first curve and that of the remaining curve. delta = max(ex1 - bx1, ey1 - by1, bx2 - ex2, by2 - ey2, - out_of_box_size(remaining_curve)) + out_of_box_size(remaining_curve)) # ignore very small deviations return 0 if delta < 1 else delta @@ -228,6 +229,7 @@ def cut_piece_in_half(piece): piece[0], piece[1], piece[2], 0.5) + def cut_ends(piece, cut_amount): if len(piece) == 2: return (interpolate_segment(piece, cut_amount), @@ -244,9 +246,10 @@ def probably_intersect(piece1, piece2): return arrayTools.sectRect(bounds1, bounds2)[0] -_EPSILON = 1.0/(2**14) +_EPSILON = 1.0 / (2 ** 14) _MAX_DEPTH = 30 + def curve_pieces_intersect(piece1, piece2, ignore_ends): if ignore_ends: piece1 = cut_ends(piece1, _EPSILON) @@ -274,7 +277,8 @@ def curve_pieces_intersect(piece1, piece2, ignore_ends): return True else: pairs_to_investigate.append( - (first_section, second_section, level+1)) + (first_section, second_section, level + 1)) + def to_float_tuples(curve): coord_list = [] @@ -310,13 +314,13 @@ def curves_intersect(contour_list): all_pieces = sum(all_contours, []) if len(set(all_pieces)) != len(all_pieces): - print 'some pieces are duplicates' # No piece should be repeated + print('some pieces are duplicates') # No piece should be repeated adjacent_pairs = set() for contour_pieces in all_contours: - for i in range(len(contour_pieces)-1): + for i in range(len(contour_pieces) - 1): adjacent_pairs.add( - frozenset({contour_pieces[i], contour_pieces[i+1]})) + frozenset({contour_pieces[i], contour_pieces[i + 1]})) if len(contour_pieces) > 2: adjacent_pairs.add( frozenset({contour_pieces[-1], contour_pieces[0]})) @@ -341,11 +345,11 @@ def printable_font_revision(font, accuracy=2): font_revision = font["head"].fontRevision font_revision_int = int(font_revision) font_revision_frac = int( - round((font_revision - font_revision_int) * 10**accuracy)) + round((font_revision - font_revision_int) * 10 ** accuracy)) font_revision_int = str(font_revision_int) font_revision_frac = str(font_revision_frac).zfill(accuracy) - return font_revision_int+"."+font_revision_frac + return font_revision_int + "." + font_revision_frac def printable_font_versions(font): @@ -356,7 +360,7 @@ def printable_font_versions(font): minor_version = match.group(2) accuracy = len(minor_version) font_revision = printable_font_revision(font, accuracy) - if font_revision == major_version+"."+minor_version: + if font_revision == major_version + "." + minor_version: return version else: font_revision = printable_font_revision(font, 3) @@ -374,6 +378,8 @@ def _build_cmap_dict(filename): _phase_2_map = None _phase_3_map = None + + def _get_cmap_data_for_phase(phase): global _phase_2_map, _phase_3_map if phase < 3: @@ -412,7 +418,6 @@ def _get_cmap_data_for_phase(phase): UI_ASCENT = 2189 UI_DESCENT = -600 - _cur_file_name = None _printed_file_name = False _processed_files = 0 @@ -434,8 +439,7 @@ def font_properties_from_name(file_path, phase): is_google = True vendor = ('Adobe' if noto_font.is_cjk - else 'KhmerType' if noto_font.script in ['Khmr', 'Cham', 'Laoo'] - else 'Monotype') + else 'KhmerType' if noto_font.script in ['Khmr', 'Cham', 'Laoo'] else 'Monotype') char_version = 6.0 if noto_font.family == 'Noto' else 8.0 return FontProps(is_google, vendor, char_version, *noto_font) @@ -467,13 +471,13 @@ def check_font(font_props, filename_error, _processed_files += 1 def _noto_font_from_font_props(font_props): - fields = """ + fields = """ filepath,family,style,script,variant,width,weight,slope,fmt, manufacturer,license_type,is_hinted,is_mono,is_UI,is_UI_metrics, is_display,is_cjk,subset """.split(',') - vals = [getattr(font_props, p.strip()) for p in fields] - return noto_fonts.NotoFont(*vals) + vals = [getattr(font_props, p.strip()) for p in fields] + return noto_fonts.NotoFont(*vals) noto_font = _noto_font_from_font_props(font_props) @@ -485,12 +489,12 @@ def print_file_name(): global _printed_file_name if not _printed_file_name: _printed_file_name = True - print "---\nAutomatic testing for '%s', %s:" % ( + print("---\nAutomatic testing for '%s', %s:" % ( _cur_file_name, - printable_font_versions(font)) + printable_font_versions(font))) if check_test and not tests.check(test_name): - return + return interesting_part_of_file_name = ",".join(font_props.filepath.split("/")[-2:]) if interesting_part_of_file_name != _cur_file_name: @@ -515,17 +519,17 @@ def pluralize_errmsg(count, is_error=True): print_file_name() se = suppressed_err_count[0] if not se: - print "Found %s." % pluralize_errmsg(ec) + print("Found %s." % pluralize_errmsg(ec)) else: - print "Found %s (%s hidden)." % (pluralize_errmsg(ec), - "all" if se == ec else se) + print("Found %s (%s hidden)." % (pluralize_errmsg(ec), + "all" if se == ec else se)) if wc and not nowarn: sw = suppressed_warn_count[0] if not sw and wc: - print "Found %s." % pluralize_errmsg(wc, False) + print("Found %s." % pluralize_errmsg(wc, False)) elif wc: - print "Found %s (%s hidden)." % (pluralize_errmsg(wc, False), - "all" if sw == wc else sw) + print("Found %s (%s hidden)." % (pluralize_errmsg(wc, False), + "all" if sw == wc else sw)) if ec: _processed_files_with_errors += 1 @@ -564,7 +568,7 @@ def pluralize_errmsg(count, is_error=True): if font_props.slope: names.append(font_props.slope) subfamily = ''.join(names) - print ('%s,%s,%s,%s,%s,%s,%s,%s,%s,"%s"' % ( + print(('%s,%s,%s,%s,%s,%s,%s,%s,%s,"%s"' % ( err_type, noto_fonts.script_name_for_report(font_props.script), font_props.style if font_props.style else '', @@ -574,12 +578,11 @@ def pluralize_errmsg(count, is_error=True): category_name, interesting_part_of_file_name, printable_font_revision(font), - message)).encode('UTF-8') + message)).encode('UTF-8')) else: - print "%s <%s> %s" % (err_type[0], test_name, message.encode('UTF-8')) + print("%s <%s> %s" % (err_type[0], test_name, message.encode('UTF-8'))) sys.stdout.flush() - _script_key_to_font_name = { 'Aran': 'Urdu', 'HST': 'Historic', @@ -587,23 +590,22 @@ def pluralize_errmsg(count, is_error=True): 'Zsye': None, } - def _check_unused_names(): - # For now, just a warning, and we don't actually check if other tables use it. - # Add those checks as we need them. See the GPOS/GSUB checks of name references - # for an example of how we'd check. - if not tests.check('name/unused'): - return - names = font_data.get_name_records(font) - for i in names: - # names 255 and below are reserved for standard names - # names 256-32767 are for use by font tables - # names 23 and 24 are for use by CPAL, so it might be considered a mistake if - # these are present and no CPAL table is present or it doesn't use them. Not - # checking this for now. - if i >= 256: - warn('name/unused', 'Name', 'Name table has record #%d: "%s"' % - (i, names[i]), is_error=False) + # For now, just a warning, and we don't actually check if other tables use it. + # Add those checks as we need them. See the GPOS/GSUB checks of name references + # for an example of how we'd check. + if not tests.check('name/unused'): + return + names = font_data.get_name_records(font) + for i in names: + # names 255 and below are reserved for standard names + # names 256-32767 are for use by font tables + # names 23 and 24 are for use by CPAL, so it might be considered a mistake if + # these are present and no CPAL table is present or it doesn't use them. Not + # checking this for now. + if i >= 256: + warn('name/unused', 'Name', 'Name table has record #%d: "%s"' % + (i, names[i]), is_error=False) def _check_name(actual, expected, keyname, is_re): """Set expected to '-' to require any name, set it to None if a name @@ -635,10 +637,9 @@ def _check_name(actual, expected, keyname, is_re): warn(test_key, keyname, "Expected no %s, but got '%s'" % (keyname, actual)) - def check_name_table(): if not tests.check('name'): - return + return _check_unused_names() @@ -654,9 +655,9 @@ def check_name_table(): names = font_data.get_name_records(font) def _check_idx(idx, expected, keyname): - actual = names.get(idx, None) - is_re = expected and expected[0] == '^' and expected[-1] == '$' - _check_name(actual, expected, keyname, is_re=is_re) + actual = names.get(idx, None) + is_re = expected and expected[0] == '^' and expected[-1] == '$' + _check_name(actual, expected, keyname, is_re=is_re) _check_idx(0, name_data.copyright_re, 'copyright') _check_idx(1, name_data.original_family, "family") @@ -688,12 +689,13 @@ def _check_idx(idx, expected, keyname): match = re.match(name_data.version_re, names[5]) if not match: - return # already caught above + return # already caught above major_version = match.group(1) minor_version = match.group(2) + version_string = '%s.%s' % (major_version, minor_version) if ((0 <= int(major_version) <= 65535) - and (0 <= int(minor_version) <= 65535)): + and (0 <= int(minor_version) <= 65535)): accuracy = len(minor_version) font_revision = printable_font_revision(font, accuracy) if font_revision != major_version + "." + minor_version: @@ -706,20 +708,17 @@ def _check_idx(idx, expected, keyname): "Version string has numerical parts outside the range " "[0, 65535]: '%s'." % version_string) - def _get_required_chars(noto_font, noto_phase, test_key): - script_to_chars = _get_cmap_data_for_phase(noto_phase) - # do we need to map font names/families to scripts differently based - # on the phase? - try: - return script_to_chars[noto_font.script] - except KeyError: - warn(test_key, "Chars", - "no char data for script %s in %s" % ( - noto_font.script, noto_font.filepath)) + script_to_chars = _get_cmap_data_for_phase(noto_phase) + # do we need to map font names/families to scripts differently based + # on the phase? + try: + return script_to_chars[noto_font.script] + except KeyError: + warn(test_key, "Chars", "no char data for script %s in %s" % ( + noto_font.script, noto_font.filepath)) return None - def _check_needed_chars(cmap, char_filter): # TODO(roozbeh): check the glyph requirements for controls specified at # https://www.microsoft.com/typography/otspec/recom.htm @@ -727,7 +726,7 @@ def _check_needed_chars(cmap, char_filter): needed_chars = _get_required_chars( noto_font, noto_phase, 'cmap/script_required') if needed_chars == None: - return + return # TODO: also check character coverage against Unicode blocks for # characters of script Common or Inherited @@ -736,7 +735,7 @@ def _check_needed_chars(cmap, char_filter): # old_needed_size = len(needed_chars) needed_chars = set(itertools.ifilter(char_filter[1].accept, needed_chars)) # TODO(dougfelt): figure out how to make this info available without messing up output - # print 'filter needed char size: %d -> %d' % (old_needed_size, len(needed_chars)) + # print('filter needed char size: %d -> %d' % (old_needed_size, len(needed_chars)) missing_chars = needed_chars - set(cmap.keys()) if missing_chars: @@ -745,7 +744,6 @@ def _check_needed_chars(cmap, char_filter): % (len(missing_chars), printable_unicode_range(missing_chars)), check_test=False) - def _check_unexpected_chars(cmap, char_filter): expected_chars = _get_required_chars( noto_font, noto_phase, 'cmap/unexpected') @@ -760,7 +758,6 @@ def _check_unexpected_chars(cmap, char_filter): % (len(unexpected_chars), printable_unicode_range(unexpected_chars)), is_error=False, check_test=False) - def check_cmap_table(): cmap_table = font['cmap'] cmaps = {} @@ -775,8 +772,8 @@ def check_cmap_table(): (6, 1, 1), # Japanese (6, 1, 2), # Traditional Chinese (6, 1, 3), # Korean - (6, 1, 25), # Simplified Chinese - ]) + (6, 1, 25), # Simplified Chinese + ]) for table in cmap_table.tables: if (table.format, table.platformID, @@ -794,7 +791,7 @@ def check_cmap_table(): "'cmap' has two format %d subtables that are not" " aliases" % table.format) else: - cmaps[table.format] = table.cmap + cmaps[table.format] = table.cmap if 4 not in cmaps: warn("cmap/tables/missing", "cmap", @@ -804,9 +801,9 @@ def check_cmap_table(): cmap = cmaps[12] # if there is a format 12 table, it should have non-BMP characters if max(cmap.keys()) <= 0xFFFF: - warn("cmap/tables/format_12_has_bmp", "cmap", - "'cmap' has a format 12 subtable but no " - "non-BMP characters.") + warn("cmap/tables/format_12_has_bmp", "cmap", + "'cmap' has a format 12 subtable but no " + "non-BMP characters.") # format 4 table should be a subset of the format 12 one if tests.check('cmap/tables/format_4_subset_of_12') and 4 in cmaps: @@ -823,18 +820,17 @@ def check_cmap_table(): else: cmap = cmaps[4] - if tests.check('cmap/required'): required_in_all_fonts = [ - 0x0000, # .null - 0x000D, # CR - 0x0020] # space + 0x0000, # .null + 0x000D, # CR + 0x0020] # space for code in required_in_all_fonts: if code not in cmap: warn("cmap/required", "cmap", "U+%04X is not mapped in cmap, but it should be (see " "https://www.microsoft.com/typography/otspec/recom.htm)." - % code, + % code, check_test=False) if not font_props.is_cjk and tests.check('cmap/script_required'): @@ -846,6 +842,7 @@ def check_cmap_table(): pua_filter = tests.get_filter('cmap/private_use') if pua_filter: pua_filter = pua_filter[1].accept + def is_unwanted_pua(char): if char in needed_chars: return False @@ -866,7 +863,7 @@ def is_unwanted_pua(char): if tests.check('cmap/non_characters'): non_characters = frozenset( - range(0xFDD0, 0xFDEF+1) + range(0xFDD0, 0xFDEF + 1) + [0xFFFE + plane_no * 0x10000 for plane_no in range(0, 17)] + [0xFFFF + plane_no * 0x10000 for plane_no in range(0, 17)]) non_characters_in_cmap = non_characters & set(cmap.keys()) @@ -874,19 +871,19 @@ def is_unwanted_pua(char): warn("cmap/non_characters", "Chars", "There should be no non-characters defined in the font, but " "there are: %s." - % printable_unicode_range(non_characters_in_cmap), + % printable_unicode_range(non_characters_in_cmap), check_test=False) if tests.check('cmap/disallowed_ascii') and not ( - font_props.script == "Zsye" or - font_props.script == "Latn" or - font_props.script == "LGC" or - font_props.is_cjk): + font_props.script == "Zsye" or + font_props.script == "Latn" or + font_props.script == "LGC" or + font_props.is_cjk): ascii_letters = noto_data.ascii_letters() contained_letters = ascii_letters & set(cmap.keys()) if contained_letters: warn("cmap/disallowed_ascii", "Chars", - "There should not be ASCII letters in the font, but there are: %s." + "There should not be ASCII letters in the font, but there are: %s." % printable_unicode_range(contained_letters), check_test=False) @@ -911,7 +908,7 @@ def check_variants(): num = len(cps_with_variants) info = lint_config.write_int_ranges(cps_with_variants, sep=', ') if len(info) > 50: - info = "not shown" + info = "not shown" warn("cmap/variants", "Variants", "Font contains %d characters with standard variants, but has " "no variation selector cmap table (%s)." % (num, info)) @@ -923,7 +920,7 @@ def check_variants(): warn("cmap/variants", "Variants", "Char %04x has standard variant selector %04x, but " "this selector is not in the variant table." % - (cp, sel), check_test=False ) + (cp, sel), check_test=False) continue sel_info = None for t in vs_cmap.uvsDict[sel]: @@ -955,7 +952,6 @@ def check_variants(): (cp, sel, sel_glyphid, varcp, expected_glyphid), check_test=False) - def check_head_tables(cmap): if not tests.check('head'): return @@ -984,34 +980,34 @@ def check_ul_unicode_range(): (bucket_index, range_name, set_unset, chars_in_bucket, size_of_bucket), check_test=False) - # print printable_unicode_range(set(cmap.keys())) - # print "expected %s" % font_data.unicoderange_bitmap_to_string(expected_bitmap) - # print "have %s" % font_data.unicoderange_bitmap_to_string(bitmap) + # print(printable_unicode_range(set(cmap.keys())) + # print("expected %s" % font_data.unicoderange_bitmap_to_string(expected_bitmap) + # print("have %s" % font_data.unicoderange_bitmap_to_string(bitmap) hhea_table = font["hhea"] upem = font['head'].unitsPerEm if upem == 2048: - ui_ascent = UI_ASCENT - ui_descent = UI_DESCENT + ui_ascent = UI_ASCENT + ui_descent = UI_DESCENT else: - ui_ascent = int(math.ceil(UI_ASCENT * upem / 2048.0)) - ui_descent = int(math.floor(UI_DESCENT * upem / 2048.0)) + ui_ascent = int(math.ceil(UI_ASCENT * upem / 2048.0)) + ui_descent = int(math.floor(UI_DESCENT * upem / 2048.0)) if tests.check('head/hhea'): if font_props.is_UI_metrics: if hhea_table.ascent > ui_ascent: warn("head/hhea/ascent", "Bounds", "Value of ascent in 'hhea' table is %d, but should be %d." - % (hhea_table.ascent, ui_ascent)) + % (hhea_table.ascent, ui_ascent)) if hhea_table.descent < ui_descent: warn("head/hhea/descent", "Bounds", "Value of descent in 'hhea' table is %d, but should be %d." - % (hhea_table.descent, ui_descent)) + % (hhea_table.descent, ui_descent)) if hhea_table.lineGap != 0: warn("head/hhea/linegap", "Line Gap", "Value of lineGap in 'hhea' table is %d, but should be 0." - % hhea_table.lineGap) + % hhea_table.lineGap) vhea_table = font.get("vhea") if tests.check('head/vhea') and vhea_table: @@ -1069,7 +1065,7 @@ def check_ul_unicode_range(): # hack for windows GDI # remove this for phase 3 if noto_phase <= 2: - expected_weight = max(expected_weight, 250) + expected_weight = max(expected_weight, 250) if os2_table.usWeightClass != expected_weight: warn("head/os2/weight_class", "OS/2", @@ -1096,9 +1092,9 @@ def check_ul_unicode_range(): check_ul_unicode_range() if os2_table.panose.bFamilyType != 2: - warn("head/os2/panose/family", "OS/2", - "Panose family value is %s but expected 2" % - os2_table.panose.bSerifStyle) + warn("head/os2/panose/family", "OS/2", + "Panose family value is %s but expected 2" % + os2_table.panose.bSerifStyle) expect_serif = noto_font.style == 'Serif' or noto_font.family in [ 'Cousine', 'Tinos'] @@ -1106,13 +1102,13 @@ def check_ul_unicode_range(): serif_val = os2_table.panose.bSerifStyle is_serif = 1 < serif_val < 11 if serif_val == 1: - warn("head/os2/panose/serif", "OS/2", - "Panose serif value is 1 (no_fit) but expected 0 or %s" % - expected_serif_range_str) + warn("head/os2/panose/serif", "OS/2", + "Panose serif value is 1 (no_fit) but expected 0 or %s" % + expected_serif_range_str) elif serif_val != 0 and expect_serif != is_serif: - warn("head/os2/panose/serif", "OS/2", - "Panose serif value is %s but expected %s" % - (serif_val, expected_serif_range_str)) + warn("head/os2/panose/serif", "OS/2", + "Panose serif value is %s but expected %s" % + (serif_val, expected_serif_range_str)) # TODO(dougfelt): check condensed, semicondensed proportions? expect_mono = noto_font.is_mono or noto_font.family == 'Cousine' @@ -1121,17 +1117,16 @@ def check_ul_unicode_range(): is_mono = proportion_val == 9 if proportion_val <= 1: - warn("head/os2/panose/proportion", "OS/2", - "Panose proportion value is %s (%s) but " - "expected %s" % ( - proportion_val, - 'no_fit' if proportion_val == 1 else 'any', - expect_mono_range_str)) + warn("head/os2/panose/proportion", "OS/2", + "Panose proportion value is %s (%s) but " + "expected %s" % ( + proportion_val, + 'no_fit' if proportion_val == 1 else 'any', + expect_mono_range_str)) elif expect_mono != is_mono: - warn("head/os2/panose/proportion", "OS/2", - "Panose proportion value is %s but expected %s" % - (proportion_val, expect_mono_range_str)) - + warn("head/os2/panose/proportion", "OS/2", + "Panose proportion value is %s but expected %s" % + (proportion_val, expect_mono_range_str)) def check_vertical_limits(): if 'glyf' not in font: @@ -1142,11 +1137,11 @@ def check_vertical_limits(): upem = font['head'].unitsPerEm if upem == 2048: - max_ui_height = MAX_UI_HEIGHT - min_ui_height = MIN_UI_HEIGHT + max_ui_height = MAX_UI_HEIGHT + min_ui_height = MIN_UI_HEIGHT else: - max_ui_height = int(math.ceil(MAX_UI_HEIGHT * upem / 2048.0)) - min_ui_height = int(math.floor(MIN_UI_HEIGHT * upem / 2048.0)) + max_ui_height = int(math.ceil(MAX_UI_HEIGHT * upem / 2048.0)) + min_ui_height = int(math.floor(MIN_UI_HEIGHT * upem / 2048.0)) glyf_table = font['glyf'] us_win_ascent = font['OS/2'].usWinAscent @@ -1161,8 +1156,8 @@ def check_vertical_limits(): tmp_gids = set() cmap = font_data.get_cmap(font) for cp in lint_config.parse_int_ranges(WIN_ANSI_CODEPOINTS, True): - if cp in cmap: - tmp_gids.add(font.getGlyphID(cmap[cp], requireReal=True)) + if cp in cmap: + tmp_gids.add(font.getGlyphID(cmap[cp], requireReal=True)) win_ansi_gids = frozenset(tmp_gids) font_ymin = None @@ -1183,26 +1178,26 @@ def check_vertical_limits(): is_win_ansi = glyph_index in win_ansi_gids if is_win_ansi: - ascent_limit = us_win_ascent - ascent_name = 'usWinAscent' - descent_limit = -us_win_descent - descent_name = 'usWinDescent' + ascent_limit = us_win_ascent + ascent_name = 'usWinAscent' + descent_limit = -us_win_descent + descent_name = 'usWinDescent' else: - ascent_limit = typo_ascent - ascent_name = 'sTypoAscent' - descent_limit = typo_descent - descent_name = 'sTypoDescent' + ascent_limit = typo_ascent + ascent_name = 'sTypoAscent' + descent_limit = typo_descent + descent_name = 'sTypoDescent' if font_props.is_UI_metrics: if (tests.checkvalue('bounds/glyph/ui_ymax', glyph_index) and - ymax is not None and ymax > max_ui_height): + ymax is not None and ymax > max_ui_height): warn("bounds/glyph/ui_ymax", "UI Bounds", "Real yMax for glyph %d (%s) is %d, which is more than " "max ui height %d." % ( glyph_index, glyph_name, ymax, max_ui_height), check_test=False) if (tests.checkvalue('bounds/glyph/ui_ymin', glyph_index) and - ymin is not None and ymin < min_ui_height): + ymin is not None and ymin < min_ui_height): warn("bounds/glyph/ui_ymin", "UI Bounds", "Real yMin for glyph %d (%s) is %d, which is less than " "min ui height %d." % ( @@ -1210,7 +1205,7 @@ def check_vertical_limits(): check_test=False) if (tests.checkvalue('bounds/glyph/ymax', glyph_index) and ymax is not None and - ymax > ascent_limit): + ymax > ascent_limit): warn("bounds/glyph/ymax", "Bounds", "Real yMax for glyph %d (%s) is %d, which is higher than " "the font's %s (%d), resulting in clipping." % @@ -1218,7 +1213,7 @@ def check_vertical_limits(): check_test=False) if (tests.checkvalue('bounds/glyph/ymin', glyph_index) and ymin is not None and - ymin < descent_limit): + ymin < descent_limit): warn("bounds/glyph/ymin", "Bounds", "Real yMin for glyph %d (%s) is %d, which is lower than " "the font's %s (%d), resulting in clipping." % @@ -1295,7 +1290,7 @@ def check_for_intersections_and_off_curve_extrema(): curves_in_contour.append(curve) if not check_extrema: - continue + continue out_of_box = curve_has_off_curve_extrema(curve) if out_of_box > 0: warn("paths/extrema", "Extrema", @@ -1308,7 +1303,7 @@ def check_for_intersections_and_off_curve_extrema(): next_point, glyph.coordinates[next_point], out_of_box), - extrema_details, + extrema_details, check_test=False) start_point = end_point + 1 all_contours.append(curves_in_contour) @@ -1375,9 +1370,9 @@ def check_gdef_table(cmap): if glyph in class_def: klass = class_def[glyph] if (tests.checkvalue('gdef/classdef/not_combining_mismatch', code) and - klass == 3 - and unicode_data.category(code) != "Mn" - and code not in noto_data.ACCEPTABLE_AS_COMBINING): + klass == 3 + and unicode_data.category(code) != "Mn" + and code not in noto_data.ACCEPTABLE_AS_COMBINING): warn("gdef/classdef/not_combining_mismatch", "Glyph Class", "Glyph %s (U+%04X %s) is defined as class 3 " "(non-spacing) in the GDEF/GlyphClassDef table, " @@ -1449,20 +1444,19 @@ def check_complex_stylistic_set_name_ids(gsub_or_gpos): GSUB_OR_GPOS = gsub_or_gpos.upper() table = font[GSUB_OR_GPOS].table if not table.FeatureList: - return + return name_id_set = None for index in range(table.FeatureList.FeatureCount): - record = table.FeatureList.FeatureRecord[index] - params = record.Feature.FeatureParams - if isinstance(params, otTables.FeatureParamsStylisticSet): - if not name_id_set: - name_id_set = {r.nameID for r in font['name'].names} - if not params.UINameID in name_id_set: - warn("complex/%s/ui_name_id" % gsub_or_gpos, GSUB_OR_GPOS, - "Feature index %s (%s) has UINameID %d but it is not in the name table" % ( - index, record.FeatureTag, params.UINameID)) - + record = table.FeatureList.FeatureRecord[index] + params = record.Feature.FeatureParams + if isinstance(params, otTables.FeatureParamsStylisticSet): + if not name_id_set: + name_id_set = {r.nameID for r in font['name'].names} + if not params.UINameID in name_id_set: + warn("complex/%s/ui_name_id" % gsub_or_gpos, GSUB_OR_GPOS, + "Feature index %s (%s) has UINameID %d but it is not in the name table" % ( + index, record.FeatureTag, params.UINameID)) def check_shaping(font_file, strs, context, errors): text = '\n'.join(strs) @@ -1478,7 +1472,7 @@ def check_shaping(font_file, strs, context, errors): if context: features.append(context) command.append('--features=%s' % ','.join(features)) - # print "command: %s" % ' '.join(command) + # print("command: %s" % ' '.join(command) result = subprocess.check_output(command) for src, res in zip(strs, result.splitlines()): if res.find('|') != -1: @@ -1486,7 +1480,6 @@ def check_shaping(font_file, strs, context, errors): finally: temp_file.close() - def check_gsub_variants(): """Checks if harfbuzz can use GSUB to generate standard variants""" if not tests.check("complex/gsub/variants"): @@ -1505,16 +1498,16 @@ def check_gsub_variants(): for sel, _, ctx in sorted(data): line = unichr(cp) + unichr(sel); if ctx == 0: - any_strs.append(line) - continue + any_strs.append(line) + continue if ctx & 1: - isolate_strs.append(line) + isolate_strs.append(line) if ctx & 2: - initial_strs.append(line) + initial_strs.append(line) if ctx & 4: - medial_strs.append(line) + medial_strs.append(line) if ctx & 8: - final_strs.append(line) + final_strs.append(line) errors = [] font_file = font_props.filepath @@ -1584,7 +1577,7 @@ def check_gpos_and_gsub_tables(): check_complex_stylistic_set_name_ids('gsub') check_gsub_variants() - #TODO: Add more script-specific checks + # TODO: Add more script-specific checks def check_for_bidi_pairs(cmap): """Checks for proper support of bidi mirroring in the font. @@ -1604,11 +1597,11 @@ def check_for_bidi_pairs(cmap): rtlm = {} if "GSUB" in font: try: - feature_record = font["GSUB"].table.FeatureList.FeatureRecord + feature_record = font["GSUB"].table.FeatureList.FeatureRecord except AttributeError: - warn("bidi", "bidi", - "GSUB table with no feature record", is_error=False) - feature_record = [] + warn("bidi", "bidi", + "GSUB table with no feature record", is_error=False) + feature_record = [] for record in feature_record: if record.FeatureTag == "rtlm": # FIXME for lookup_number in record.Feature.LookupListIndex: @@ -1738,7 +1731,7 @@ def expect_width(codepoint, expected, low_divisor=None, high_divisor=None, else: # note name switch, since the higher divisor returns the lower value high_exp = int(round(float(expected) / low_divisor)) - low_exp = int(round(float(expected) / high_divisor)) + low_exp = int(round(float(expected) / high_divisor)) if not (low_exp - slop <= adv <= high_exp + slop): glyph_name = cmap[codepoint] glyph_id = font.getGlyphID(glyph_name) @@ -1776,7 +1769,7 @@ def expect_width(codepoint, expected, low_divisor=None, high_divisor=None, if tests.check('advances/whitespace'): if font_props.is_mono: space_width = get_horizontal_advance(space_char) - cps = [ tab_char, nbsp_char ] + range(0x2000, 0x200B) + cps = [tab_char, nbsp_char] + range(0x2000, 0x200B) for cp in cps: if cp in cmap: expect_width(cp, space_width) @@ -1790,34 +1783,34 @@ def expect_width(codepoint, expected, low_divisor=None, high_divisor=None, expect_width(nbsp_char, space_width) em_width = font['head'].unitsPerEm - expect_width(0x2000, em_width, 2) # en_quad - expect_width(0x2001, em_width) # em_quad - expect_width(0x2002, em_width, 2) # en_space - expect_width(0x2003, em_width) # em_space - expect_width(0x2004, em_width, 3) # three-per-em space - expect_width(0x2005, em_width, 4) # four-per-em space - expect_width(0x2006, em_width, 6) # six-per-em space + expect_width(0x2000, em_width, 2) # en_quad + expect_width(0x2001, em_width) # em_quad + expect_width(0x2002, em_width, 2) # en_space + expect_width(0x2003, em_width) # em_space + expect_width(0x2004, em_width, 3) # three-per-em space + expect_width(0x2005, em_width, 4) # four-per-em space + expect_width(0x2006, em_width, 6) # six-per-em space if digit_char in cmap: - expect_width(0x2007, digit_width) # figure space + expect_width(0x2007, digit_width) # figure space if period_char in cmap: - expect_width(0x2008, period_width) # punctuation space + expect_width(0x2008, period_width) # punctuation space # see http://unicode.org/charts/PDF/U2000.pdf, but microsoft (below) # says French uses 1/8 em. - expect_width(0x2009, em_width, 5, 6) # thin space + expect_width(0x2009, em_width, 5, 6) # thin space # see http://www.microsoft.com/typography/developers/fdsspec/spaces.htm - expect_width(0x200A, em_width, 10, 16) # hair space - expect_width(0x200B, 0) # zero width space + expect_width(0x200A, em_width, 10, 16) # hair space + expect_width(0x200B, 0) # zero width space if tests.check('advances/spacing_marks'): - spacing_marks = lint_config.parse_int_ranges( - "02C8 02CA-02D7 02DE 02DF 02EC 02ED 02EF-02F2 02F4-02FF", True) - for cp in spacing_marks: - if cp not in cmap: - continue - if not get_horizontal_advance(cp): - warn("advances/spacing_marks", "Advances", - "The spacing mark %s (%04x) should have a non-zero advance." % ( - unichr(cp), cp)); + spacing_marks = lint_config.parse_int_ranges( + "02C8 02CA-02D7 02DE 02DF 02EC 02ED 02EF-02F2 02F4-02FF", True) + for cp in spacing_marks: + if cp not in cmap: + continue + if not get_horizontal_advance(cp): + warn("advances/spacing_marks", "Advances", + "The spacing mark %s (%04x) should have a non-zero advance." % ( + unichr(cp), cp)); def check_stems(cmap): if not 'glyf' in font: @@ -1829,8 +1822,8 @@ def check_stems(cmap): # Only implemented for Ogham, currently # FIXME: Add support for Arabic, Syriac, Mongolian, Phags-Pa, # Devanagari, Bengali, etc - joins_to_right = set(range(0x1680, 0x169B+1)) - joins_to_left = set(range(0x1680, 0x169A+1) + [0x169C]) + joins_to_right = set(range(0x1680, 0x169B + 1)) + joins_to_left = set(range(0x1680, 0x169A + 1) + [0x169C]) all_joining = joins_to_right | joins_to_left glyf_table = font['glyf'] @@ -1876,13 +1869,13 @@ def check_accessiblity(cmap): subsetter = subset.Subsetter() subsetter.populate(unicodes=cmap.keys()) try: - subsetter._closure_glyphs(font) + subsetter._closure_glyphs(font) except Exception as e: - warn("reachable", "Reachability", - "Subsetter failure, bad/missing tables?: '%s'" % e) - return + warn("reachable", "Reachability", + "Subsetter failure, bad/missing tables?: '%s'" % e) + return - unreachable_glyphs = all_glyphs - subsetter.glyphs_all + unreachable_glyphs = all_glyphs - subsetter.glyphs_retained if unreachable_glyphs: reported_glyphs = set() reported_list = [] @@ -1899,10 +1892,8 @@ def check_accessiblity(cmap): (len(reported_glyphs), report_info), check_test=False) - ### actual start of check_font fn - # python 2.7 does not have nonlocal, so hack around it suppressed_err_count = [0] err_count = [0] @@ -1915,7 +1906,7 @@ def check_accessiblity(cmap): is_indic = font_props.script in { "Deva", "Beng", "Guru", "Gujr", "Orya", "Taml", "Telu", "Knda", "Mlym", "Sinh", - "Khmr" } + "Khmr"} fi = lint_config.FontInfo( filename=path.basename(font_path), @@ -1940,7 +1931,6 @@ def check_accessiblity(cmap): "File name '%s' does not match the Noto font naming guidelines." % path.basename(font_props.filepath)) - check_name_table() cmap = check_cmap_table() check_variants() @@ -1965,18 +1955,18 @@ def check_accessiblity(cmap): log = sorted(tests.runlog()) count = len(log) if count: - print 'Ran %d test%s:\n %s' % (count, 's' if count != 1 else '', - '\n '.join(log)) + print('Ran %d test%s:\n %s' % (count, 's' if count != 1 else '', + '\n '.join(log))) else: - print 'Ran no tests.' + print('Ran no tests.') if skiplog: log = sorted(tests.skiplog()) count = len(log) if len(log): - print 'Skipped %d test/group%s:\n %s' % (count, 's' if count != 1 else '', - '\n '.join(log)) + print('Skipped %d test/group%s:\n %s' % (count, 's' if count != 1 else '', + '\n '.join(log))) else: - print 'Skipped no tests' + print('Skipped no tests') # TODO(roozbeh): # * Check that hintedness based on data in the glyf table @@ -1988,25 +1978,25 @@ def check_accessiblity(cmap): def get_lint_spec(spec_file, extra_specs): - """Return a LintSpec from spec_file supplemented with extra_specs. + """Return a LintSpec from spec_file supplemented with extra_specs. If spec_file is None, only use extra_specs.""" - spec = None - if spec_file != 'None': - spec = lint_config.parse_spec_file(spec_file) - return lint_config.parse_spec(extra_specs, spec) + spec = None + if spec_file != 'None': + spec = lint_config.parse_spec_file(spec_file) + return lint_config.parse_spec(extra_specs, spec) def parse_font_props(font_props_file): - """Return a list of FontProps objects.""" - with open(font_props_file) as f: - font_spec = f.read() - spec_data = json.loads(font_spec) - return [FontProps(**m) for m in spec_data] + """Return a list of FontProps objects.""" + with open(font_props_file) as f: + font_spec = f.read() + spec_data = json.loads(font_spec) + return [FontProps(**m) for m in spec_data] def write_font_props(font_props): - print json.dumps(font_props._asdict()) + print(json.dumps(font_props._asdict())) def main(): @@ -2087,7 +2077,7 @@ def main(): font_props, filename_error = get_font_properties_with_fallback( font_file_path, phase=arguments.phase) if filename_error: - print '#Error for %s: %s' % (font_file_path, filename_error) + print('#Error for %s: %s' % (font_file_path, filename_error)) else: write_font_props(font_props) return @@ -2104,7 +2094,7 @@ def main(): font_props, filename_error = get_font_properties_with_fallback( font_file_path, phase=arguments.phase) if not font_props: - print '## ERROR: cannot parse %s' % font_file_path + print('## ERROR: cannot parse %s' % font_file_path) else: check_font(font_props, filename_error, @@ -2121,34 +2111,35 @@ def main(): if arguments.font_props_file: font_props_list = parse_font_props(arguments.font_props_file) for font_props in font_props_list: - check_font(font_props, - '', - lint_spec, - arguments.runlog, - arguments.skiplog, - arguments.csv, - arguments.info, - arguments.extrema_details, - arguments.nowarn, - arguments.quiet, - arguments.phase, - arguments.variable) + check_font(font_props, + '', + lint_spec, + arguments.runlog, + arguments.skiplog, + arguments.csv, + arguments.info, + arguments.extrema_details, + arguments.nowarn, + arguments.quiet, + arguments.phase, + arguments.variable) if not arguments.csv: - print "------" + print("------") if _processed_files == 1: - print "Finished linting 1 file." + print("Finished linting 1 file.") else: - print "Finished linting %d files." % _processed_files + print("Finished linting %d files." % _processed_files) if _processed_files > 1: if _processed_files_with_errors: - print "%d file%s had errors." % ( + print("%d file%s had errors." % ( _processed_files_with_errors, - '' if _processed_files_with_errors == 1 else 's') + '' if _processed_files_with_errors == 1 else 's')) if _processed_files_with_warnings: - print "%d file%s had warnings." % ( + print("%d file%s had warnings." % ( _processed_files_with_warnings, - '' if _processed_files_with_warnings == 1 else 's') + '' if _processed_files_with_warnings == 1 else 's')) + if __name__ == "__main__": main() diff --git a/nototools/noto_names.py b/nototools/noto_names.py index 40067be7..55c31d8e 100755 --- a/nototools/noto_names.py +++ b/nototools/noto_names.py @@ -38,6 +38,7 @@ name table names. So it is not useful for non-noto fonts. """ +from __future__ import print_function import argparse import collections import datetime @@ -328,6 +329,10 @@ def _original_parts(family_parts, subfamily_parts, no_style_linking=False): 'Inscriptional Parthian': 'InsParthi', # Prti 'Pau Cin Hau': 'PauCinHau', # Pauc 'Old Hungarian': 'OldHung', # Hung + 'Masaram Gondi': 'MasaramGon', # Gonm + 'Gunjala Gondi': 'GunjalaGon', # Gonj + 'Zanabazar Square': 'Zanabazar', # Zanb + 'Medefaidrin': 'Medfaidrin', # Medf } def _name_style_for_length(parts, limit): @@ -421,8 +426,7 @@ def repl_fn(m): result = re.sub('CJK(JP|KR|SC|TC)', repl_fn, result) if len(result) > 63: - print >> sys.stderr, 'postscript name longer than 63 characters:\n"%s"' % ( - result) + print('postscript name longer than 63 characters:\n"%s"' % (result), file=sys.stderr) return result @@ -581,10 +585,10 @@ def name_table_data(noto_font, family_to_name_info, phase): """Returns a NameTableData for this font given the family_to_name_info.""" family_id = noto_fonts.noto_font_to_wws_family_id(noto_font) try: - info = family_to_name_info[family_id] + info = family_to_name_info[family_id] except KeyError: - print >> sys.stderr, 'no family name info for "%s"' % family_id - return None + print('no family name info for "%s"' % family_id, file=sys.stderr) + return None family_parts, subfamily_parts = _wws_parts(*_preferred_parts(noto_font)) if not info.use_preferred and subfamily_parts not in [ @@ -592,10 +596,9 @@ def name_table_data(noto_font, family_to_name_info, phase): ['Bold'], ['Italic'], ['Bold', 'Italic']]: - print >> sys.stderr, ( - 'Error in family name info: %s requires preferred names, but info ' - 'says none are required.' % path.basename(noto_font.filepath)) - print >> sys.stderr, subfamily_parts + print('Error in family name info: %s requires preferred names, but info says none are required.' + % path.basename(noto_font.filepath), file=sys.stderr) + print(subfamily_parts, file=sys.stderr) return None # for phase 3 we'll now force include_regular @@ -841,13 +844,13 @@ def _create_family_to_faces(notofonts, name_fn): def _dump_family_to_faces(family_to_faces): for family in sorted(family_to_faces): - print '%s:\n %s' % ( - family, '\n '.join(sorted(family_to_faces[family]))) + print('%s:\n %s' % ( + family, '\n '.join(sorted(family_to_faces[family])))) def _dump_name_data(name_data): if not name_data: - print ' Error: no name data' + print(' Error: no name data') return True err = False @@ -855,11 +858,11 @@ def _dump_name_data(name_data): value = getattr(name_data, attr) if value: if attr == 'original_family' and len(value) > ORIGINAL_FAMILY_LIMIT: - print '## family too long (%2d): %s' % (len(value), value) + print('## family too long (%2d): %s' % (len(value), value)) err = True - print ' %20s: %s' % (attr, value) + print(' %20s: %s' % (attr, value)) else: - print ' %20s: ' % attr + print(' %20s: ' % attr) return err @@ -867,13 +870,13 @@ def _dump_family_names(notofonts, family_to_name_info, phase): err_names = [] for font in sorted(notofonts, key=lambda f: f.filepath): name_data = name_table_data(font, family_to_name_info, phase) - print - print font.filepath + print() + print(font.filepath) if _dump_name_data(name_data): err_names.append(font.filepath) if err_names: - print '## %d names too long:\n %s' % ( - len(err_names), '\n '.join(err_names)) + print('## %d names too long:\n %s' % ( + len(err_names), '\n '.join(err_names))) def _dump(fonts, info_file, phase): @@ -889,13 +892,13 @@ def _write(fonts, info_file, phase, extra_styles): if info_file: write_family_name_info_file(family_to_name_info, info_file, pretty=True) else: - print write_family_name_info(family_to_name_info, pretty=True) + print(write_family_name_info(family_to_name_info, pretty=True)) def _test(fonts, phase, extra_styles): """Build name info from font_paths and dump the names for them.""" family_to_name_info = create_family_to_name_info(fonts, phase, extra_styles) - print write_family_name_info(family_to_name_info, pretty=True) + print(write_family_name_info(family_to_name_info, pretty=True)) _dump_family_names(fonts, family_to_name_info, phase) @@ -903,8 +906,8 @@ def _info(fonts): """Group fonts into families and list the subfamilies for each.""" family_to_subfamilies = _create_family_to_subfamilies(fonts) for family in sorted(family_to_subfamilies): - print '%s:\n %s' % ( - family, '\n '.join(sorted(family_to_subfamilies[family]))) + print('%s:\n %s' % ( + family, '\n '.join(sorted(family_to_subfamilies[family])))) def _read_filename_list(filenames): @@ -985,26 +988,26 @@ def main(): paths = _collect_paths(args.dirs, args.files) fonts = _get_noto_fonts(paths) if not fonts: - print 'Please specify at least one directory or file' + print('Please specify at least one directory or file') return if not args.info_file: if args.phase: args.info_file = _PHASE_TO_FILENAME[args.phase] - print 'using name info file: "%s"' % args.info_file + print('using name info file: "%s"' % args.info_file) if args.cmd == 'dump': if not args.info_file: - print 'must specify an info file to dump' + print('must specify an info file to dump') return info_file = tool_utils.resolve_path(args.info_file) if not path.exists(info_file): - print '"%s" does not exist.' % args.info_file + print('"%s" does not exist.' % args.info_file) return _dump(fonts, info_file, args.phase) elif args.cmd == 'write': if not args.phase: - print 'Must specify phase when generating info.' + print('Must specify phase when generating info.') return out = None if args.info_file == '-' else args.info_file _write(fonts, out, args.phase, args.extra_styles) diff --git a/nototools/notoconfig.py b/nototools/notoconfig.py index 91f390ef..a0498310 100755 --- a/nototools/notoconfig.py +++ b/nototools/notoconfig.py @@ -114,10 +114,10 @@ def get(key, default=''): if __name__ == '__main__': keyset = set(_values.keys()) if not keyset: - print 'no keys defined, probably no notoconfig file was found.' + print('no keys defined, probably no notoconfig file was found.') else: wid = max(len(k) for k in keyset) fmt = '%%%ds: %%s' % wid for k in sorted(keyset): - print fmt % (k, get(k)) - print 'config: %s' % _config_path + print(fmt % (k, get(k))) + print('config: %s' % _config_path) diff --git a/requirements.txt b/requirements.txt index 6d4de4b8..7a4d7393 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ booleanOperations==0.7.0 defcon==0.3.1 -fonttools==3.9.1 +fonttools>=3.36.0 Pillow==4.0.0 pyclipper==1.0.6 ufoLib==2.0.0