diff --git a/index.html b/index.html index 4a89d49..d164786 100644 --- a/index.html +++ b/index.html @@ -5377,10 +5377,9 @@

Operator Dictionary (Compact)

Alternatively, discarding the smallest tables as explained above, - one can consider only those for categories 0-7. - Then each of the three 'Form' corresponds to at most three - categories and so ('Form', 'Category') can be encoded - on 4 bits. Using the 12-bit encoding of the 'Content' described + one can consider only those having a 4-bit encoding in + . + Using the 12-bit encoding of the 'Content' described above this means that these tables can be encoded with 16bits/entry but binary search would now be performed on a single table. diff --git a/tables/operator-dictionary-compact.html b/tables/operator-dictionary-compact.html index aa05a6b..6c62049 100644 --- a/tables/operator-dictionary-compact.html +++ b/tables/operator-dictionary-compact.html @@ -1,2 +1,2 @@ -

Special TableEntries
Operators_multichar41 entries (null-terminated UTF-16 strings): {U+0021,U+0021,U+0000}, {U+0021,U+003D,U+0000}, {U+0026,U+0026,U+0000}, {U+002A,U+003D,U+0000}, {U+002B,U+002B,U+0000}, {U+002B,U+003D,U+0000}, {U+002D,U+002D,U+0000}, {U+002D,U+003D,U+0000}, {U+002D,U+003E,U+0000}, {U+002E,U+002E,U+0000}, {U+002E,U+002E,U+002E,U+0000}, {U+002F,U+003D,U+0000}, {U+003A,U+003D,U+0000}, {U+003C,U+003D,U+0000}, {U+003D,U+003D,U+0000}, {U+003E,U+003D,U+0000}, {U+007C,U+007C,U+0000}, {U+007C,U+007C,U+007C,U+0000}, {U+223D,U+0331,U+0000}, {U+2242,U+0338,U+0000}, {U+224E,U+0338,U+0000}, {U+224F,U+0338,U+0000}, {U+2266,U+0338,U+0000}, {U+226A,U+0338,U+0000}, {U+226B,U+0338,U+0000}, {U+227F,U+0338,U+0000}, {U+2282,U+20D2,U+0000}, {U+2283,U+20D2,U+0000}, {U+228F,U+0338,U+0000}, {U+2290,U+0338,U+0000}, {U+29CF,U+0338,U+0000}, {U+29D0,U+0338,U+0000}, {U+2A7D,U+0338,U+0000}, {U+2A7E,U+0338,U+0000}, {U+2AA1,U+0338,U+0000}, {U+2AA2,U+0338,U+0000}, {U+2AAF,U+0338,U+0000}, {U+2AB0,U+0338,U+0000}, {U+2ADD,U+0338,U+0000}, {U+D83B,U+DEF0,U+0000}, {U+D83B,U+DEF1,U+0000},
Operators_fence57 entries (15 Unicode ranges): [U+0028–U+0029], {U+005B}, {U+005D}, [U+007B–U+007D], {U+2016}, [U+2018–U+2019], [U+201C–U+201D], [U+2308–U+230B], [U+2329–U+232A], [U+2772–U+2773], [U+27E6–U+27EF], {U+2980}, [U+2983–U+2998], [U+29FC–U+29FD], [U+E010–U+E011],
Operators_separator3 entries: U+002C, U+003B, U+2063,
Special tables for the operator dictionary.
Total size: 101 entries, 301 bytes.
(assuming characters are UTF-16 and 1-byte range lengths)
(Content, Form) keysCategory
138 entries (18 Unicode ranges) in infix form: [U+2190–U+2199], [U+219C–U+21AD], [U+21AF–U+21B5], {U+21B9}, [U+21BC–U+21CC], [U+21D0–U+21DD], [U+21E0–U+21F0], {U+21F3}, [U+21F5–U+21F6], [U+21FD–U+21FF], [U+27F0–U+27F1], [U+27F5–U+27FF], [U+290A–U+2910], [U+2912–U+2913], [U+2921–U+2922], [U+294E–U+2961], [U+296E–U+296F], [U+2B45–U+2B46], 0
103 entries (36 Unicode ranges) in infix form: {U+002B}, {U+002D}, {U+002F}, {U+00B1}, {U+00F7}, [U+2212–U+2214], {U+2216}, {U+2218}, {U+2224}, [U+2227–U+222A], {U+2236}, {U+2238}, [U+228C–U+228F], [U+2293–U+2296], {U+2298}, [U+229D–U+229F], [U+22BB–U+22BD], {U+22C4}, {U+22C6}, [U+22CE–U+22CF], [U+22D2–U+22D3], [U+2795–U+2797], {U+27F4}, {U+29BC}, {U+29F6}, [U+2A22–U+2A2E], [U+2A38–U+2A3A], [U+2A40–U+2A4F], [U+2A51–U+2A63], [U+2ADA–U+2ADB], {U+2AFB}, {U+2AFD}, {U+2B32}, {U+E002}, {U+E005}, {U+E007}, 1
89 entries (42 Unicode ranges) in infix form: {U+0025}, {U+002A}, {U+002E}, {U+0040}, {U+00B7}, {U+00D7}, {U+2022}, {U+2043}, {U+2206}, {U+220E}, {U+2217}, [U+223F–U+2240], {U+2297}, {U+2299}, [U+22A0–U+22A1], {U+22C5}, {U+22C7}, [U+22C9–U+22CC], [U+2305–U+2306], [U+25A0–U+25A1], [U+25AA–U+25AB], [U+25AD–U+25B1], [U+2981–U+2982], [U+2999–U+299A], {U+29B5}, [U+29C2–U+29C3], [U+29C9–U+29CD], [U+29D8–U+29D9], {U+29DB}, [U+29DF–U+29E0], {U+29E2}, [U+29E7–U+29ED], [U+29F8–U+29FB], [U+2A1D–U+2A21], [U+2A2F–U+2A37], [U+2A3B–U+2A3D], {U+2A3F}, {U+2A50}, [U+2ADC–U+2ADD], {U+2AFE}, [U+E010–U+E012], {U+E026}, 2
53 entries (22 Unicode ranges) in prefix form: {U+0021}, {U+002B}, {U+002D}, {U+00AC}, {U+00B1}, {U+2018}, {U+201C}, [U+2200–U+2201], [U+2203–U+2204], {U+2207}, [U+2212–U+2213], [U+221B–U+221C], [U+221F–U+2222], {U+223C}, [U+22BE–U+22BF], {U+2310}, {U+2319}, [U+2795–U+2796], {U+27C0}, [U+299B–U+29AF], [U+2AEC–U+2AED], [U+E010–U+E011], 3
42 entries (22 Unicode ranges) in postfix form: [U+0021–U+0022], [U+0026–U+0027], {U+0060}, {U+00A8}, {U+00B0}, [U+00B2–U+00B4], [U+00B8–U+00B9], [U+02CA–U+02CB], [U+02D8–U+02DA], {U+02DD}, {U+0311}, [U+2019–U+201B], [U+201D–U+201F], [U+2032–U+2037], {U+2057}, [U+20DB–U+20DC], {U+23CD}, {U+E000}, {U+E004}, {U+E006}, [U+E009–U+E00A], [U+E010–U+E011], 4
26 entries (16 Unicode ranges) in postfix form: [U+005E–U+005F], {U+007E}, {U+00AF}, [U+02C6–U+02C7], {U+02C9}, {U+02CD}, {U+02DC}, {U+02F7}, {U+0302}, {U+2016}, {U+203E}, [U+2322–U+2323], [U+23B4–U+23B5], [U+23DC–U+23E1], {U+2980}, [U+E027–U+E028], 5
25 entries in prefix form: U+0028, U+005B, U+007B, U+007C, U+2308, U+230A, U+2329, U+2772, U+27E6, U+27E8, U+27EA, U+27EC, U+27EE, U+2983, U+2985, U+2987, U+2989, U+298B, U+298D, U+298F, U+2991, U+2993, U+2995, U+2997, U+29FC, 6
25 entries in postfix form: U+0029, U+005D, U+007C, U+007D, U+2309, U+230B, U+232A, U+2773, U+27E7, U+27E9, U+27EB, U+27ED, U+27EF, U+2984, U+2986, U+2988, U+298A, U+298C, U+298E, U+2990, U+2992, U+2994, U+2996, U+2998, U+29FD, 7
22 entries (3 Unicode ranges) in prefix form: [U+222B–U+2233], [U+2A0B–U+2A0F], [U+2A15–U+2A1C], 8
18 entries (5 Unicode ranges) in prefix form: [U+220F–U+2210], [U+22C0–U+22C3], [U+2A00–U+2A09], {U+2AFC}, {U+2AFF}, 9
7 entries (3 Unicode ranges) in prefix form: {U+2211}, {U+2A0A}, [U+2A10–U+2A14], 10
6 entries (3 Unicode ranges) in infix form: {U+005C}, [U+2061–U+2064], {U+2396}, 11
3 entries in infix form: U+002C, U+003A, U+003B, 12
3 entries in prefix form: U+2145, U+2146, U+2202, 13
Mapping from operator (Content, Form) to a category.
Total size: 560 entries, 622 bytes.
(assuming characters are UTF-16 and 1-byte range lengths)
Categoryrspacelspaceproperties
00.2777777777777778em0.2777777777777778emstretchy
10.2222222222222222em0.2222222222222222emN/A
20.16666666666666666em0.16666666666666666emN/A
300N/A
400N/A
500stretchy
600stretchy symmetric
700stretchy symmetric
80.16666666666666666em0.16666666666666666emsymmetric largeop
90.05555555555555555em0.1111111111111111emsymmetric largeop movablelimits
100.16666666666666666em0.16666666666666666emsymmetric largeop movablelimits
1100N/A
1200.16666666666666666emN/A
130.16666666666666666em0N/A
Operators values for each category.
\ No newline at end of file +
Special TableEntries
Operators_multichar41 entries (null-terminated UTF-16 strings): {U+0021,U+0021,U+0000}, {U+0021,U+003D,U+0000}, {U+0026,U+0026,U+0000}, {U+002A,U+003D,U+0000}, {U+002B,U+002B,U+0000}, {U+002B,U+003D,U+0000}, {U+002D,U+002D,U+0000}, {U+002D,U+003D,U+0000}, {U+002D,U+003E,U+0000}, {U+002E,U+002E,U+0000}, {U+002E,U+002E,U+002E,U+0000}, {U+002F,U+003D,U+0000}, {U+003A,U+003D,U+0000}, {U+003C,U+003D,U+0000}, {U+003D,U+003D,U+0000}, {U+003E,U+003D,U+0000}, {U+007C,U+007C,U+0000}, {U+007C,U+007C,U+007C,U+0000}, {U+223D,U+0331,U+0000}, {U+2242,U+0338,U+0000}, {U+224E,U+0338,U+0000}, {U+224F,U+0338,U+0000}, {U+2266,U+0338,U+0000}, {U+226A,U+0338,U+0000}, {U+226B,U+0338,U+0000}, {U+227F,U+0338,U+0000}, {U+2282,U+20D2,U+0000}, {U+2283,U+20D2,U+0000}, {U+228F,U+0338,U+0000}, {U+2290,U+0338,U+0000}, {U+29CF,U+0338,U+0000}, {U+29D0,U+0338,U+0000}, {U+2A7D,U+0338,U+0000}, {U+2A7E,U+0338,U+0000}, {U+2AA1,U+0338,U+0000}, {U+2AA2,U+0338,U+0000}, {U+2AAF,U+0338,U+0000}, {U+2AB0,U+0338,U+0000}, {U+2ADD,U+0338,U+0000}, {U+D83B,U+DEF0,U+0000}, {U+D83B,U+DEF1,U+0000},
Operators_fence57 entries (15 Unicode ranges): [U+0028–U+0029], {U+005B}, {U+005D}, [U+007B–U+007D], {U+2016}, [U+2018–U+2019], [U+201C–U+201D], [U+2308–U+230B], [U+2329–U+232A], [U+2772–U+2773], [U+27E6–U+27EF], {U+2980}, [U+2983–U+2998], [U+29FC–U+29FD], [U+E010–U+E011],
Operators_separator3 entries: U+002C, U+003B, U+2063,
Special tables for the operator dictionary.
Total size: 101 entries, 301 bytes.
(assuming characters are UTF-16 and 1-byte range lengths)
(Content, Form) keysCategory
138 entries (18 Unicode ranges) in infix form: [U+2190–U+2199], [U+219C–U+21AD], [U+21AF–U+21B5], {U+21B9}, [U+21BC–U+21CC], [U+21D0–U+21DD], [U+21E0–U+21F0], {U+21F3}, [U+21F5–U+21F6], [U+21FD–U+21FF], [U+27F0–U+27F1], [U+27F5–U+27FF], [U+290A–U+2910], [U+2912–U+2913], [U+2921–U+2922], [U+294E–U+2961], [U+296E–U+296F], [U+2B45–U+2B46], A
103 entries (36 Unicode ranges) in infix form: {U+002B}, {U+002D}, {U+002F}, {U+00B1}, {U+00F7}, [U+2212–U+2214], {U+2216}, {U+2218}, {U+2224}, [U+2227–U+222A], {U+2236}, {U+2238}, [U+228C–U+228F], [U+2293–U+2296], {U+2298}, [U+229D–U+229F], [U+22BB–U+22BD], {U+22C4}, {U+22C6}, [U+22CE–U+22CF], [U+22D2–U+22D3], [U+2795–U+2797], {U+27F4}, {U+29BC}, {U+29F6}, [U+2A22–U+2A2E], [U+2A38–U+2A3A], [U+2A40–U+2A4F], [U+2A51–U+2A63], [U+2ADA–U+2ADB], {U+2AFB}, {U+2AFD}, {U+2B32}, {U+E002}, {U+E005}, {U+E007}, B
89 entries (42 Unicode ranges) in infix form: {U+0025}, {U+002A}, {U+002E}, {U+0040}, {U+00B7}, {U+00D7}, {U+2022}, {U+2043}, {U+2206}, {U+220E}, {U+2217}, [U+223F–U+2240], {U+2297}, {U+2299}, [U+22A0–U+22A1], {U+22C5}, {U+22C7}, [U+22C9–U+22CC], [U+2305–U+2306], [U+25A0–U+25A1], [U+25AA–U+25AB], [U+25AD–U+25B1], [U+2981–U+2982], [U+2999–U+299A], {U+29B5}, [U+29C2–U+29C3], [U+29C9–U+29CD], [U+29D8–U+29D9], {U+29DB}, [U+29DF–U+29E0], {U+29E2}, [U+29E7–U+29ED], [U+29F8–U+29FB], [U+2A1D–U+2A21], [U+2A2F–U+2A37], [U+2A3B–U+2A3D], {U+2A3F}, {U+2A50}, [U+2ADC–U+2ADD], {U+2AFE}, [U+E010–U+E012], {U+E026}, C
53 entries (22 Unicode ranges) in prefix form: {U+0021}, {U+002B}, {U+002D}, {U+00AC}, {U+00B1}, {U+2018}, {U+201C}, [U+2200–U+2201], [U+2203–U+2204], {U+2207}, [U+2212–U+2213], [U+221B–U+221C], [U+221F–U+2222], {U+223C}, [U+22BE–U+22BF], {U+2310}, {U+2319}, [U+2795–U+2796], {U+27C0}, [U+299B–U+29AF], [U+2AEC–U+2AED], [U+E010–U+E011], D
42 entries (22 Unicode ranges) in postfix form: [U+0021–U+0022], [U+0026–U+0027], {U+0060}, {U+00A8}, {U+00B0}, [U+00B2–U+00B4], [U+00B8–U+00B9], [U+02CA–U+02CB], [U+02D8–U+02DA], {U+02DD}, {U+0311}, [U+2019–U+201B], [U+201D–U+201F], [U+2032–U+2037], {U+2057}, [U+20DB–U+20DC], {U+23CD}, {U+E000}, {U+E004}, {U+E006}, [U+E009–U+E00A], [U+E010–U+E011], E
26 entries (16 Unicode ranges) in postfix form: [U+005E–U+005F], {U+007E}, {U+00AF}, [U+02C6–U+02C7], {U+02C9}, {U+02CD}, {U+02DC}, {U+02F7}, {U+0302}, {U+2016}, {U+203E}, [U+2322–U+2323], [U+23B4–U+23B5], [U+23DC–U+23E1], {U+2980}, [U+E027–U+E028], F
25 entries in prefix form: U+0028, U+005B, U+007B, U+007C, U+2308, U+230A, U+2329, U+2772, U+27E6, U+27E8, U+27EA, U+27EC, U+27EE, U+2983, U+2985, U+2987, U+2989, U+298B, U+298D, U+298F, U+2991, U+2993, U+2995, U+2997, U+29FC, G
25 entries in postfix form: U+0029, U+005D, U+007C, U+007D, U+2309, U+230B, U+232A, U+2773, U+27E7, U+27E9, U+27EB, U+27ED, U+27EF, U+2984, U+2986, U+2988, U+298A, U+298C, U+298E, U+2990, U+2992, U+2994, U+2996, U+2998, U+29FD, H
22 entries (3 Unicode ranges) in prefix form: [U+222B–U+2233], [U+2A0B–U+2A0F], [U+2A15–U+2A1C], I
18 entries (5 Unicode ranges) in prefix form: [U+220F–U+2210], [U+22C0–U+22C3], [U+2A00–U+2A09], {U+2AFC}, {U+2AFF}, J
7 entries (3 Unicode ranges) in prefix form: {U+2211}, {U+2A0A}, [U+2A10–U+2A14], K
6 entries (3 Unicode ranges) in infix form: {U+005C}, [U+2061–U+2064], {U+2396}, L
3 entries in infix form: U+002C, U+003A, U+003B, M
3 entries in prefix form: U+2145, U+2146, U+2202, N
Mapping from operator (Content, Form) to a category.
Total size: 560 entries, 622 bytes.
(assuming characters are UTF-16 and 1-byte range lengths)
Categoryencodingrspacelspaceproperties
A0x00.2777777777777778em0.2777777777777778emstretchy
B0x40.2222222222222222em0.2222222222222222emN/A
C0x80.16666666666666666em0.16666666666666666emN/A
D0x100N/A
E0x200N/A
F0x600stretchy
G0x500stretchy symmetric
H0xA00stretchy symmetric
I0x90.16666666666666666em0.16666666666666666emsymmetric largeop
J0xD0.05555555555555555em0.1111111111111111emsymmetric largeop movablelimits
KN/A0.16666666666666666em0.16666666666666666emsymmetric largeop movablelimits
L0xC00N/A
MN/A00.16666666666666666emN/A
NN/A0.16666666666666666em0N/A
Operators values for each category.
The second column provides a 4-bit encoding of the categories
where the 2 least significant bits encode the form: infix (0), prefix (1) and postfix (2).
\ No newline at end of file diff --git a/tables/operator-dictionary.py b/tables/operator-dictionary.py index c07df44..cc9c13f 100755 --- a/tables/operator-dictionary.py +++ b/tables/operator-dictionary.py @@ -599,17 +599,19 @@ def serializeValue(value, fence, separator): md.write("U+%04X, " % entry) totalBytes += 2 * count md.write("") - md.write("%d" % value_index); + md.write("%s" % chr(ord('A') + value_index)); value_index += 1; md.write("") md.write(""); md.write('
Mapping from operator (Content, Form) to a category.
Total size: %d entries, %d bytes.
(assuming characters are UTF-16 and 1-byte range lengths)
' % (totalEntryCount, totalBytes)) md.write('') + +category_for_form = [0, 0, 0] value_index = 0 md.write('
') md.write(""); -md.write("") +md.write("") for name, item in sorted(knownTables.items(), key=(lambda v: len(v[1]["singleChar"])), reverse=True): @@ -617,16 +619,27 @@ def serializeValue(value, fence, separator): continue for entry in knownTables[name]["singleChar"]: md.write(""); - md.write("" % value_index) - md.write(serializeValue(knownTables[name]["value"], - False, - False)) + md.write("" % chr(ord('A') + value_index)) + form = knownTables[name]["value"]["form"] + if form == "infix": + form = 0 + elif form == "prefix": + form = 1 + elif form == "postfix": + form = 2 + if category_for_form[form] >= 4: + md.write("") + else: + hexa = form + (category_for_form[form] << 2) + category_for_form[form] += 1 + md.write("" % hexa) + md.write(serializeValue(knownTables[name]["value"], False, False)) md.write(""); break value_index += 1 md.write("
Categoryrspacelspaceproperties
Categoryencodingrspacelspaceproperties
%d%sN/A0x%01X
"); -md.write('
Operators values for each category.
') +md.write('
Operators values for each category.
The second column provides a 4-bit encoding of the categories
where the 2 least significant bits encode the form: infix (0), prefix (1) and postfix (2).
') md.write('
') print("done.");