forked from KaTeX/KaTeX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_ttfs.py
executable file
·115 lines (103 loc) · 4.03 KB
/
extract_ttfs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
from fontTools.ttLib import TTFont
import sys
import json
# map of characters to extract
metrics_to_extract = {
# Font name
"AMS-Regular": {
u"\u21e2": None, # \dashrightarrow
u"\u21e0": None, # \dashleftarrow
},
"Main-Regular": {
# Skew and italic metrics can't be easily parsed from the TTF. Instead,
# we map each character to a "base character", which is a character
# from the same font with correct italic and skew metrics. A character
# maps to None if it doesn't have a base.
u"\u2260": None, # \neq
u"\u2245": None, # \cong
u"\u0020": None, # space
u"\u00a0": None, # nbsp
u"\u2026": None, # \ldots
u"\u22ef": None, # \cdots
u"\u22f1": None, # \ddots
u"\u22ee": None, # \vdots
u"\u22ee": None, # \vdots
u"\u22a8": None, # \models
u"\u22c8": None, # \bowtie
u"\u2250": None, # \doteq
u"\u23b0": None, # \lmoustache
u"\u23b1": None, # \rmoustache
u"\u27ee": None, # \lgroup
u"\u27ef": None, # \rgroup
u"\u27f5": None, # \longleftarrow
u"\u27f8": None, # \Longleftarrow
u"\u27f6": None, # \longrightarrow
u"\u27f9": None, # \Longrightarrow
u"\u27f7": None, # \longleftrightarrow
u"\u27fa": None, # \Longleftrightarrow
u"\u21a6": None, # \mapsto
u"\u27fc": None, # \longmapsto
u"\u21a9": None, # \hookleftarrow
u"\u21aa": None, # \hookrightarrow
u"\u21cc": None, # \rightleftharpoons
},
"Size1-Regular": {
u"\u222c": u"\u222b", # \iint, based on \int
u"\u222d": u"\u222b", # \iiint, based on \int
},
"Size2-Regular": {
u"\u222c": u"\u222b", # \iint, based on \int
u"\u222d": u"\u222b", # \iiint, based on \int
},
}
def main():
start_json = json.load(sys.stdin)
for font, chars in metrics_to_extract.iteritems():
fontInfo = TTFont("../submodules/katex-fonts/fonts/KaTeX_" + font + ".ttf")
glyf = fontInfo["glyf"]
unitsPerEm = float(fontInfo["head"].unitsPerEm)
# We keep ALL Unicode cmaps, not just fontInfo["cmap"].getcmap(3, 1).
# This is playing it extra safe, since it reports inconsistencies.
# Platform 0 is Unicode, platform 3 is Windows. For platform 3,
# encoding 1 is UCS-2 and encoding 10 is UCS-4.
cmap = [t.cmap for t in fontInfo["cmap"].tables
if (t.platformID == 0)
or (t.platformID == 3 and t.platEncID in (1, 10))]
for char, base_char in chars.iteritems():
code = ord(char)
names = set(t.get(code) for t in cmap)
if not names:
sys.stderr.write(
"Codepoint {} of font {} maps to no name\n"
.format(code, font))
continue
if len(names) != 1:
sys.stderr.write(
"Codepoint {} of font {} maps to multiple names: {}\n"
.format(code, font, ", ".join(sorted(names))))
continue
name = names.pop()
height = depth = italic = skew = width = 0
glyph = glyf[name]
if glyph.numberOfContours:
height = glyph.yMax
depth = -glyph.yMin
width = glyph.xMax - glyph.xMin
if base_char:
base_char_str = str(ord(base_char))
base_metrics = start_json[font][base_char_str]
italic = base_metrics["italic"]
skew = base_metrics["skew"]
width = base_metrics["width"]
start_json[font][str(code)] = {
"height": height / unitsPerEm,
"depth": depth / unitsPerEm,
"italic": italic,
"skew": skew,
"width": width
}
sys.stdout.write(
json.dumps(start_json, separators=(',', ':'), sort_keys=True))
if __name__ == "__main__":
main()