-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.civet
213 lines (190 loc) · 6.38 KB
/
index.civet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env node
"civet coffeeComment coffeeEq coffeeInterpolation"
# Ambient declaration: tells the type checker that a global `fetch` exists
# and has the same type as the `fetch` export of the `undici` package.
# Nothing is imported at runtime by this declaration.
declare global
  var fetch: typeof import('undici').fetch
{load as loadCheerio, type Cheerio, type Element} from 'cheerio'
# Download `url` and parse the response body with cheerio [async].
# Returns the loaded cheerio document.
# Throws an Error when the response status is not OK.
function fromURL(url: string)
  response := await fetch url
  if not response.ok
    throw new Error "Failed to fetch #{url}"
  html := await response.text()
  loadCheerio html
# Identifier of a person; it is substituted for the `id` query parameter
# in `Fetcher.urlFor`.
type ID = string
# Which relation to follow when traversing or rendering:
# "person" means no traversal; "advisors" and "students" name the
# `Person` field whose ids are followed recursively.
type Recursion = "person" | "advisors" | "students"
# One degree entry scraped from a person's page (see `Fetcher.fetch`).
# All text fields are trimmed and may be empty strings.
interface Degree
  degree: string       # degree name (leading text of the heading)
  institution: string  # granting institution (inner span of the heading)
  thesis: string       # thesis title
  year: string         # year (trailing text of the heading)
  advisors: ID[]       # ids of the advisors listed for this degree
# Everything `Fetcher` stores about one person.
interface Person
  id: ID
  name: string
  degrees: Degree[]
  advisors: ID[] # union of all advisors in all degrees
  students: ID[] # ids of all students linked from the person's page
# Indent every line of `s` by one level, leaving any run of trailing
# newlines untouched (so no indentation is added after the final newline).
function indent(s: string): string
  # split into the text proper and its trailing newlines
  body := s.replace /\n*$/, ''
  trailing := s[body.length..]
  # indent the text, then reattach the trailing newlines
  body.replace(/^/gm, ' ') + trailing
# Escape `text` for inclusion in HTML text content or in a double-quoted
# attribute value. Scraped data is untrusted and may contain `&`, `<`, etc.
function escapeHTML(text: string): string
  text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')

# Fetches people from the Mathematics Genealogy Project, caches them by id,
# and renders cached people as an HTML tree.
class Fetcher
  # All people fetched so far, keyed by id.
  cache = new Map<ID, Person>()

  # Did we already fetch this id?
  has(id: ID): boolean
    @cache.has id

  # Get cached person for this id, if already fetched
  get(id: ID): Person | undefined
    @cache.get id

  # URL of the page describing the person with this id.
  urlFor(id: ID): string
    "https://www.mathgenealogy.org/id.php?id=#{id}"

  # Fetch the specified id and, unless `recursion` is "person", everyone
  # reachable from it by repeatedly following the `recursion` field
  # ("advisors" or "students") [async]. Resolves to this fetcher.
  # `visited` tracks the ids handled by this traversal; callers normally
  # omit it. It is kept separate from the cache so that people cached by
  # an earlier `fetch`/`recurse` call do not cut the traversal short.
  recurse(id: ID, recursion: Recursion, visited = new Set<ID>()): Promise<Fetcher>
    return @ if visited.has id
    visited.add id
    person := await @fetch id
    return @ if recursion is "person"
    for relative of person[recursion]
      await @recurse relative, recursion, visited
    @

  # Fetch data for the specified id, cache it and return it [async].
  # A cached person is returned without any network access.
  # Originally based on
  # https://github.com/davidalber/geneagrapher/blob/main/geneagrapher/grabber.py
  fetch(id: ID): Promise<Person>
    cached := @cache.get id
    return cached if cached?
    $ := await fromURL @urlFor(id)
    # Collect the last "="-separated piece of each link's href,
    # skipping elements that have no href.
    function extractIds(part: Cheerio<Element>): ID[]
      for place of part
        href := $(place).attr 'href'
        continue unless href?
        href.split("=").at(-1)!
    # Degrees/theses/advisors are split into sibling groups by these headings
    degreeHeading := 'div[style="line-height: 30px; text-align: center; margin-bottom: 1ex"]'
    # Advisor links appear in a paragraph following the thesis title
    advisorLinks := 'p[style="text-align: center; line-height: 2.75ex"]:contains("Advisor") a'
    degrees :=
      for element of $(degreeHeading)
        heading := $(element)
        section := heading.nextUntil degreeHeading
        span := heading.children 'span'
        degree: span.contents().first().text().trim() # leading text
        institution: span.find('span').text().trim() # inner span
        year: span.contents().last().text().trim() # trailing text
        thesis: section.find('#thesisTitle').text().trim()
        advisors: extractIds section.find(advisorLinks)
    person: Person :=
      id: id
      name: $("h2").text().trim()
      degrees: degrees
      advisors: Array.from new Set degrees.flatMap .advisors
      students: extractIds $('table a')
    @cache.set id, person
    person

  # HTML rendering

  # Keep track of which people have already been rendered,
  # initialized by `html` method and used by other `html*` methods.
  rendered = new Set<ID>()

  # Call this to render HTML in one of three recursion styles:
  # * "person": just the person themselves
  # * "advisors": all their recursive ancestors too
  # * "students": all their recursive descendants too
  # You should have already called `recurse` with the same arguments,
  # so that all data is already available in the cache.
  html(id: ID, recursion: Recursion): string
    @rendered = new Set
    body := @htmlStyle @htmlRecurse(id, recursion)
    # Every rendered line (including "(above)" repetitions) is one node div
    nodes := Array.from(body.matchAll(/"node"/g)).length
    """
      <!-- Generated by https://github.com/edemaine/mathcestor -->
      <!-- #{@rendered.size} total people, #{nodes} lines with repetitions -->
      #{body}
    """

  # Render one person as a link followed by their degrees, e.g.
  # `<a ...>Name</a> (Ph.D., Institution, 1990; ...)`.
  # A person who was already rendered is shown as `<em>... (above)</em>`
  # without degrees. All scraped text is HTML-escaped.
  # Throws an Error if the person is not in the cache.
  htmlPerson(id: ID): string
    person := @get id
    unless person?
      throw new Error "Couldn't find person with ID #{id}; did you call fetch or recurse?"
    base := """<a href="#{escapeHTML(@urlFor id)}">#{escapeHTML(person.name)}</a>"""
    if @rendered.has id
      return "<em>#{base} (above)</em>"
    @rendered.add id
    # Join the non-empty fields of each degree with ", " and the non-empty
    # degrees with "; ", so missing fields never leave stray separators.
    parts: string[] := []
    for {degree, institution, year} of person.degrees
      fields := [degree, institution, year].filter((field) => field)
      parts.push fields.join(', ') if fields.length
    return base unless parts.length
    "#{base} (#{escapeHTML(parts.join '; ')})"

  # Render the person and, unless `recursion` is "person", the subtree
  # reached through `person[recursion]`. Children of a person who was
  # already rendered elsewhere are not repeated.
  htmlRecurse(id: ID, recursion: Recursion): string
    # Must be read before htmlPerson marks this id as rendered
    alreadyRendered := @rendered.has id
    label := @htmlPerson id
    s .= """
      <div class="node">
      <div class="person">
      #{label}
      </div>
      </div>
    """
    # Block strings are trimmed of their final line break, so terminate the
    # fragment explicitly: `indent` and the wrappers appended below assume
    # newline-terminated fragments.
    s += "\n"
    return s if recursion is 'person'
    children := @get(id)![recursion]
    if children.length and not alreadyRendered
      # The last child gets a different wrapper class than the others
      [...middle, last] := children
      if middle.length
        s += """<div class="middle">\n"""
        for child of middle
          s += indent @htmlRecurse(child, recursion)
        s += """</div>\n"""
      s += """<div class="last">\n"""
      s += indent @htmlRecurse(last, recursion)
      s += """</div>\n"""
    s

  # Prefix the rendered tree with a charset declaration and the stylesheet
  # that draws the tree's connecting lines.
  htmlStyle(html: string): string
    """
      <meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
      <style>
      :root {
      --border: 2px solid gray;
      }
      .middle, .last { margin-left: 1em; }
      .middle { border-left: var(--border); }
      .node { display: flex; }
      .node::before {
      border-bottom: var(--border);
      width: 0.5em;
      height: 0.6em;
      display: inline-block;
      content: "\\00a0";
      }
      .last > .node::before {
      border-left: var(--border);
      }
      .person { padding-left: 0.25em; }
      </style>
      #{html}
    """
# Command-line entry point [async]: `mathcestor ID [advisors/students]`.
# Fetches the person with the given ID (plus their recursive advisors or
# students, if requested) and prints the HTML rendering to stdout.
# On invalid arguments, prints usage and exits with status 1.
function main()
  [root, recurse] .= process.argv[2..]
  recurse or= 'person'
  unless root and recurse is in ['advisors', 'students', 'person']
    # Usage goes to stderr: stdout is normally redirected into the HTML file
    console.error """
      Usage: mathcestor ID [advisors/students] >out.html
      where ID is the number at the end of a URL from https://www.mathgenealogy.org/
      and advisors/students optionally specifies which direction to recurse
    """
    process.exit 1
  fetcher := new Fetcher()
  await fetcher.recurse root, recurse as Recursion
  console.log fetcher.html(root, recurse as Recursion)
* as url from 'node:url'
* as fs from 'node:fs'
# Run `main` only when this module is the script Node was started with:
# compare the real (symlink-resolved) path of argv[1] with this file's path.
if import.meta.url.startsWith 'file:'
  scriptPath := fs.realpathSync process.argv[1]
  modulePath := fs.realpathSync url.fileURLToPath(import.meta.url)
  main() if scriptPath == modulePath