bench_download.py
#!/usr/bin/env python
"""
Script for downloading Engine benchmark results into a single static web page
that visualizes all the benchmarks. Without any options, downloads and
visualizes benchmark data for the last 14 days. By default, no data is written
to the disk except for the generated web page, and the data are downloaded
asynchronously.
Set the `--source` parameter to either `engine` or `stdlib`.
The generated website is placed under "generated_site" directory
The default GH artifact retention period is 3 months, which means that all
the artifacts older than 3 months are dropped. If you wish to gather the data
for benchmarks older than 3 months, make sure that the `use_cache` parameter
is set to true, and that the cache directory is populated with older data.
If the script encounters an expired artifact, it prints a warning.
This script is under continuous development, so it is advised to use
`-v|--verbose` option all the time.
It queries only successful benchmark runs. If there are no successful benchmarks
in a given period, no results will be written.
The process of the script is roughly as follows:
- Asynchronously gather all the benchmark results from GH API into job reports (JobReport dataclass)
- Use cache if possible to avoid unnecessary GH API queries
- Transform the gathered results into data for a particular benchmark sorted
by an appropriate commit timestamp.
- BenchmarkData class
If you wish to inspect the data yourself, just use --create-csv option.
Dependencies for the script:
- GH CLI utility
- https://cli.github.com/
- Used for convenience to do the GH API queries.
- It needs to be installed, and you should also authenticate.
- Python version >= 3.7
- Python 3rd party packages:
- pandas
- Used for convenience for a very simple data processing
- jinja2
- Used as a template engine for the HTML.
"""
import sys
from dataclasses import dataclass
from bench_tool.bench_results import get_bench_runs, fetch_job_reports
from bench_tool.remote_cache import ReadonlyRemoteCache
from bench_tool.utils import gather_all_bench_labels, sort_job_reports

if sys.version_info < (3, 7):
    print("ERROR: python version lower than 3.7")
    exit(1)

import asyncio
import logging
import logging.config
import os
import shutil
import tempfile
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from csv import DictWriter
from datetime import datetime, timedelta
from os import path
from typing import List, Dict, Optional, Set
from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, \
    GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \
    JINJA_TEMPLATE, JobRun, JobReport, \
    TemplateBenchData, JinjaData, Source
from bench_tool.gh import ensure_gh_installed
from bench_tool.template_render import create_template_data, render_html
try:
    import pandas as pd
    import numpy as np
    import jinja2
except ModuleNotFoundError as err:
    print("ERROR: One of pandas, numpy, or jinja2 packages not installed",
          file=sys.stderr)
    print("Install either with `pip install pandas numpy jinja2` or "
          "with `apt-get install python3-pandas python3-numpy python3-jinja2`",
          file=sys.stderr)
    exit(1)

@dataclass
class CsvRow:
    label: str
    score: str
    commit_id: str
    commit_title: str
    commit_timestamp: str
    commit_author: str
    bench_run_id: str
    bench_run_url: str
    bench_run_event: str

def write_bench_reports_to_csv(bench_reports: List[JobReport],
                               csv_fname: str) -> None:
    logging.info(
        f"Writing {len(bench_reports)} benchmark reports to {csv_fname}")
    csv_fieldnames = CsvRow.__annotations__.keys()
    assert len(bench_reports) > 0
    with open(csv_fname, "w") as csv_file:
        csv_writer = DictWriter(csv_file, csv_fieldnames)
        csv_writer.writeheader()
        for bench_report in bench_reports:
            for label, score in bench_report.label_score_dict.items():
                commit_title = \
                    bench_report.bench_run.head_commit.message.splitlines()[0]
                commit_title = commit_title.replace(",", " ")
                # Ensure that the score is not printed with exponential
                # notation, as Enso cannot easily parse that by default now.
                score_formatted = f"{score:.9f}"
                row = CsvRow(
                    label=label,
                    score=score_formatted,
                    commit_id=bench_report.bench_run.head_commit.id,
                    commit_title=commit_title,
                    commit_author=bench_report.bench_run.head_commit.author.name,
                    commit_timestamp=bench_report.bench_run.head_commit.timestamp,
                    bench_run_id=bench_report.bench_run.id,
                    bench_run_url=bench_report.bench_run.html_url,
                    bench_run_event=bench_report.bench_run.event
                )
                csv_writer.writerow(row.__dict__)

async def main():
    default_since: datetime = (datetime.now() - timedelta(days=14))
    default_until: datetime = datetime.now()
    default_csv_out = "benchs.csv"
    date_format_help = DATE_FORMAT.replace("%", "%%")

    def _parse_bench_source(_bench_source: str) -> Source:
        try:
            return Source(_bench_source)
        except ValueError:
            print(f"Invalid benchmark source {_bench_source}.", file=sys.stderr)
            print(f"Available sources: {[source.value for source in Source]}",
                  file=sys.stderr)
            exit(1)

    arg_parser = ArgumentParser(description=__doc__,
                                formatter_class=RawDescriptionHelpFormatter)
    arg_parser.add_argument("-v", "--verbose", action="store_true")
    arg_parser.add_argument("-s", "--source",
                            action="store",
                            required=True,
                            metavar=f"({Source.ENGINE.value}|{Source.STDLIB.value})",
                            type=lambda s: _parse_bench_source(s),
                            help=f"The source of the benchmarks. Available sources: "
                                 f"{[source.value for source in Source]}")
    arg_parser.add_argument("--since", action="store",
                            default=default_since,
                            metavar="SINCE_DATE",
                            type=lambda s: datetime.strptime(s, DATE_FORMAT),
                            help=f"The date from which the benchmark results will be gathered. "
                                 f"Format is {date_format_help}. "
                                 f"The default is 14 days ago.")
    arg_parser.add_argument("--until", action="store",
                            default=default_until,
                            metavar="UNTIL_DATE",
                            type=lambda s: datetime.strptime(s, DATE_FORMAT),
                            help=f"The date until which the benchmark results will be gathered. "
                                 f"Format is {date_format_help}. "
                                 f"The default is today.")
    arg_parser.add_argument("-b", "--branches", action="store",
                            nargs="+",
                            default=["develop"],
                            help="List of branches to gather the benchmark results from. "
                                 "The default is ['develop'].")
    arg_parser.add_argument("-l", "--labels", action="store",
                            nargs="+",
                            default=set(),
                            help="List of labels to gather the benchmark results for. "
                                 "The default behavior is to gather all the labels.")
    arg_parser.add_argument("-t", "--tmp-dir", action="store",
                            default=None,
                            help="Temporary directory; by default one is created by "
                                 "`tempfile.mkdtemp()`.")
    arg_parser.add_argument("--create-csv", action="store_true",
                            default=False,
                            help="Whether an intermediate `benchs.csv` should be created. "
                                 "Useful to check whether the benchmark downloading was "
                                 "successful, or if you wish to inspect the CSV with Enso.")
    arg_parser.add_argument("--csv-output",
                            default=default_csv_out,
                            metavar="CSV_OUTPUT",
                            help="Output CSV file. Makes sense only when used with the "
                                 "--create-csv argument.")
    args = arg_parser.parse_args()
    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level, stream=sys.stdout)

    since: datetime = args.since
    until: datetime = args.until
    if not args.tmp_dir:
        temp_dir: str = tempfile.mkdtemp()
    else:
        temp_dir: str = args.tmp_dir
    bench_source: Source = args.source
    csv_output: str = args.csv_output
    create_csv: bool = args.create_csv
    branches: List[str] = args.branches
    labels_override: Set[str] = args.labels
    logging.debug(f"parsed args: since={since}, until={until}, "
                  f"temp_dir={temp_dir}, bench_source={bench_source}, "
                  f"csv_output={csv_output}, "
                  f"create_csv={create_csv}, branches={branches}, "
                  f"labels_override={labels_override}")

    ensure_gh_installed()

    # If the user requires benchmarks for which the artifacts are no longer
    # retained, the cache should be used.
    min_since_without_cache = datetime.today() - GH_ARTIFACT_RETENTION_PERIOD
    if since < min_since_without_cache:
        logging.info(f"The default GH artifact retention period is "
                     f"{GH_ARTIFACT_RETENTION_PERIOD.days} days. "
                     f"This means that all the artifacts older than "
                     f"{min_since_without_cache.date()} are expired. "
                     f"The since date was set to {since}, so the remote cache is enabled, "
                     f"and the older artifacts will be fetched from the cache.")

    remote_cache = ReadonlyRemoteCache()

    bench_labels: Optional[Set[str]] = None
    """ Set of all gathered benchmark labels from all the job reports """
    job_reports_per_branch: Dict[str, List[JobReport]] = {}
    for branch in branches:
        bench_runs: List[JobRun] = []
        for workflow_id in bench_source.workflow_ids():
            bench_runs.extend(
                await get_bench_runs(since, until, branch, workflow_id)
            )
        if len(bench_runs) == 0:
            print(
                f"No successful benchmarks found within period since {since}"
                f" until {until} for branch {branch}")
            exit(1)

        job_reports = await fetch_job_reports(bench_runs, remote_cache)
        logging.debug(f"Got {len(job_reports)} job reports for branch {branch}")
        if len(job_reports) == 0:
            print(f"There were 0 job_reports in the specified time interval, "
                  f"for branch {branch}, so "
                  "there is nothing to visualize or compare.")
            exit(1)

        logging.debug("Sorting job_reports by commit date")
        sort_job_reports(job_reports)

        if create_csv:
            write_bench_reports_to_csv(job_reports, csv_output)
            logging.info(f"Benchmarks written to {csv_output}")
            print(f"The generated CSV is in {csv_output}")
            exit(0)

        # Gather all the benchmark labels from all the job reports
        if bench_labels is None:
            all_bench_labels = gather_all_bench_labels(job_reports)
            if len(labels_override) > 0:
                logging.info(f"Subset of labels specified: {labels_override}")
                if not set(labels_override).issubset(all_bench_labels):
                    print(
                        f"Specified bench labels {labels_override} are not a subset of "
                        f"all bench labels {all_bench_labels}")
                    exit(1)
                bench_labels = labels_override
            else:
                bench_labels = all_bench_labels
        logging.debug(f"Gathered bench_labels: {bench_labels}")

        job_reports_per_branch[branch] = job_reports

    template_bench_datas: List[TemplateBenchData] = \
        create_template_data(job_reports_per_branch, bench_labels)
    template_bench_datas.sort(key=lambda data: data.id)

    jinja_data = JinjaData(
        since=since,
        display_since=max(until - timedelta(days=30), since),
        until=until,
        bench_datas=template_bench_datas,
        bench_source=bench_source,
        branches=branches,
        timestamp=datetime.now()
    )

    # Render the Jinja template with jinja_data
    if not path.exists(GENERATED_SITE_DIR):
        os.mkdir(GENERATED_SITE_DIR)

    logging.debug(
        f"Rendering HTML from {JINJA_TEMPLATE} to {GENERATED_SITE_DIR}")
    site_path = GENERATED_SITE_DIR.joinpath(bench_source.value + "-benchs.html")
    render_html(
        jinja_data,
        site_path
    )
    logging.debug(
        f"Copying static site content from {TEMPLATES_DIR} to {GENERATED_SITE_DIR}")
    shutil.copy(
        path.join(TEMPLATES_DIR, "styles.css"),
        path.join(GENERATED_SITE_DIR, "styles.css")
    )

    index_html_abs_path = path.abspath(site_path)
    print(f"The generated HTML is in {index_html_abs_path}")
    print(f"Open file://{index_html_abs_path} in the browser")

if __name__ == "__main__":
    asyncio.run(main())