-
Notifications
You must be signed in to change notification settings - Fork 275
/
generate_report.py
293 lines (255 loc) · 11.1 KB
/
generate_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Report generator tool."""
import argparse
import os
import sys
import pandas as pd
from analysis import data_utils
from analysis import coverage_data_utils
from analysis import experiment_results
from analysis import plotting
from analysis import queries
from analysis import rendering
from common import filesystem
from common import logs
# Module-level logger for the progress messages emitted by this tool.
logger = logs.Logger()
# Name of the data file kept inside the report directory; it is written next
# to the report and read back when cached data is requested.
DATA_FILENAME = 'data.csv.gz'
def get_arg_parser():
    """Builds the command line parser of the report generator.

    Returns:
        An argparse.ArgumentParser that takes one or more positional
        experiment names plus the optional report, filtering and merging
        options.
    """

    def add_switch(target, *names, **kwargs):
        """Registers an on/off option on |target| that is False by default."""
        target.add_argument(*names,
                            action='store_true',
                            default=False,
                            **kwargs)

    arg_parser = argparse.ArgumentParser(description='Report generator.')
    arg_parser.add_argument('experiments',
                            nargs='+',
                            help='Experiment names')

    # Where the report goes and which template it uses.
    arg_parser.add_argument(
        '-n',
        '--report-name',
        help='Name of the report. Default: name of the first experiment.')
    arg_parser.add_argument(
        '-t',
        '--report-type',
        choices=['default', 'experimental'],
        default='default',
        help='Type of the report (which template to use). Default: default.')
    arg_parser.add_argument(
        '-d',
        '--report-dir',
        default='./report',
        help='Directory for writing a report. Default: ./report')

    # Plotting options.
    add_switch(
        arg_parser,
        '-q',
        '--quick',
        help='If set, plots are created faster, but contain less details.')
    add_switch(
        arg_parser,
        '--log-scale',
        help='If set, the time axis of the coverage growth plot is log scale.')

    # Options restricting which data ends up in the report.
    arg_parser.add_argument(
        '-b',
        '--benchmarks',
        nargs='*',
        help='Names of the benchmarks to include in the report.')
    arg_parser.add_argument(
        '-e',
        '--end-time',
        default=None,
        type=int,
        help=('The last time (in seconds) during an experiment to include in '
              'the report.'))
    arg_parser.add_argument(
        '-f',
        '--fuzzers',
        nargs='*',
        help='Names of the fuzzers to include in the report.')
    add_switch(
        arg_parser,
        '-cov',
        '--coverage-report',
        help='If set, clang coverage reports and differential plots are shown.')

    # Labeling by experiment and clobbering cannot be combined: once fuzzers
    # are labeled by experiment there is nothing left that could be clobbered.
    exclusive_options = arg_parser.add_mutually_exclusive_group()
    add_switch(
        exclusive_options,
        '-l',
        '--label-by-experiment',
        help='If set, then the report will track progress made in experiments')
    add_switch(
        exclusive_options,
        '-m',
        '--merge-with-clobber',
        help=('When generating a report from multiple experiments, and trials '
              'exist for fuzzer-benchmark pairs in multiple experiments, only '
              'include trials for that pair from the last experiment. For '
              'example, if experiment "A" has all fuzzers but experiment "B" '
              'has used an updated version of afl++, this option allows you to '
              'get a report of all trials in "A" except for afl++ and all the '
              'trials from "B". "Later experiments" are those whose names come '
              'later when passed to this script.'))
    add_switch(
        exclusive_options,
        '-p',
        '--merge-with-clobber-nonprivate',
        help=('Does --merge-with-clobber but includes all experiments that are '
              'not private. See help for --merge-with-clobber for more '
              'details.'))

    add_switch(
        arg_parser,
        '-c',
        '--from-cached-data',
        help=('If set, and the experiment data is already cached, '
              'don\'t query the database again to get the data.'))
    return arg_parser
def get_experiment_data(experiment_names,
                        main_experiment_name,
                        from_cached_data,
                        data_path,
                        main_experiment_benchmarks=None):
    """Loads the experiment data from the cache file or from the database.

    The file at |data_path| is only used when |from_cached_data| is set and the
    file exists; in every other case the database is queried.

    Args:
        experiment_names: Experiment names passed to the database query.
        main_experiment_name: Experiment whose description is looked up when
            the data comes from the database.
        from_cached_data: Whether an existing file at |data_path| may be used
            instead of querying the database.
        data_path: Path of the cached CSV data file.
        main_experiment_benchmarks: Optional value passed through to the
            database query.

    Returns:
        A (dataframe, description) tuple. The description is the fixed string
        'from cached data' when the cache file was used.
    """
    use_cache = from_cached_data and os.path.exists(data_path)

    if not use_cache:
        logger.info('Reading experiment data from db.')
        db_df = queries.get_experiment_data(experiment_names,
                                            main_experiment_benchmarks)
        logger.info('Done reading experiment data from db.')
        db_description = queries.get_experiment_description(
            main_experiment_name)
        return db_df, db_description

    logger.info('Reading experiment data from %s.', data_path)
    cached_df = pd.read_csv(data_path)
    logger.info('Done reading data from %s.', data_path)
    return cached_df, 'from cached data'
def modify_experiment_data_if_requested(  # pylint: disable=too-many-arguments
        experiment_df, experiment_names, benchmarks, fuzzers,
        label_by_experiment, end_time, merge_with_clobber):
    """Applies the requested filters and relabelings to |experiment_df|.

    The options come from the command line (or from callers of
    generate_report). Steps run in a fixed order: benchmark filtering, fuzzer
    filtering, labeling by experiment, end-time cutoff, merge with clobber.

    Args:
        experiment_df: Dataframe with the experiment data; it must have a
            'benchmark' column.
        experiment_names: Experiment names, used by the merge-with-clobber
            step.
        benchmarks: Benchmarks to keep; skipped when empty or None.
        fuzzers: Fuzzers to keep; applied whenever it is not None.
        label_by_experiment: Whether to label fuzzers by their experiment.
        end_time: Cutoff time handed to the max-time filter; skipped when None.
        merge_with_clobber: Whether to clobber data across experiments.

    Returns:
        The dataframe produced by the last step that ran, or |experiment_df|
        itself when no step ran.
    """
    result_df = experiment_df

    # Keep only the benchmarks the caller asked for.
    if benchmarks:
        logger.debug('Filter included benchmarks: %s.', benchmarks)
        result_df = data_utils.filter_benchmarks(result_df, benchmarks)

    # Filter once more, this time by the benchmarks present in the dataframe.
    benchmark_column = result_df['benchmark']
    if not benchmark_column.empty:
        present_benchmarks = benchmark_column.unique().tolist()
        logger.debug('Filter experiment_df benchmarks: %s.',
                     present_benchmarks)
        result_df = data_utils.filter_benchmarks(result_df,
                                                 present_benchmarks)

    # Keep only the requested fuzzers.
    if fuzzers is not None:
        result_df = data_utils.filter_fuzzers(result_df, fuzzers)

    # Tag every fuzzer with its experiment so that the same fuzzer can be
    # compared across multiple experiments.
    if label_by_experiment:
        result_df = data_utils.label_fuzzers_by_experiment(result_df)

    # Cut the experiment off at the requested time.
    if end_time is not None:
        result_df = data_utils.filter_max_time(result_df, end_time)

    # Merge the experiments with clobbering.
    if merge_with_clobber:
        result_df = data_utils.clobber_experiments_data(result_df,
                                                        experiment_names)

    return result_df
# pylint: disable=too-many-arguments,too-many-locals
def generate_report(experiment_names,
                    report_directory,
                    report_name=None,
                    label_by_experiment=False,
                    benchmarks=None,
                    fuzzers=None,
                    report_type='default',
                    quick=False,
                    log_scale=False,
                    from_cached_data=False,
                    in_progress=False,
                    end_time=None,
                    merge_with_clobber=False,
                    merge_with_clobber_nonprivate=False,
                    coverage_report=False,
                    experiment_benchmarks=None):
    """Generates an HTML report for |experiment_names| in |report_directory|.

    Loads the experiment data, applies the requested modifications, saves the
    data next to the report (unless an existing cache file is being reused) and
    writes the rendered report to index.html in |report_directory|.

    Args:
        experiment_names: Experiment names; the first one is the main
            experiment.
        report_directory: Directory receiving the data file and index.html.
        report_name: Name of the report; falls back to the main experiment.
        label_by_experiment: Forwarded to modify_experiment_data_if_requested.
        benchmarks: Forwarded to modify_experiment_data_if_requested.
        fuzzers: Forwarded to modify_experiment_data_if_requested.
        report_type: Template name without the '.html' suffix.
        quick: Passed to the plotter.
        log_scale: Passed to the plotter.
        from_cached_data: Whether an existing data file may be reused.
        in_progress: Passed to the report renderer.
        end_time: Forwarded to modify_experiment_data_if_requested.
        merge_with_clobber: Forwarded to modify_experiment_data_if_requested.
        merge_with_clobber_nonprivate: If set, the experiment list is extended
            through queries.add_nonprivate_experiments_for_merge_with_clobber
            and merge_with_clobber is turned on.
        coverage_report: Whether coverage information is gathered and passed to
            the renderer.
        experiment_benchmarks: Forwarded to get_experiment_data.
    """
    if merge_with_clobber_nonprivate:
        # The nonprivate variant is a merge with clobber over an extended list
        # of experiments.
        merge_with_clobber = True
        experiment_names = (
            queries.add_nonprivate_experiments_for_merge_with_clobber(
                experiment_names))

    main_experiment_name = experiment_names[0]
    if not report_name:
        report_name = main_experiment_name

    filesystem.create_directory(report_directory)
    data_path = os.path.join(report_directory, DATA_FILENAME)

    experiment_df, experiment_description = get_experiment_data(
        experiment_names,
        main_experiment_name,
        from_cached_data,
        data_path,
        main_experiment_benchmarks=experiment_benchmarks)

    # TODO(metzman): Ensure that each experiment is in the df. Otherwise there
    # is a good chance user misspelled something.
    data_utils.validate_data(experiment_df)

    experiment_df = modify_experiment_data_if_requested(
        experiment_df,
        experiment_names,
        benchmarks=benchmarks,
        fuzzers=fuzzers,
        label_by_experiment=label_by_experiment,
        end_time=end_time,
        merge_with_clobber=merge_with_clobber)

    # The |bugs_covered| column has to be present before the data is exported.
    experiment_df = data_utils.add_bugs_covered_column(experiment_df)

    # Store the filtered raw data with the report, except when an existing
    # cache file is being reused.
    reusing_cache_file = from_cached_data and os.path.exists(data_path)
    if not reusing_cache_file:
        experiment_df.to_csv(data_path)

    # Coverage information is only gathered on request.
    if coverage_report:
        logger.info('Generating coverage report info.')
        coverage_dict = coverage_data_utils.get_covered_branches_dict(
            experiment_df)
        logger.info('Finished generating coverage report info.')
    else:
        coverage_dict = {}

    plotter = plotting.Plotter(experiment_df.fuzzer.unique(), quick, log_scale)
    experiment_ctx = experiment_results.ExperimentResults(
        experiment_df,
        coverage_dict,
        report_directory,
        plotter,
        experiment_name=report_name)

    template = report_type + '.html'
    logger.info('Rendering HTML report.')
    detailed_report = rendering.render_report(experiment_ctx, template,
                                              in_progress, coverage_report,
                                              experiment_description)
    logger.info('Done rendering HTML report.')

    report_path = os.path.join(report_directory, 'index.html')
    filesystem.write(report_path, detailed_report)
def main():
    """Generates a report from the options given on the command line.

    Initializes logging, parses the command line with the parser built by
    get_arg_parser() and forwards the parsed options to generate_report().
    Returns nothing, so the process exits with status 0 unless an exception
    propagates.
    """
    logs.initialize()

    parser = get_arg_parser()
    args = parser.parse_args()

    generate_report(
        experiment_names=args.experiments,
        report_directory=args.report_dir,
        report_name=args.report_name,
        label_by_experiment=args.label_by_experiment,
        benchmarks=args.benchmarks,
        fuzzers=args.fuzzers,
        report_type=args.report_type,
        quick=args.quick,
        log_scale=args.log_scale,
        from_cached_data=args.from_cached_data,
        end_time=args.end_time,
        merge_with_clobber=args.merge_with_clobber,
        # Must be forwarded explicitly; otherwise the
        # -p/--merge-with-clobber-nonprivate option is parsed but has no effect.
        merge_with_clobber_nonprivate=args.merge_with_clobber_nonprivate,
        coverage_report=args.coverage_report)
if __name__ == '__main__':
    # Only run the tool when executed as a script; main()'s result becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)