-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
Copy pathfuzz_target.py
408 lines (339 loc) · 16.2 KB
/
fuzz_target.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A module to handle running a fuzz target for a specified amount of time."""
import collections
import logging
import multiprocessing
import os
import shutil
import stat
import tempfile
from typing import Optional
import clusterfuzz.environment
import clusterfuzz.fuzz
import config_utils
import logs
logs.init()
# libFuzzer flags for batch fuzzing. len_control is switched off because runs
# here are (probably) too short for it to pay off.
LIBFUZZER_OPTIONS_BATCH = ['-len_control=0']

# Code change fuzzing reuses the batch flags and pins the seed so that runs are
# deterministic.
LIBFUZZER_OPTIONS_CODE_CHANGE = [*LIBFUZZER_OPTIONS_BATCH, '-seed=1337']

# Flags added when OOMs should not be reported.
LIBFUZZER_OPTIONS_NO_REPORT_OOM = ['-rss_limit_mb=0']

# How many times we try to reproduce a crash before giving up.
REPRODUCE_ATTEMPTS = 10

# Time budget (seconds) for one reproduce attempt, with per-language overrides.
DEFAULT_REPRODUCE_TIME_SECONDS = 30
PER_LANGUAGE_REPRODUCE_TIMEOUTS = {
    # Python takes a bit longer on startup.
    'python': 4 * 30,
}

# Time budget (seconds) for minimizing a testcase.
MINIMIZE_TIME_SECONDS = 4 * 60

# Seconds on top of duration until a timeout error is raised.
BUFFER_TIME = 10

# Logged when we can't check whether a crash reproduces on a recent build.
COULD_NOT_TEST_ON_CLUSTERFUZZ_MESSAGE = (
    'Could not run previous build of target to determine if this code '
    'change (pr/commit) introduced crash. Assuming crash was newly '
    'introduced.')

# Result of a fuzzing/pruning run: reproducer path, logs/stacktrace and the
# corpus directory that was used.
FuzzResult = collections.namedtuple(
    'FuzzResult', ('testcase', 'stacktrace', 'corpus_path'))
def get_libfuzzer_parallel_options():
  """Builds the libFuzzer arguments for fuzzing on every available core.

  Returns:
    A one-element list holding a '-jobs=N' flag, where N is the value reported
    by multiprocessing.cpu_count().
  """
  core_count = multiprocessing.cpu_count()
  return [f'-jobs={core_count}']
class ReproduceError(Exception):
  """Raised when an attempt to reproduce a crash cannot even be made."""
def get_fuzz_target_corpus_dir(workspace, target_name):
  """Returns where |target_name|'s corpus is kept inside |workspace|.

  The result is |target_name| joined onto |workspace.corpora|.
  """
  corpora_root = workspace.corpora
  return os.path.join(corpora_root, target_name)
def get_fuzz_target_pruned_corpus_dir(workspace, target_name):
  """Returns where |target_name|'s pruned corpus is kept inside |workspace|.

  The result is |target_name| joined onto |workspace.pruned_corpora|.
  """
  pruned_root = workspace.pruned_corpora
  return os.path.join(pruned_root, target_name)
class FuzzTarget:  # pylint: disable=too-many-instance-attributes
  """A class to manage a single fuzz target.

  Attributes:
    target_name: The name of the fuzz target (basename of |target_path|).
    duration: The length of time in seconds that the target should run.
    target_path: The location of the fuzz target binary.
    workspace: The workspace for storing things related to fuzzing.
    clusterfuzz_deployment: The object representing the ClusterFuzz
      deployment.
    config: The config of this project.
    latest_corpus_path: Directory the target's corpus is downloaded into.
    pruned_corpus_path: Directory the pruned corpus is written to.
  """

  # pylint: disable=too-many-arguments
  def __init__(self, target_path, duration, workspace, clusterfuzz_deployment,
               config):
    """Represents a single fuzz target.

    Creates the corpus and pruned corpus directories for the target if they do
    not exist yet.

    Args:
      target_path: The location of the fuzz target binary.
      duration: The length of time in seconds the target should run.
      workspace: The path used for storing things needed for fuzzing.
      clusterfuzz_deployment: The object representing the ClusterFuzz
          deployment.
      config: The config of this project.
    """
    self.target_path = target_path
    self.target_name = os.path.basename(self.target_path)
    self.duration = int(duration)
    self.workspace = workspace
    self.clusterfuzz_deployment = clusterfuzz_deployment
    self.config = config
    self.latest_corpus_path = get_fuzz_target_corpus_dir(
        self.workspace, self.target_name)
    os.makedirs(self.latest_corpus_path, exist_ok=True)
    self.pruned_corpus_path = get_fuzz_target_pruned_corpus_dir(
        self.workspace, self.target_name)
    os.makedirs(self.pruned_corpus_path, exist_ok=True)

  def _download_corpus(self):
    """Downloads the corpus for the target from ClusterFuzz and returns the path
    to the corpus. The corpus directory is created in __init__, so an empty
    directory is provided if the corpus can't be downloaded or is empty."""
    self.clusterfuzz_deployment.download_corpus(self.target_name,
                                                self.latest_corpus_path)
    return self.latest_corpus_path

  def _target_artifact_path(self):
    """Returns the artifact directory for this target and sanitizer, creating
    it if needed."""
    artifact_path = os.path.join(self.workspace.artifacts, self.target_name,
                                 self.config.sanitizer)
    os.makedirs(artifact_path, exist_ok=True)
    return artifact_path

  def _save_crash(self, crash):
    """Saves |crash| into the target's artifact directory.

    Copies the crashing input there and writes the crash's stacktrace next to
    it in a file named '<input basename>.summary'.

    Args:
      crash: The crash to save. Its |input_path| and |stacktrace| attributes
          are read.

    Returns:
      The path of the copied crashing input (the reproducer).
    """
    target_reproducer_path = os.path.join(self._target_artifact_path(),
                                          os.path.basename(crash.input_path))
    shutil.copy(crash.input_path, target_reproducer_path)
    bug_summary_artifact_path = target_reproducer_path + '.summary'
    with open(bug_summary_artifact_path, 'w') as handle:
      handle.write(crash.stacktrace)

    # Set permissions of testcase to be the same as summary so that we're sure
    # it can be read by necessary users.
    permissions_mode = os.stat(bug_summary_artifact_path).st_mode
    os.chmod(target_reproducer_path, permissions_mode & 0o777)
    return target_reproducer_path

  def prune(self):
    """Prunes the corpus and returns the result.

    Downloads the latest corpus, minimizes it into |pruned_corpus_path| and
    prints the engine's logs.

    Returns:
      FuzzResult with no testcase, the minimization logs as the stacktrace
      field and the pruned corpus directory as the corpus path.
    """
    self._download_corpus()
    with clusterfuzz.environment.Environment(config_utils.DEFAULT_ENGINE,
                                             self.config.sanitizer,
                                             self.target_path):
      engine_impl = clusterfuzz.fuzz.get_engine(config_utils.DEFAULT_ENGINE)
      result = engine_impl.minimize_corpus(self.target_path, [],
                                           [self.latest_corpus_path],
                                           self.pruned_corpus_path,
                                           self._target_artifact_path(),
                                           self.duration)

    print(result.logs)
    return FuzzResult(None, result.logs, self.pruned_corpus_path)

  def fuzz(self, batch=False) -> Optional[FuzzResult]:
    """Starts the fuzz target run for the length of time specified by duration.

    Args:
      batch: If True, the batch libFuzzer options are used and a reproducible
          crash is reported without checking whether it is novel. Otherwise
          the code change options (fixed seed) are used.

    Returns:
      FuzzResult namedtuple with stacktrace and testcase if applicable. The
      testcase and stacktrace are None when no crash was found, when the
      process timed out, or when the crash is neither reportable nor uploaded
      because of |config.upload_all_crashes|.
    """
    logging.info('Running fuzzer: %s.', self.target_name)

    self._download_corpus()
    corpus_path = self.latest_corpus_path

    logging.info('Starting fuzzing')
    # Crash inputs live in |artifacts_dir|, so everything that reads them must
    # happen before the temporary directory is cleaned up.
    with tempfile.TemporaryDirectory() as artifacts_dir:
      with clusterfuzz.environment.Environment(config_utils.DEFAULT_ENGINE,
                                               self.config.sanitizer,
                                               self.target_path) as env:
        engine_impl = clusterfuzz.fuzz.get_engine(config_utils.DEFAULT_ENGINE)
        options = engine_impl.prepare(corpus_path, env.target_path,
                                      env.build_dir)
        options.merge_back_new_testcases = False
        options.analyze_dictionary = False
        if batch:
          options.arguments.extend(LIBFUZZER_OPTIONS_BATCH)
        else:
          options.arguments.extend(LIBFUZZER_OPTIONS_CODE_CHANGE)

        if not self.config.report_ooms:
          options.arguments.extend(LIBFUZZER_OPTIONS_NO_REPORT_OOM)

        if self.config.parallel_fuzzing:
          if self.config.sanitizer == 'memory':
            # TODO(https://github.com/google/oss-fuzz/issues/11915): Don't gate
            # this after jobs is fixed for MSAN.
            logging.info('Not using jobs because it breaks MSAN.')
          else:
            options.arguments.extend(get_libfuzzer_parallel_options())

        result = engine_impl.fuzz(self.target_path, options, artifacts_dir,
                                  self.duration)
        print(f'Fuzzing logs:\n{result.logs}')

      if not result.crashes:
        # Libfuzzer max time was reached.
        logging.info('Fuzzer %s finished with no crashes discovered.',
                     self.target_name)
        return FuzzResult(None, None, self.latest_corpus_path)

      if result.timed_out:
        logging.info('Not reporting crash in %s because process timed out.',
                     self.target_name)
        return FuzzResult(None, None, self.latest_corpus_path)

      # Only report first crash.
      crash = result.crashes[0]
      logging.info('Fuzzer: %s. Detected bug.', self.target_name)

      is_reportable = self.is_crash_reportable(crash.input_path,
                                               crash.reproduce_args,
                                               batch=batch)
      if is_reportable or self.config.upload_all_crashes:
        logging.info('SAVING CRASH')
        fuzzer_logs = result.logs
        testcase_path = self._save_crash(crash)
        if is_reportable and self.config.minimize_crashes:
          # TODO(metzman): We don't want to minimize unreproducible crashes.
          # Use is_reportable to decide this even though reportable crashes
          # are a subset of reproducible ones.
          self.minimize_testcase(testcase_path)
        else:
          logging.info('NOT MINIMIZED')
      else:
        fuzzer_logs = None
        testcase_path = None

    return FuzzResult(testcase_path, fuzzer_logs, self.latest_corpus_path)

  def minimize_testcase(self, testcase_path):
    """Minimizes the testcase located at |testcase_path|.

    The minimized testcase is written to |testcase_path| + '-minimized', with
    MINIMIZE_TIME_SECONDS as the time limit.

    Returns:
      Whatever the engine's minimize_testcase call returns.
    """
    with clusterfuzz.environment.Environment(config_utils.DEFAULT_ENGINE,
                                             self.config.sanitizer,
                                             self.target_path):
      engine_impl = clusterfuzz.fuzz.get_engine(config_utils.DEFAULT_ENGINE)
      minimized_testcase_path = testcase_path + '-minimized'
      return engine_impl.minimize_testcase(self.target_path, [],
                                           testcase_path,
                                           minimized_testcase_path,
                                           max_time=MINIMIZE_TIME_SECONDS)

  def free_disk_if_needed(self, delete_fuzz_target=True):
    """Deletes things that are no longer needed from fuzzing this fuzz target to
    save disk space if needed.

    Does nothing unless |config.low_disk_space| is set.

    Args:
      delete_fuzz_target: Whether the fuzz target binary itself should be
          deleted in addition to the corpora and seed corpus.
    """
    if not self.config.low_disk_space:
      logging.info('Not freeing disk space after running fuzz target.')
      return
    logging.info('Deleting corpus and seed corpus of %s to save disk.',
                 self.target_name)

    # Delete the seed corpus, corpus, and fuzz target.
    for corpus_path in [self.latest_corpus_path, self.pruned_corpus_path]:
      # Use ignore_errors=True to fix
      # https://github.com/google/oss-fuzz/issues/5383.
      shutil.rmtree(corpus_path, ignore_errors=True)

    target_seed_corpus_path = self.target_path + '_seed_corpus.zip'
    if os.path.exists(target_seed_corpus_path):
      os.remove(target_seed_corpus_path)

    if delete_fuzz_target:
      logging.info('Deleting fuzz target: %s.', self.target_name)
      os.remove(self.target_path)
    logging.info('Done deleting.')

  def is_reproducible(self, testcase, target_path, reproduce_args):
    """Checks if the testcase reproduces.

    The target is run up to REPRODUCE_ATTEMPTS times. The first non-zero
    return code counts as a reproduction. A TimeoutError from the engine ends
    the check immediately with a negative result.

    Args:
      testcase: The path to the testcase to be tested.
      target_path: The path to the fuzz target to be tested
      reproduce_args: The arguments to pass to the target to reproduce the
          crash.

    Returns:
      True if crash is reproducible and we were able to run the
      binary.

    Raises:
      ReproduceError if we can't attempt to reproduce the crash.
    """
    if not os.path.exists(target_path):
      logging.info('Target: %s does not exist.', target_path)
      raise ReproduceError(f'Target {target_path} not found.')

    # NOTE(review): S_IRWXO sets rwx for "others" only and clears the owner and
    # group bits. Presumably this is fine because the process runs as root --
    # confirm before changing.
    os.chmod(target_path, stat.S_IRWXO)

    logging.info('Trying to reproduce crash using: %s.', testcase)
    with clusterfuzz.environment.Environment(config_utils.DEFAULT_ENGINE,
                                             self.config.sanitizer,
                                             target_path):
      reproduce_time_seconds = PER_LANGUAGE_REPRODUCE_TIMEOUTS.get(
          self.config.language, DEFAULT_REPRODUCE_TIME_SECONDS)
      for _ in range(REPRODUCE_ATTEMPTS):
        engine_impl = clusterfuzz.fuzz.get_engine(config_utils.DEFAULT_ENGINE)
        try:
          result = engine_impl.reproduce(target_path,
                                         testcase,
                                         arguments=reproduce_args,
                                         max_time=reproduce_time_seconds)
        except TimeoutError as error:
          logging.error('%s.', error)
          return False

        if result.return_code != 0:
          logging.info('Reproduce command returned: %s. Reproducible on %s.',
                       result.return_code, target_path)
          return True

    logging.info('Reproduce command returned: 0. Not reproducible on %s.',
                 target_path)
    return False

  def is_crash_reportable(self, testcase, reproduce_args, batch=False):
    """Returns True if a crash is reportable. This means the crash is
    reproducible but not reproducible on a build from the ClusterFuzz deployment
    (meaning the crash was introduced by this PR/commit/code change).

    Args:
      testcase: The path to the testcase that triggered the crash.
      reproduce_args: The arguments to pass to the target to reproduce the
          crash.
      batch: If True, a reproducible crash is reportable without checking
          whether it is novel.

    Returns:
      False if the crash type (OOM/timeout) is configured not to be reported.
      |config.report_unreproducible_crashes| if the crash does not reproduce
      on the current build. True if it reproduces and |batch| is set.
      Otherwise whether the crash is novel (see is_crash_novel).

    Raises:
      ReproduceError if we can't attempt to reproduce the crash on the PR build.
    """
    if not self.is_crash_type_reportable(testcase):
      return False

    if not os.path.exists(testcase):
      raise ReproduceError(f'Testcase {testcase} not found.')

    try:
      reproducible_on_code_change = self.is_reproducible(
          testcase, self.target_path, reproduce_args)
    except ReproduceError:
      # The literals are joined by the parser, so each piece needs its own
      # trailing space to keep the message readable.
      logging.error('Could not check for crash reproducibility. '
                    'Please file an issue: '
                    'https://github.com/google/oss-fuzz/issues/new.')
      raise

    if not reproducible_on_code_change:
      logging.info('Crash is not reproducible.')
      return self.config.report_unreproducible_crashes

    logging.info('Crash is reproducible.')
    if batch:
      # We don't need to check if the crash is novel for batch fuzzing.
      return True

    return self.is_crash_novel(testcase, reproduce_args)

  def is_crash_type_reportable(self, testcase):
    """Returns True if |testcase| is an actual crash. If crash is a timeout or
    OOM then returns True if config says we should report those.

    The crash type is inferred from the testcase's file name prefix ('oom-' or
    'timeout-').
    """
    # TODO(metzman): Use a less hacky method.
    testcase = os.path.basename(testcase)
    if testcase.startswith('oom-'):
      return self.config.report_ooms
    if testcase.startswith('timeout-'):
      return self.config.report_timeouts
    return True

  def is_crash_novel(self, testcase, reproduce_args):
    """Returns whether or not the crash is new. A crash is considered new if it
    can't be reproduced on an older ClusterFuzz build of the target.

    The crash is also treated as new when no ClusterFuzz build could be
    downloaded or when the reproduction could not be attempted on that build.

    Args:
      testcase: The path to the testcase that triggered the crash.
      reproduce_args: The arguments to pass to the target to reproduce the
          crash.

    Raises:
      ReproduceError if |testcase| does not exist.
    """
    if not os.path.exists(testcase):
      raise ReproduceError(f'Testcase {testcase} not found.')
    clusterfuzz_build_dir = self.clusterfuzz_deployment.download_latest_build()
    if not clusterfuzz_build_dir:
      # Crash is reproducible on PR build and we can't test on a recent
      # ClusterFuzz/OSS-Fuzz build.
      logging.info(COULD_NOT_TEST_ON_CLUSTERFUZZ_MESSAGE)
      return True

    clusterfuzz_target_path = os.path.join(clusterfuzz_build_dir,
                                           self.target_name)

    try:
      reproducible_on_clusterfuzz_build = self.is_reproducible(
          testcase, clusterfuzz_target_path, reproduce_args)
    except ReproduceError:
      # This happens if the project has ClusterFuzz builds, but the fuzz target
      # is not in it (e.g. because the fuzz target is new).
      logging.info(COULD_NOT_TEST_ON_CLUSTERFUZZ_MESSAGE)
      return True

    if reproducible_on_clusterfuzz_build:
      logging.info('The crash is reproducible on previous build. '
                   'Code change (pr/commit) did not introduce crash.')
      return False
    logging.info('The crash is not reproducible on previous build. '
                 'Code change (pr/commit) introduced crash.')
    return True