This repository has been archived by the owner on Nov 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
preprocessing.py
153 lines (124 loc) · 4.37 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import re
import shutil
import tempfile
from pathlib import Path
from typing import Iterable, Optional
from ccbuilder import Builder
import utils
"""
Functions to preprocess code for creduce.
See creduce --help to see what it wants.
"""
class PreprocessError(Exception):
    """Error type that `preprocess_csmith_code` catches and maps to `None`."""
def find_marker_decl_range(lines: list[str], marker_prefix: str) -> tuple[int, int]:
    """Locate the contiguous run of marker declarations in `lines`.

    A marker declaration is a line that starts with
    ``void <marker_prefix>...(void);``.

    Args:
        lines: Source lines to scan.
        marker_prefix: Prefix of the marker function names. It is
            interpolated into the regular expression unescaped.

    Returns:
        A half-open index pair ``(first, last)``: ``first`` is the index of
        the first marker declaration and ``last`` is the index of the first
        following line that is not one. If the declarations run up to the
        end of `lines`, ``last`` is ``len(lines)``. If no declaration is
        found at all, ``first`` falls back to 0, so that case cannot be
        distinguished from a declaration on line 0.
    """
    decl = re.compile(rf"void {marker_prefix}(.*)\(void\);")

    # Fall back to 0 when nothing matches (historical behaviour).
    first = next((i for i, line in enumerate(lines) if decl.match(line)), 0)

    for i in range(first + 1, len(lines)):
        if not decl.match(lines[i]):
            return first, i

    # Every line after `first` is a declaration: the range extends to the
    # end of the input. `max` keeps `last > first` for empty/one-line input.
    return first, max(first + 1, len(lines))
def find_platform_main_end(lines: Iterable[str]) -> Optional[int]:
    """Return the index of the first line mentioning ``platform_main_end``.

    Args:
        lines: Lines to scan, in order.

    Returns:
        The zero-based position of the first matching line, or ``None``
        when no line matches.
    """
    pattern = re.compile(r".*platform_main_end.*")
    hits = (idx for idx, text in enumerate(lines) if pattern.match(text))
    return next(hits, None)
def remove_platform_main_begin(lines: Iterable[str]) -> list[str]:
    """Drop every line that mentions ``platform_main_begin``.

    Args:
        lines: Lines to filter.

    Returns:
        A new list with the remaining lines in their original order.
    """
    marker = re.compile(r".*platform_main_begin.*")
    kept: list[str] = []
    for text in lines:
        if marker.match(text) is None:
            kept.append(text)
    return kept
def remove_print_hash_value(lines: Iterable[str]) -> list[str]:
    """Drop every line that contains ``print_hash_value = 1``.

    Args:
        lines: Lines to filter.

    Returns:
        A new list with the remaining lines in their original order.
    """
    assignment = re.compile(r".*print_hash_value = 1.*")
    return list(filter(lambda text: assignment.match(text) is None, lines))
# Line prefixes treated as the start of a new top-level region.
# "static" and "void" are included so that a tainted region near the end of
# the file still finds a boundary instead of swallowing the rest of the file.
_START_PREFIXES = ("extern", "typedef", "struct", "static", "void")

# Constructs that taint the region they appear in.
_TAINT_PATTERN = re.compile(
    r"__access__"  # LLVM doesn't know about this
    r"|__malloc__"
    # https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html#Floating-Types
    r"|_[Ff]loat[0-9]{1,3}x?"
    r"|__asm__"  # CompCert has problems
)


def preprocess_lines(lines: list[str]) -> str:
    """Remove regions of `lines` that contain tainted constructs.

    For every line containing one of the tainted constructs, the region to
    drop is found by walking upwards to the nearest line that begins with a
    start prefix (or to line 0) and downwards to the next such line (or to
    the end of the input). The upper boundary line is dropped, the lower
    boundary line is kept.

    Args:
        lines: Preprocessed source, one entry per line.

    Returns:
        The remaining lines joined with ``"\\n"``.
    """
    n_lines = len(lines)
    skip: set[int] = set()

    for i, line in enumerate(lines):
        if not _TAINT_PATTERN.search(line):
            continue

        # Search for the start of the tainted region (may be the line itself).
        up_i = i
        while up_i > 0 and not lines[up_i].startswith(_START_PREFIXES):
            up_i -= 1

        # Search for the end of the tainted region. The bound is checked
        # before indexing, so a taint on (or reaching) the last line simply
        # extends the region to the end of the input.
        down_i = i + 1
        while down_i < n_lines and not lines[down_i].startswith(_START_PREFIXES):
            down_i += 1

        skip.update(range(up_i, down_i))

    return "\n".join([line for i, line in enumerate(lines) if i not in skip])
def preprocess_csmith_file(
    path: os.PathLike[str],
    marker_prefix: str,
    compiler_setting: utils.CompilerSetting,
    bldr: Builder,
) -> str:
    """Run the compiler's preprocessor on a file and filter its output.

    The file is copied to a temporary ``.c`` file, preprocessed with
    ``-P -E`` plus the setting's additional flags, and the resulting lines
    are passed through `preprocess_lines`.

    Args:
        path: File to preprocess.
        marker_prefix: Marker prefix; not used by this function.
        compiler_setting: Selects the compiler executable and supplies the
            optional additional flags.
        bldr: Builder handed to `utils.get_compiler_executable`.

    Returns:
        The filtered, preprocessed source as a single string.
    """
    with tempfile.NamedTemporaryFile(suffix=".c") as tmp_c:
        shutil.copy(path, tmp_c.name)

        # A missing flag list means "no extra flags".
        if compiler_setting.additional_flags is None:
            extra_flags = []
        else:
            extra_flags = compiler_setting.additional_flags

        compiler = str(utils.get_compiler_executable(compiler_setting, bldr))
        base_cmd = [compiler, tmp_c.name, "-P", "-E"]
        output = utils.run_cmd(base_cmd + extra_flags)

        return preprocess_lines(output.split("\n"))
def preprocess_csmith_code(
    code: str,
    marker_prefix: str,
    compiler_setting: utils.CompilerSetting,
    bldr: Builder,
) -> Optional[str]:
    """Try to preprocess `code` as if it came from csmith.

    The code is written to a temporary file via `utils.save_to_tmp_file`
    and then handed to `preprocess_csmith_file`.

    Args:
        code (str): Code to preprocess.
        marker_prefix (str): Marker prefix, forwarded unchanged.
        compiler_setting (utils.CompilerSetting): Setting to preprocess with.
        bldr (Builder): Builder, forwarded unchanged.

    Returns:
        Optional[str]: The preprocessed code, or ``None`` if a
        `PreprocessError` was raised while preprocessing.
    """
    tmp_file = utils.save_to_tmp_file(code)
    try:
        preprocessed = preprocess_csmith_file(
            Path(tmp_file.name), marker_prefix, compiler_setting, bldr
        )
    except PreprocessError:
        return None
    else:
        return preprocessed