#!/usr/bin/env python3
'''
Query https://syzkaller.appspot.com/upstream (open and fixed bug lists) for all bugs
against the upstream kernel that have a "C" or "syz" reproducer, and save the
reproducers to text files.
'''
import multiprocessing
import os
import re
import time

import requests
from bs4 import BeautifulSoup
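
# Third-party dependencies: requests and beautifulsoup4
# (install with: pip install requests beautifulsoup4)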

def rate_limited_get(url, delay=1.0):
    """A simplified rate limiter for get requests in a multiprocessing environment."""
    global last_request_time
    while True:
        with request_lock:
            now = time.time()
            elapsed = now - last_request_time.value
            if elapsed >= delay:
                last_request_time.value = time.time()
                break
        time.sleep(delay - elapsed)  # sleep the remaining time (outside the lock) if any
    return requests.get(url)
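
# The limiter above shares state across all worker processes: last_request_time is a
# multiprocessing.Value('d') holding the wall-clock time of the most recent request,
# and request_lock serializes the check-and-update so at most one request is issued
# per `delay` seconds pool-wide. Both are set up per worker by init_worker() below.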

def init_worker(lock, last_time):
    """Initialize global variables on the worker process."""
    global request_lock, last_request_time
    request_lock = lock
    last_request_time = last_time
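
# Each pool worker runs in its own process with its own module globals, so the shared
# Lock and Value created in main() are handed to every worker via the Pool initializer.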

def get_reproducers(bug):
    # extract the id from the bug link, e.g. /bug?id=17ee94193810ddc5d820094d4e509d47ad5bf6bc
    bug_id = bug.split("=")[1]
    existing_files = [f for f in os.listdir("files/") if f.startswith(bug_id)]
    if existing_files:
        print(f"Files for bug {bug_id} already exist. Skipping...")
        return
    page = rate_limited_get("https://syzkaller.appspot.com" + bug)
    soup = BeautifulSoup(page.content, 'html.parser')
    # parse the last table on the page with class "list_table"
    try:
        table = soup.find_all('table', class_="list_table")[-1]
    except IndexError:
        print("No reproducers for bug:", bug)
        return
    # find every td whose text is "syz"
    for entry in table.find_all('td', string="syz"):
        # get the href of the reproducer link, e.g. /text?tag=ReproSyz&x=1789e141d00000
        link = entry.find('a').get('href')
        page = rate_limited_get("https://syzkaller.appspot.com" + link)
        # extract the x parameter from the link
        x = re.search(r'x=(.*)', link).group(1)
        print("Saving bug " + bug_id + " with x " + x)
        with open("files/" + bug_id + "-" + x + ".txt", 'w+') as f:
            f.write(page.text)
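
# Note: only the "syz" reproducer cells are downloaded above; rows whose stat cell
# reads "C" get the bug selected in main(), but the C reproducer itself is not
# fetched by this script as written.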

def main():
    os.makedirs("files", exist_ok=True)  # get_reproducers() writes into this directory
    bugs = []
    # query both the open and the fixed upstream bug lists
    for url in ("https://syzkaller.appspot.com/upstream/",
                "https://syzkaller.appspot.com/upstream/fixed"):
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        # walk the table rows
        for row in soup.find_all('tr'):
            title = row.find_all('td', class_="title")
            stat = row.find_all('td', class_="stat")
            # keep rows that have both cells and whose first stat cell reads "C" or "syz"
            if title and stat:
                if "C" in stat[0] or "syz" in stat[0]:
                    print(title[0].find('a').get('href'))
                    bugs.append(title[0].find('a').get('href'))
    # for each bug, fetch the reproducers from https://syzkaller.appspot.com/$bug
    # in 15 parallel processes to speed things up
    lock = multiprocessing.Lock()
    last_time = multiprocessing.Value('d', time.time())
    with multiprocessing.Pool(15, initializer=init_worker, initargs=(lock, last_time)) as pool:
        pool.map(get_reproducers, bugs)

if __name__ == "__main__":
    main()
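
# Usage: `python3 collect.py` from the repository root. Each reproducer is saved to
# files/<bug_id>-<x>.txt, where x is the id taken from the reproducer's text link.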