forked from breakwa11/gfw_whitelist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlist_gfw.py
91 lines (80 loc) · 1.8 KB
/
list_gfw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urlparse
import logging
import copy
from blacklist import gfwcustom
__all__ = ['main']
def decode_gfwlist(content):
    """Return the plain-text rule list, base64-decoding *content* if needed.

    A '.' is not part of the base64 alphabet, so text containing one is
    treated as already decoded and returned untouched. Otherwise a base64
    decode is attempted; if that fails, *content* is returned unchanged.

    content -- the text of the list file, as a string.
    """
    # Function-scope imports keep this block self-contained.
    import base64
    import binascii

    if '.' in content:
        return content
    try:
        decoded = base64.b64decode(content)
        if not isinstance(decoded, str):
            # Python 3 hands back bytes; the callers work on text.
            decoded = decoded.decode('utf-8')
    except (TypeError, ValueError, binascii.Error):
        # Not valid base64 (or not valid text once decoded): keep the input.
        return content
    return decoded
def get_hostname(something):
    """Return the hostname found in *something*, or None on failure.

    *something* may be a full http/https URL or a bare "host/path" string;
    bare strings get an ``http://`` prefix so that urlparse recognises the
    network location. Any error while parsing is logged and None is
    returned instead of raising.
    """
    try:
        # Accept https as well: prefixing "http://" onto an https URL would
        # make urlparse report "https" as the hostname.
        if not something.startswith(('http:', 'https:')):
            something = 'http://' + something
        return urlparse.urlparse(something).hostname
    except Exception as e:
        logging.error(e)
        return None
def parse_gfwlist(content):
    """Split *content* into lines and return only the rule lines.

    Lines starting with '!' or '[' are skipped, as are lines that are empty
    or consist solely of space characters. Everything else is returned
    unmodified, in its original order.
    """
    return [
        entry
        for entry in content.splitlines(False)
        if not entry.startswith(('!', '[')) and entry.strip(' ')
    ]
def obfs(url):
    """Return *url* with selected characters rewritten as backslash escapes.

    Walking the string, whenever the current character (other than the
    first) is a '.' or sits at an index where ``index % 7 == 3``, the
    character just before it is replaced by an escape of its code point:
    backslash + octal digits when the code is below 64, otherwise
    backslash + 'x' + hex digits. The current character itself is always
    kept as-is.
    """
    pieces = []
    for position, char in enumerate(url):
        if position > 0 and (char == '.' or position % 7 == 3):
            # pieces[-1] is still the raw previous character at this point.
            code = ord(pieces[-1])
            if code < 64:
                pieces[-1] = "\\%o" % code
            else:
                pieces[-1] = "\\x%x" % code
        pieces.append(char)
    return ''.join(pieces)
def obfs_list(list_result):
    """Return the set of obfs()-transformed entries of *list_result*."""
    return set(obfs(entry) for entry in list_result)
def get_all_list(lists):
    """Format every rule in *lists* as a double-quoted line.

    Returns a list of strings, one per rule, each ending in a newline.
    Every string after the first is prefixed with a comma. Rules starting
    with "@@" are emitted verbatim; all others are passed through obfs()
    first.
    """
    formatted = []
    for position, rule in enumerate(lists):
        separator = ',' if position else ''
        text = rule if rule.startswith("@@") else obfs(rule)
        formatted.append('%s"%s"\n' % (separator, text))
    return formatted
def final_list():
    """Build the complete rule list as one bracketed string.

    Reads ``gfwlist.txt`` from the current working directory, decodes and
    parses it, appends the entries returned by ``gfwcustom.getlist()``, and
    joins the lines produced by ``get_all_list`` between '[' and ']'.
    """
    with open('gfwlist.txt', 'r') as f:
        content = decode_gfwlist(f.read())
    domains = parse_gfwlist(content)

    custom = list(gfwcustom.getlist())
    # Drop one empty entry if there is one; an unguarded remove("") raises
    # ValueError when the custom list contains no empty string.
    if "" in custom:
        custom.remove("")
    domains += custom

    return '[' + ''.join(get_all_list(domains)) + "]"