-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathooxx.py
executable file
·98 lines (80 loc) · 2.82 KB
/
ooxx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
'''
author : haipz
site : haipz.com
email : [email protected]
modified by TC
The oo (support) and xx (unsupport) counts of each picture are included in its saved file name.
'''
import urllib2, os, re, thread, time
def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a browser-like ``User-Agent`` header
    (presumably because the target site rejects urllib2's default agent;
    confirm before removing it).

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as read from the connection.

    Raises:
        urllib2.URLError: If the request fails (``HTTPError`` is a subclass).
    """
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib2.Request(url, headers=hdr)
    page = urllib2.urlopen(req)
    try:
        return page.read()
    finally:
        # Release the connection even when read() fails part-way through.
        page.close()
def filterComment(source):
    """Return the part of ``source`` delimited by the comment markers.

    The span runs from the first ``begin comments`` to the nearest following
    ``end comments`` and includes both marker strings.

    Args:
        source: HTML text of a page.

    Returns:
        The matched span, or an empty string when the markers are absent
        (so callers can treat such a page as having no comments).
    """
    # A plain raw string keeps the pattern text unchanged while remaining
    # valid syntax on every Python version (the ``ur`` prefix is Python 2 only).
    matched = re.search(r'begin comments([\s\S]*?)end comments', source)
    if matched is None:
        return ''
    return matched.group()
def filterThumbnail(source):
    """Remove the ``<img src="....gif"`` opening of every GIF image tag.

    Only the text from ``<img src="`` through the closing quote of a
    ``.gif`` address is deleted; whatever follows in the tag is kept.

    Args:
        source: HTML fragment to clean.

    Returns:
        ``source`` with every such span removed.
    """
    # The address part is limited to non-quote characters so a match cannot
    # start at one tag's ``src`` and run across other markup (including
    # picture links) until some later ``.gif"``.
    return re.sub(r'<img src="([^"]*?)\.gif"', '', source)
def downloadPicture(picurl, picpath, picname):
    """Download ``picurl`` and save it as ``picpath + picname``.

    Args:
        picurl: Address of the picture.
        picpath: Directory prefix. It is joined to ``picname`` by plain
            string concatenation, so it must already end with a separator.
        picname: Name of the file to write.

    Raises:
        urllib2.URLError: If the download fails.
        IOError: If the target file cannot be written.
    """
    pic = urllib2.urlopen(picurl)
    try:
        # Read the whole body before touching the disk so that a failed
        # transfer does not leave an empty or truncated file behind.
        data = pic.read()
    finally:
        pic.close()
    with open(picpath + picname, 'wb') as f:
        f.write(data)
    # A single parenthesised argument prints identically under the Python 2
    # print statement.
    print('System: ' + picname + ' saved\n')
# Interactive entry point: ask for the board, the page range and the vote
# thresholds, then download every matching picture into ./<dirname>/.
#
# raw_input() returns the typed text unchanged; Python 2's input() would
# eval() it as an expression first.
choice = int(raw_input("0. ooxx 1. pic : "))
pagestart = int(raw_input("page start: "))
pageend = int(raw_input("page end: "))
ooover = int(raw_input("at least how many oo: "))
xxbelow = int(raw_input("at most how many xx: "))

if choice == 0:
    dirname = "ooxx"
else:
    dirname = "pic"

path = os.getcwd() + "/" + dirname
if not os.path.exists(path):
    os.makedirs(path)
    print('System: ' + path + " created")
else:
    print('System: ' + path + " exists")

initurl = "http://jandan.net/" + dirname + "/"
picpath = path + '/'

# The patterns do not depend on the page or comment, so compile them once.
commentobj = re.compile(r'<li id="comment-([\s\S]*?)</li>')
ooobj = re.compile(r'(?:<span id="cos_support-)(?:\d*?)(?:">)(\d*?)(?:</span>)')
xxobj = re.compile(r'(?:<span id="cos_unsupport-)(?:\d*?)(?:">)(\d*?)(?:</span>)')
# The address stops at the closing quote and the extension dot is literal,
# so a match stays inside one href and ends in a real .jpg/.png/.gif suffix.
picobj = re.compile(r'(?:href=")(http\:\/\/w[^"]*?)(\.jpg|\.png|\.gif)')

# NOTE(review): range() stops before pageend, so the "page end" page itself
# is not fetched - confirm whether the end is meant to be inclusive.
for pagenum in range(pagestart, pageend):
    cururl = initurl + "page-" + str(pagenum)
    print('Current url: ' + cururl)
    curhtml = filterComment(getHtml(cururl))

    # count0 numbers every comment on the page, including skipped ones.
    for count0, match in enumerate(commentobj.findall(curhtml), 1):
        match = filterThumbnail(match)

        oomatch = ooobj.search(match)
        xxmatch = xxobj.search(match)
        if oomatch is None or xxmatch is None:
            # No vote counters in this comment: skip it instead of aborting
            # the whole run.
            continue
        oo = oomatch.group(1)
        xx = xxmatch.group(1)
        if not (oo and xx):
            # (\d*?) also matches an empty counter, which int() rejects.
            continue

        # NOTE(review): the comparisons are strict although the prompts say
        # "at least" / "at most" - confirm which is intended.
        if not (int(oo) > ooover and int(xx) < xxbelow):
            continue

        # count1 numbers the pictures saved from this comment.
        for count1, pic in enumerate(picobj.findall(match), 1):
            picurl = pic[0] + pic[1]
            picname = (str(pagenum) + '_oo' + oo + '_xx' + xx + '_'
                       + str(count0) + '_' + str(count1) + pic[1])
            print('Infomation:')
            print('Picture url: ' + picurl)
            print('Picture path: ' + picpath)
            print('Picture name: ' + picname)
            try:
                downloadPicture(picurl, picpath, picname)
            except Exception as e:
                # Best effort: report the failure and go on to the next picture.
                print(e)