forked from KeaMedes/Cimoc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcategory.py
95 lines (75 loc) · 3.07 KB
/
category.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import requests
import bs4
import re
import urllib.request
def ikanman(timeout=10):
    """Scrape the filter links from http://www.ikanman.com/list/.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples, one per matched ``<a>`` element:
        the anchor text, and its ``href`` with the first six characters and
        the last character removed.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    response = requests.get('http://www.ikanman.com/list/', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content.decode('utf-8'), 'lxml')
    anchors = soup.select('div.filter-nav > div.filter > ul > li > a')
    # NOTE(review): the [6:-1] slice presumably drops a leading "/list/" and
    # a trailing "/" from the href -- confirm against the live markup.
    return [(anchor.get_text(), anchor.get('href')[6:-1]) for anchor in anchors]
def chuiyao(timeout=10):
    """Scrape the ``ul.cat-list`` links from http://m.chuiyao.com/.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples. The first matched link is emitted
        with an empty value and is immediately followed by a fixed
        ``('最近更新', '0')`` entry; every later link gets its zero-based
        position in the match list (as a string) as its value.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    response = requests.get('http://m.chuiyao.com/', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content.decode('utf-8'), 'lxml')
    result = []
    for index, node in enumerate(soup.select('ul.cat-list > li > a')):
        if index == 0:
            # The first link maps to the empty value; a synthetic entry
            # with value '0' is inserted right after it.
            result.append((node.get_text(), ''))
            result.append(('最近更新', '0'))
        else:
            result.append((node.get_text(), str(index)))
    return result
def cctuku(timeout=10):
    """Scrape the ``/list/list`` links from http://m.tuku.cc/.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples for every matched link that has
        non-empty text; the value is the second ``_``-separated field of
        the link's ``href``.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    response = requests.get('http://m.tuku.cc/', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    # The attribute value must be quoted: it contains "/" and is therefore
    # not a valid CSS identifier, which strict selector engines reject.
    anchors = soup.select('ul.pp > li > a[href*="/list/list"]')
    return [
        (anchor.get_text(), anchor.get('href').split('_')[1])
        for anchor in anchors
        if anchor.get_text()
    ]
def dmzj(timeout=10):
    """Scrape the ``#classCon`` links from http://m.dmzj.com/classify.html.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples for every matched link that has
        non-empty text. The value is taken from the link's ``onclick``
        attribute: the attribute is split on runs of non-digit characters
        and the element at index 2 is used.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    response = requests.get('http://m.dmzj.com/classify.html', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    result = []
    for node in soup.select('#classCon > ul > li > a'):
        label = node.get_text()
        if not label:
            continue
        # NOTE(review): index 2 presumably picks the second number in the
        # onclick handler's argument list -- confirm against the live markup.
        digit_fields = re.split(r'\D+', node.get('onclick'))
        result.append((label, digit_fields[2]))
    return result
def mh57(timeout=10):
    """Scrape the filter links from http://www.57mh.com/list/.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples, one per matched ``<a>`` element:
        the anchor text, and the percent-decoded second ``-``-separated
        field of its ``href``.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    # ``unquote`` is documented in urllib.parse; urllib.request only happens
    # to re-export it, so import it from its documented home.
    from urllib.parse import unquote

    response = requests.get('http://www.57mh.com/list/', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content.decode('utf-8'), 'lxml')
    anchors = soup.select('div.filter-nav > div.filter > ul > li > a')
    return [
        (anchor.get_text(), unquote(anchor.get('href').split('-')[1]))
        for anchor in anchors
    ]
def hhssee(timeout=10):
    """Scrape the navigation links from http://www.hhssee.com/.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples. It always starts with
        ``("全部", "")``; each matched link then contributes its text and
        the element at index 1 of its ``href`` split on ``_`` or ``.``.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    response = requests.get('http://www.hhssee.com/', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    result = [("全部", "")]
    result.extend(
        (anchor.get_text(), re.split(r'_|\.', anchor.get('href'))[1])
        for anchor in soup.select('#iHBG > div.cHNav > div > span > a')
    )
    return result
def dm5(timeout=10):
    """Scrape the links from http://www.dm5.com/manhua-latest/.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples. It always starts with
        ``("全部", "")``; each matched link then contributes its text and a
        value cut from its ``href``: the third ``-``-separated field with
        its first two characters and last character removed.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    response = requests.get('http://www.dm5.com/manhua-latest/', timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    selector = '#index_left > div.inkk > div.syzm > span.new_span_bak > a'
    result = [("全部", "")]
    for anchor in soup.select(selector):
        third_field = anchor.get('href').split('-')[2]
        # NOTE(review): [2:-1] presumably strips a two-character prefix and
        # a trailing "/" -- confirm against the live markup.
        result.append((anchor.get_text(), third_field[2:-1]))
    return result
def u17(timeout=10):
    """Scrape the subject links from the u17.com comic list page.

    Args:
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the script indefinitely.

    Returns:
        A list of ``(text, value)`` tuples. It always starts with
        ``("全部", "th99")``; each matched link then contributes its text
        and the first ``_``-separated field of the last path segment of
        its ``href``.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
    """
    url = 'http://www.u17.com/comic_list/th99_gr99_ca99_ss0_ob0_ac0_as0_wm0_co99_ct99_p1.html'
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of scraping whatever an error page contains.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    result = [("全部", "th99")]
    for anchor in soup.select('#select > div.fr > dl.subject > dd > a'):
        last_segment = anchor.get('href').split('/')[-1]
        result.append((anchor.get_text(), last_segment.split('_')[0]))
    return result
# Characters that cannot appear verbatim inside a Java string literal.
_JAVA_STRING_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})


def _java_string_body(value):
    """Return ``value`` as text safe to place between Java double quotes."""
    return str(value).translate(_JAVA_STRING_ESCAPES)


def build(func):
    """Print Java statements that rebuild the pairs returned by ``func``.

    Args:
        func: A zero-argument callable returning an iterable of sequences
            whose first two items are the pair's two strings.

    The output is a ``List<Pair<String, String>>`` declaration, one
    ``list.add(Pair.create(...))`` line per entry, and ``return list;``.
    Backslashes, double quotes, newlines, carriage returns and tabs in the
    values are escaped so each emitted literal stays valid Java.
    """
    print('List<Pair<String, String>> list = new ArrayList<>();')
    for entry in func():
        first = _java_string_body(entry[0])
        second = _java_string_body(entry[1])
        print('list.add(Pair.create("{}", "{}"));'.format(first, second))
    print('return list;')
if "__main__" == __name__:
    # When run as a script, print the Java list-building code for the
    # pairs scraped by u17(); pass a different scraper to build() to
    # generate the list for another site.
    build(u17)