website_hitter_machanize.py
import cookielib, random, sqlite3, os, time, Queue, threading, mechanize
from BeautifulSoup import BeautifulSoup as Soup

table_name = 'proxies'
location = 'proxies.db'
queue = Queue.Queue()
output = []                  # (proxy, 'GOOD'/'BAD') results collected by the worker threads
useragents = []              # user-agent strings loaded from useragentswitcher.xml
website = "http://javaongsan.github.com/"
Referer = "http://yahoo.com"
i = 0                        # count of successful hits
lock = threading.Lock()      # guards the hit counter across threads
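
# Overall flow of the script (see main() at the bottom):
#   1. init()                - open proxies.db and create the proxies table if needed
#   2. parseLog()            - load user-agent strings from useragentswitcher.xml
#   3. hitprocess()          - queue every proxy marked 'GOOD' and let 50 worker
#                              threads open the target website through them, each
#                              with a random user agent and a fixed referer
#   4. bulk_update_record()  - write the per-proxy GOOD/BAD results back to the db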

def init():
    """Open the SQLite database and make sure the proxies table exists."""
    global conn
    global c
    conn = sqlite3.connect(location)
    c = conn.cursor()
    create_database()

def create_database():
    sql = 'create table if not exists ' + table_name + ' (proxy text, working text)'
    c.execute(sql)
    conn.commit()

def clear_database():
    sql = 'drop table ' + table_name
    c.execute(sql)
    conn.commit()
def get_records(sql):
    c.execute(sql)
    rows = c.fetchall()
    return rows

def get_record(sql):
    c.execute(sql)
    row = c.fetchone()
    return row

def insert_record(proxy, working):
    mylist = [proxy, working]
    sql = 'INSERT OR IGNORE INTO ' + table_name + ' (proxy, working) values (?, ?)'
    c.execute(sql, mylist)
    conn.commit()

def bulk_insert_record(proxies):
    # New proxies start out as 'UNCHECK' until they have been tested.
    for proxy in proxies:
        mylist = [proxy, 'UNCHECK']
        sql = 'INSERT OR IGNORE INTO ' + table_name + ' (proxy, working) values (?, ?)'
        c.execute(sql, mylist)
    conn.commit()
def update_record(proxy, working):
    sql = 'UPDATE ' + table_name + ' SET working = ? WHERE proxy = ?'
    c.execute(sql, [working, proxy])
    conn.commit()

def bulk_update_record(proxies):
    os.system('clear')
    print "Update Database"
    sql = 'UPDATE ' + table_name + ' SET working = ? WHERE proxy = ?'
    for proxy, working in proxies:
        c.execute(sql, [working, proxy])
    conn.commit()

def close_database():
    if conn:
        conn.close()
class ThreadUrl(threading.Thread):
    """Threaded URL grab: fetch the target website through one proxy per task."""
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        global i
        while True:
            # Grab the next proxy from the queue.
            proxy_info = self.queue.get().strip()
            try:
                ua = random.choice(useragents)
                cj = cookielib.LWPCookieJar()
                browser = mechanize.Browser()
                browser.set_cookiejar(cj)
                browser.set_handle_equiv(True)
                browser.set_handle_gzip(True)
                browser.set_handle_redirect(True)
                browser.set_handle_referer(True)
                browser.set_handle_robots(False)
                browser.addheaders = [('User-agent', ua), ('Referer', Referer)]
                browser.set_proxies({'http': proxy_info})
                site = browser.open(website, timeout=100)
                html = site.read()
                output.append((proxy_info, 'GOOD'))
                print website + "-->" + proxy_info + ":" + "OK"
                with lock:
                    i = i + 1
            except Exception:
                output.append((proxy_info, 'BAD'))
                print website + "-->" + proxy_info + ":" + "BAD"
            self.queue.task_done()

def hitprocess():
    try:
        os.system('clear')
        start = time.time()
        # Only proxies already marked as working are used for hitting the site.
        sql = "SELECT proxy from proxies where `working` = 'GOOD' "
        rows = get_records(sql)
        if rows:
            # Spawn a pool of 50 daemon worker threads feeding off the queue.
            for _ in range(50):
                t = ThreadUrl(queue)
                t.setDaemon(True)
                t.start()
            for row in rows:
                proxy = row[0]
                queue.put(proxy)
            queue.join()
            time_taken = time.time() - start
            print "Elapsed Time: %s s" % time_taken
            print website + " hit %d times" % i
        else:
            print 'Nothing'
    except Exception, detail:
        print "ERROR:", detail

def parseLog():
    """Load user-agent strings from useragentswitcher.xml."""
    file = "useragentswitcher.xml"
    handler = open(file).read()
    soup = Soup(handler)
    for message in soup.findAll('useragent'):
        f_user_dict = dict(message.attrs)
        useragents.append(f_user_dict[u'useragent'])
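
# parseLog() appears to expect an export from the User Agent Switcher browser
# extension; the exact file is not included here, but roughly (illustrative
# assumption, not the shipped file):
#
#   <useragentswitcher>
#       <folder description="Browsers - Windows">
#           <useragent description="Firefox on Windows"
#                      useragent="Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0"/>
#       </folder>
#   </useragentswitcher>
#
# Only the "useragent" attribute of each <useragent> element is collected.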

def main():
    os.system('clear')
    init()
    parseLog()
    hitprocess()
    bulk_update_record(output)

if __name__ == "__main__":
    main()
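
# Note: nothing in this script inserts proxies or marks them 'GOOD'; the proxies
# table is expected to be populated and checked beforehand (for example by a
# separate proxy-checker script). A minimal, hypothetical way to seed the table
# by hand using the helpers defined above:
#
#   >>> import website_hitter_machanize as whm
#   >>> whm.init()
#   >>> whm.bulk_insert_record(['1.2.3.4:8080', '5.6.7.8:3128'])   # example proxies
#   >>> whm.update_record('1.2.3.4:8080', 'GOOD')                  # mark one as working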