-
Notifications
You must be signed in to change notification settings - Fork 1
/
proxypool.py
executable file
·74 lines (63 loc) · 2.54 KB
/
proxypool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# coding:utf-8
from gevent import monkey
monkey.patch_all()
import datetime
import time
import threading
from logger import logger
from DB import DatabaseObject
from config import DB_CONFIG, PROXYPOOL_CONFIG, API_CONFIG
from crawler import Crawler
from validator import Validator
from api import ProxyServer
class ProxyPool:
    """Maintain a pool of validated proxies stored in the ``proxy`` table.

    A monitor loop periodically re-validates rows whose ``updatetime`` is
    old, deletes rows that are older still, and crawls for new proxies when
    too few recently updated rows remain.  :meth:`run` starts that loop and
    the API server on two separate threads.
    """

    def __init__(self):
        """Create the database handle, the validator and the crawler."""
        self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
        self.Validator = Validator()
        self.Crawler = Crawler()

    @staticmethod
    def _cutoff(minutes):
        """Return the local time ``minutes`` ago as ``YYYY-MM-DD HH:MM:SS``.

        This is the string every query below compares against the
        ``updatetime`` column.
        """
        threshold = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
        return threshold.strftime('%Y-%m-%d %H:%M:%S')

    def _monitor(self, interval=1800):
        """Run the update / delete / crawl cycle forever.

        :param interval: seconds to sleep between two cycles (default 1800).
        """
        while True:
            try:
                self._update(PROXYPOOL_CONFIG['UPDATE_TIME'])
                self._delete(PROXYPOOL_CONFIG['DELETE_TIME'])
                self._crawl(PROXYPOOL_CONFIG['CRAWL_TIME'])
            except Exception:
                # One failed cycle must not end the monitor thread for good;
                # record the traceback and try again after the usual sleep.
                # NOTE(review): assumes `logger` is a standard logging.Logger
                # (i.e. provides .exception) - confirm against the logger module.
                logger.exception('Proxy pool maintenance cycle failed')
            time.sleep(interval)

    def _crawl(self, minutes):
        """Crawl and store new proxies when the pool is running low.

        Counts the rows updated within the last ``minutes`` minutes; only
        when that count is below ``PROXYPOOL_CONFIG['MIN_IP_NUM']`` are the
        crawler and the validator run.

        :param minutes: age window, in minutes, of rows counted as fresh.
        """
        # The only interpolated value is a timestamp generated locally by
        # _cutoff, never external input.
        query = "SELECT COUNT(*) FROM proxy WHERE updatetime>'%s'" % self._cutoff(minutes)
        count = self.sqlite.executesql(query)[0]
        if int(count[0]) >= PROXYPOOL_CONFIG['MIN_IP_NUM']:
            return

        logger.info('Crawl proxy begin')
        proxies = self.Crawler.run()
        logger.info('Crawl proxy end')

        logger.info('Validate proxy begin')
        available = self.Validator.run(proxies)
        logger.info('Validate proxy end')

        if DB_CONFIG['SQLITE']:
            self.save2sqlite(available)

    def _delete(self, minutes):
        """Delete every row whose ``updatetime`` is older than ``minutes`` minutes.

        :param minutes: maximum allowed age of a row, in minutes.
        """
        query = "DELETE FROM proxy WHERE updatetime<'%s'" % self._cutoff(minutes)
        self.sqlite.executesql(query)

    def _update(self, minutes):
        """Re-validate rows not updated for ``minutes`` minutes and save the result.

        :param minutes: age, in minutes, beyond which a row is re-validated.
        """
        query = "SELECT ip,port FROM proxy WHERE updatetime<'%s'" % self._cutoff(minutes)
        # Each returned row is an (ip, port) pair; the validator is handed
        # 'ip:port' strings.
        proxies = ['%s:%s' % (ip, port) for ip, port in self.sqlite.executesql(query)]
        if proxies:
            available = self.Validator.run(proxies)
            self.save2sqlite(available)

    def save2sqlite(self, result):
        """Insert ``result`` into the ``proxy`` table, updating what cannot be inserted.

        Whatever ``insert`` returns (presumably the rows it could not
        insert - confirm against DatabaseObject) is retried with ``update``;
        anything ``update`` returns in turn is logged.

        :param result: proxies to store; an empty value is a no-op.
        """
        if not result:
            # Nothing to write, so skip the database round-trip entirely.
            return
        failed = self.sqlite.insert('proxy', result)
        if failed:
            failed = self.sqlite.update('proxy', failed)
        if failed:
            logger.info('Some ip failed to save: %s' % (str(failed)))

    def _api(self):
        """Construct ``ProxyServer`` on ``API_CONFIG['PORT']``.

        Presumably this call starts serving the API - confirm against the
        api module.
        """
        ProxyServer(API_CONFIG['PORT'])

    def run(self):
        """Start the API server and the monitor loop, each on its own thread."""
        workers = [
            threading.Thread(target=self._api),
            threading.Thread(target=self._monitor),
        ]
        for worker in workers:
            worker.start()
# Script entry point: build the pool and start its worker threads.
if __name__ == '__main__':
    proxy_pool = ProxyPool()
    proxy_pool.run()