diff --git a/README.md b/README.md index 4ca8b70..06cf902 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -An HTTP handler for `urllib` that supports HTTP 1.1 and keepalive in both Python 2.x and 3.x versions. +An HTTP handler for `urllib` that supports HTTP 1.1 and keepalive. This repo contains a port from the very old version of [keepalive.py](http://yum.baseurl.org/gitweb?p=urlgrabber.git;a=blob;f=urlgrabber/keepalive.py;h=89ee97dfe2505ff0590e0a312e81f9daac6d9f77;hb=27878b9288fe8a6c6dc17168cdd1d288915929fe) diff --git a/keepalive/keepalive.py b/keepalive/keepalive.py index c07ef81..43787c7 100644 --- a/keepalive/keepalive.py +++ b/keepalive/keepalive.py @@ -20,15 +20,15 @@ # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko # Copyright 2015 Sergio Fernández -"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. +"""An HTTP handler for urllib that supports HTTP 1.1 and keepalive. ->>> import urllib2 +>>> import urllib.request >>> from keepalive import HTTPHandler >>> keepalive_handler = HTTPHandler() ->>> opener = urllib2.build_opener(keepalive_handler) ->>> urllib2.install_opener(opener) +>>> opener = urllib.request.build_opener(keepalive_handler) +>>> urllib.request.install_opener(opener) >>> ->>> fo = urllib2.urlopen('http://www.python.org') +>>> fo = urllib.request.urlopen('http://www.python.org') If a connection to a given host is requested, and all of the existing connections are still in use, another connection will be opened. If @@ -58,7 +58,7 @@ Upon a status of 200, the object returned has a few additional attributes and methods, which should not be used if you want to - remain consistent with the normal urllib2-returned objects: + remain consistent with the normal urllib-returned objects: close_connection() - close the connection to the host readlines() - you know, readlines() @@ -73,55 +73,30 @@ Unfortunately, these are ONLY there if status == 200, so it's not easy to distinguish between non-200 responses. The reason is that - urllib2 tries to do clever things with error codes 301, 302, 401, + urllib tries to do clever things with error codes 301, 302, 401, and 407, and it wraps the object upon return. - For python versions earlier than 2.4, you can avoid this fancy error - handling by setting the module-level global HANDLE_ERRORS to zero. - You see, prior to 2.4, it's the HTTP Handler's job to determine what - to handle specially, and what to just pass up. HANDLE_ERRORS == 0 - means "pass everything up". In python 2.4, however, this job no - longer belongs to the HTTP Handler and is now done by a NEW handler, - HTTPErrorProcessor. Here's the bottom line: - - python version < 2.4 - HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as - errors - HANDLE_ERRORS == 0 pass everything up, error processing is - left to the calling code - python version >= 2.4 - HANDLE_ERRORS == 1 pass up 200, treat the rest as errors - HANDLE_ERRORS == 0 (default) pass everything up, let the - other handlers (specifically, - HTTPErrorProcessor) decide what to do - - In practice, setting the variable either way makes little difference - in python 2.4, so for the most consistent behavior across versions, - you probably just want to use the defaults, which will give you - exceptions on errors. - """ # $Id: keepalive.py,v 1.17 2006/12/08 00:14:16 mstenner Exp $ -import urllib2 -import httplib +import urllib.error +import urllib.request +import http.client import socket -import thread +import _thread DEBUG = None import sys -if sys.version_info < (2, 4): HANDLE_ERRORS = 1 -else: HANDLE_ERRORS = 0 - + class ConnectionManager: """ The connection manager must be able to: * keep track of all existing """ def __init__(self): - self._lock = thread.allocate_lock() + self._lock = _thread.allocate_lock() self._hostmap = {} # map hosts to a list of connections self._connmap = {} # map connections to host self._readymap = {} # map connection to ready state @@ -129,7 +104,7 @@ def __init__(self): def add(self, host, connection, ready): self._lock.acquire() try: - if not self._hostmap.has_key(host): self._hostmap[host] = [] + if host not in self._hostmap: self._hostmap[host] = [] self._hostmap[host].append(connection) self._connmap[connection] = host self._readymap[connection] = ready @@ -159,7 +134,7 @@ def get_ready_conn(self, host): conn = None self._lock.acquire() try: - if self._hostmap.has_key(host): + if host in self._hostmap: for c in self._hostmap[host]: if self._readymap[c]: self._readymap[c] = 0 @@ -183,7 +158,7 @@ def __init__(self): def open_connections(self): """return a list of connected hosts and the number of connections to each. [('foo.com:80', 2), ('bar.org', 1)]""" - return [(host, len(li)) for (host, li) in self._cm.get_all().items()] + return [(host, len(li)) for (host, li) in list(self._cm.get_all().items())] def close_connection(self, host): """close connection(s) to @@ -195,7 +170,7 @@ def close_connection(self, host): def close_all(self): """close all open connections""" - for host, conns in self._cm.get_all().items(): + for host, conns in list(self._cm.get_all().items()): for h in conns: self._cm.remove(h) h.close() @@ -213,7 +188,7 @@ def _remove_connection(self, host, connection, close=0): def do_open(self, req): host = req.host if not host: - raise urllib2.URLError('no host given') + raise urllib.error.URLError('no host given') try: h = self._cm.get_ready_conn(host) @@ -237,8 +212,8 @@ def do_open(self, req): self._cm.add(host, h, 0) self._start_transaction(h, req) r = h.getresponse() - except (socket.error, httplib.HTTPException), err: - raise urllib2.URLError(err) + except (socket.error, http.client.HTTPException) as err: + raise urllib.error.URLError(err) if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason) @@ -255,11 +230,7 @@ def do_open(self, req): r.headers = r.msg r.msg = r.reason - if r.status == 200 or not HANDLE_ERRORS: - return r - else: - return self.parent.error('http', req, r, - r.status, r.msg, r.headers) + return r def _reuse_connection(self, h, req, host): """start the transaction with a re-used connection @@ -273,7 +244,7 @@ def _reuse_connection(self, h, req, host): r = h.getresponse() # note: just because we got something back doesn't mean it # worked. We'll check the version below, too. - except (socket.error, httplib.HTTPException): + except (socket.error, http.client.HTTPException): r = None except: # adding this block just in case we've missed @@ -292,7 +263,7 @@ def _reuse_connection(self, h, req, host): raise if r is None or r.version == 9: - # httplib falls back to assuming HTTP 0.9 if it gets a + # http.client falls back to assuming HTTP 0.9 if it gets a # bad header back. This is most likely to happen if # the socket has been closed by the server since we # last used the connection. @@ -312,22 +283,22 @@ def _start_transaction(self, h, req): h.putrequest('POST', req.selector) else: h.putrequest('POST', req.get_selector()) - if not req.headers.has_key('Content-type'): + if 'Content-type' not in req.headers: h.putheader('Content-type', 'application/x-www-form-urlencoded') - if not req.headers.has_key('Content-length'): + if 'Content-length' not in req.headers: h.putheader('Content-length', '%d' % len(data)) else: if hasattr(req, 'selector'): h.putrequest('GET', req.selector) else: h.putrequest('GET', req.get_selector()) - except (socket.error, httplib.HTTPException), err: - raise urllib2.URLError(err) + except (socket.error, http.client.HTTPException) as err: + raise urllib.error.URLError(err) for args in self.parent.addheaders: h.putheader(*args) - for k, v in req.headers.items(): + for k, v in list(req.headers.items()): h.putheader(k, v) h.endheaders() if req.data: @@ -336,7 +307,7 @@ def _start_transaction(self, h, req): def _get_connection(self, host): return NotImplementedError -class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler): +class HTTPHandler(KeepAliveHandler, urllib.request.HTTPHandler): def __init__(self): KeepAliveHandler.__init__(self) @@ -346,7 +317,7 @@ def http_open(self, req): def _get_connection(self, host): return HTTPConnection(host) -class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler): +class HTTPSHandler(KeepAliveHandler, urllib.request.HTTPSHandler): def __init__(self, ssl_factory=None): KeepAliveHandler.__init__(self) if not ssl_factory: @@ -364,7 +335,7 @@ def _get_connection(self, host): try: return self._ssl_factory.get_https_connection(host) except AttributeError: return HTTPSConnection(host) -class HTTPResponse(httplib.HTTPResponse): +class HTTPResponse(http.client.HTTPResponse): # we need to subclass HTTPResponse in order to # 1) add readline() and readlines() methods # 2) add close_connection() methods @@ -385,10 +356,7 @@ class HTTPResponse(httplib.HTTPResponse): def __init__(self, sock, debuglevel=0, strict=0, method=None): - if method: # the httplib in python 2.3 uses the method arg - httplib.HTTPResponse.__init__(self, sock, debuglevel, method) - else: # 2.2 doesn't - httplib.HTTPResponse.__init__(self, sock, debuglevel) + http.client.HTTPResponse.__init__(self, sock, debuglevel, method) self.fileno = sock.fileno self.code = None self._rbuf = b"" @@ -398,7 +366,7 @@ def __init__(self, sock, debuglevel=0, strict=0, method=None): self._url = None # (same) self._connection = None # (same) - _raw_read = httplib.HTTPResponse.read + _raw_read = http.client.HTTPResponse.read def close(self): if self.fp: @@ -462,11 +430,11 @@ def readlines(self, sizehint = 0): return list -class HTTPConnection(httplib.HTTPConnection): +class HTTPConnection(http.client.HTTPConnection): # use the modified response class response_class = HTTPResponse -class HTTPSConnection(httplib.HTTPSConnection): +class HTTPSConnection(http.client.HTTPSConnection): response_class = HTTPResponse ######################################################################### @@ -474,81 +442,77 @@ class HTTPSConnection(httplib.HTTPSConnection): ######################################################################### def error_handler(url): - global HANDLE_ERRORS - orig = HANDLE_ERRORS keepalive_handler = HTTPHandler() - opener = urllib2.build_opener(keepalive_handler) - urllib2.install_opener(opener) - pos = {0: 'off', 1: 'on'} - for i in (0, 1): - print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i) - HANDLE_ERRORS = i - try: - fo = urllib2.urlopen(url) - foo = fo.read() - fo.close() - try: status, reason = fo.status, fo.reason - except AttributeError: status, reason = None, None - except IOError, e: - print " EXCEPTION: %s" % e - raise - else: - print " status = %s, reason = %s" % (status, reason) - HANDLE_ERRORS = orig + opener = urllib.request.build_opener(keepalive_handler) + urllib.request.install_opener(opener) + try: + fo = urllib.request.urlopen(url) + foo = fo.read() + fo.close() + try: status, reason = fo.status, fo.reason + except AttributeError: status, reason = None, None + except IOError as e: + print(" EXCEPTION: %s" % e) + raise + else: + print(" status = %s, reason = %s" % (status, reason)) hosts = keepalive_handler.open_connections() - print "open connections:", hosts + print("open connections:", hosts) keepalive_handler.close_all() def continuity(url): - import md5 + import hashlib format = '%25s: %s' # first fetch the file with the normal http handler - opener = urllib2.build_opener() - urllib2.install_opener(opener) - fo = urllib2.urlopen(url) + opener = urllib.request.build_opener() + urllib.request.install_opener(opener) + fo = urllib.request.urlopen(url) foo = fo.read() fo.close() - m = md5.new(foo) - print format % ('normal urllib', m.hexdigest()) + m = hashlib.md5() + m.update(foo) + print(format % ('normal urllib', m.hexdigest())) # now install the keepalive handler and try again - opener = urllib2.build_opener(HTTPHandler()) - urllib2.install_opener(opener) + opener = urllib.request.build_opener(HTTPHandler()) + urllib.request.install_opener(opener) - fo = urllib2.urlopen(url) + fo = urllib.request.urlopen(url) foo = fo.read() fo.close() - m = md5.new(foo) - print format % ('keepalive read', m.hexdigest()) + m = hashlib.md5() + m.update(foo) + print(format % ('keepalive read', m.hexdigest())) - fo = urllib2.urlopen(url) - foo = '' + fo = urllib.request.urlopen(url) + foo = b'' while 1: f = fo.readline() if f: foo = foo + f else: break fo.close() - m = md5.new(foo) - print format % ('keepalive readline', m.hexdigest()) + m = hashlib.md5() + m.update(foo) + print(format % ('keepalive readline', m.hexdigest())) def comp(N, url): - print ' making %i connections to:\n %s' % (N, url) + print(' making %i connections to:\n %s' % (N, url)) sys.stdout.write(' first using the normal urllib handlers') # first use normal opener - opener = urllib2.build_opener() - urllib2.install_opener(opener) + opener = urllib.request.build_opener() + urllib.request.install_opener(opener) t1 = fetch(N, url) - print ' TIME: %.3f s' % t1 + print(' TIME: %.3f s' % t1) sys.stdout.write(' now using the keepalive handler ') # now install the keepalive handler and try again - opener = urllib2.build_opener(HTTPHandler()) - urllib2.install_opener(opener) + opener = urllib.request.build_opener(HTTPHandler()) + urllib.request.install_opener(opener) t2 = fetch(N, url) - print ' TIME: %.3f s' % t2 - print ' improvement factor: %.2f' % (t1/t2, ) + print(' TIME: %.3f s' % t2) + print(' improvement factor: %.2f' % (t1/t2, )) def fetch(N, url, delay=0): import time @@ -556,7 +520,7 @@ def fetch(N, url, delay=0): starttime = time.time() for i in range(N): if delay and i > 0: time.sleep(delay) - fo = urllib2.urlopen(url) + fo = urllib.request.urlopen(url) foo = fo.read() fo.close() lens.append(len(foo)) @@ -566,7 +530,7 @@ def fetch(N, url, delay=0): for i in lens[1:]: j = j + 1 if not i == lens[0]: - print "WARNING: inconsistent length on read %i: %i" % (j, i) + print("WARNING: inconsistent length on read %i: %i" % (j, i)) return diff @@ -574,16 +538,16 @@ def test_timeout(url): global DEBUG dbbackup = DEBUG class FakeLogger: - def debug(self, msg, *args): print msg % args + def debug(self, msg, *args): print(msg % args) info = warning = error = debug DEBUG = FakeLogger() - print " fetching the file to establish a connection" - fo = urllib2.urlopen(url) + print(" fetching the file to establish a connection") + fo = urllib.request.urlopen(url) data1 = fo.read() fo.close() i = 20 - print " waiting %i seconds for the server to close the connection" % i + print(" waiting %i seconds for the server to close the connection" % i) while i > 0: sys.stdout.write('\r %2i' % i) sys.stdout.flush() @@ -591,33 +555,33 @@ def debug(self, msg, *args): print msg % args i -= 1 sys.stderr.write('\r') - print " fetching the file a second time" - fo = urllib2.urlopen(url) + print(" fetching the file a second time") + fo = urllib.request.urlopen(url) data2 = fo.read() fo.close() if data1 == data2: - print ' data are identical' + print(' data are identical') else: - print ' ERROR: DATA DIFFER' + print(' ERROR: DATA DIFFER') DEBUG = dbbackup def test(url, N=10): - print "checking error hander (do this on a non-200)" + print("checking error hander (do this on a non-200)") try: error_handler(url) - except IOError, e: - print "exiting - exception will prevent further tests" + except IOError as e: + print("exiting - exception will prevent further tests") sys.exit() - print - print "performing continuity test (making sure stuff isn't corrupted)" + print() + print("performing continuity test (making sure stuff isn't corrupted)") continuity(url) - print - print "performing speed comparison" + print() + print("performing speed comparison") comp(N, url) - print - print "performing dropped-connection check" + print() + print("performing dropped-connection check") test_timeout(url) if __name__ == '__main__': @@ -627,7 +591,7 @@ def test(url, N=10): N = int(sys.argv[1]) url = sys.argv[2] except: - print "%s " % sys.argv[0] + print("%s " % sys.argv[0]) else: test(url, N) diff --git a/setup.py b/setup.py index 4f06360..0f963c1 100644 --- a/setup.py +++ b/setup.py @@ -23,23 +23,13 @@ from setuptools import setup import sys -try: - import six - py3 = six.PY3 -except: - py3 = sys.version_info[0] >= 3 - # metadata -if py3: - import re - _version_re = re.compile(r'__version__\s*=\s*"(.*)"') - for line in open('keepalive/__init__.py', encoding='utf-8'): - version_match = _version_re.match(line) - if version_match: - _version = version_match.group(1) -else: - import keepalive - _version = keepalive.__version__ +import re +_version_re = re.compile(r'__version__\s*=\s*"(.*)"') +for line in open('keepalive/__init__.py', encoding='utf-8'): + version_match = _version_re.match(line) + if version_match: + _version = version_match.group(1) setup( name = 'keepalive', @@ -62,14 +52,10 @@ 'Topic :: Software Development :: Libraries :: Python Modules', 'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', ], - keywords = 'python http urllib keepalive', - use_2to3 = True + keywords = 'python http urllib keepalive' )