Skip to content

Commit

Permalink
[web] Allow modules to control the headers sent with HTTP requests
Browse files Browse the repository at this point in the history
  • Loading branch information
Elad Alfassa committed May 28, 2013
1 parent a4f6e5c commit 0af9faa
Showing 1 changed file with 19 additions and 12 deletions.
31 changes: 19 additions & 12 deletions willie/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
Copyright © 2008, Sean B. Palmer, inamidst.com
Copyright © 2009, Michael Yanovich <[email protected]>
Copyright © 2012, Dimitri Molenaars, Tyrope.nl.
Copyright © 2012, Elad Alfassa, <[email protected]>
Copyright © 2012-2013, Elad Alfassa, <[email protected]>
Licensed under the Eiffel Forum License 2.
More info:
Expand All @@ -24,27 +24,28 @@
from htmlentitydefs import name2codepoint

#HTTP GET
def get(uri, timeout=20, headers=None):
    """
    Execute an HTTP GET query on `uri`, and return the response body.

    `timeout` is an optional argument, which represents how much time we
    should wait before throwing a timeout exception. It defaults to 20, but
    can be set to higher values if you are communicating with a slow web
    application.
    `headers` is an optional dict of HTTP headers to send with the request.
    """
    # URIs without a scheme are treated as plain HTTP.
    # NOTE: the check is a bare prefix test, so a scheme-less host that itself
    # begins with "http" is passed through without a scheme being added.
    if not uri.startswith('http'):
        uri = "http://" + uri
    u = get_urllib_object(uri, timeout, headers)
    try:
        return u.read()
    finally:
        # Release the connection even when read() raises.
        u.close()

# Get HTTP headers
def head(uri, timeout=20, headers=None):
    """
    Execute an HTTP GET query on `uri`, and return the response headers.

    `timeout` is an optional argument, which represents how much time we
    should wait before throwing a timeout exception. It defaults to 20, but
    can be set to higher values if you are communicating with a slow web
    application.
    `headers` is an optional dict of HTTP headers to send with the request.
    """
    # URIs without a scheme are treated as plain HTTP.
    if not uri.startswith('http'):
        uri = "http://" + uri
    u = get_urllib_object(uri, timeout, headers)
    try:
        # Only the header block is returned; the body is never read.
        return u.info()
    finally:
        # Release the connection even when info() raises.
        u.close()
Expand All @@ -53,10 +54,11 @@ def head(uri, timeout=20):
def post(uri, query, headers=None):
    """
    Execute an HTTP POST query and return the response body.

    `uri` is the target URI, and `query` is the POST data.
    `headers` is an optional dict of HTTP headers to send with the request.
    """
    # URIs without a scheme are treated as plain HTTP.
    if not uri.startswith('http'):
        uri = "http://" + uri
    # urlopen()'s third positional argument is the timeout, not the headers,
    # so custom headers have to be attached to a Request object instead.
    req = urllib2.Request(uri, query, headers or {})
    u = urllib2.urlopen(req)
    try:
        return u.read()
    finally:
        # Release the connection even when read() raises.
        u.close()
Expand All @@ -78,18 +80,23 @@ def decode(html):

#For internal use in web.py, (modules can use this if they need a urllib object they can execute read() on)
#Both handles redirects and makes sure input URI is UTF-8
def get_urllib_object(uri, timeout):
def get_urllib_object(uri, timeout, headers):
"""
Return a urllib2 object for `uri` and `timeout`. This is better than using urrlib2 directly, for it handles redirects, makes sure URI is utf8, and is shorter and easier to use.
Return a urllib2 object for `uri`, `timeout` and `headers`. This is better than using urllib2 directly, for it handles redirects, makes sure URI is utf8, and is shorter and easier to use.
Modules may use this if they need a urllib2 object to execute .read() on. For more information, refer to the urllib2 documentation.
"""
redirects = 0
try:
uri = uri.encode("utf-8")
except:
pass
original_headers = {'Accept':'*/*', 'User-Agent':'Mozilla/5.0 (Jenni)'}
if headers is not None:
headers = dict(original_headers.items(), headers.items())
else:
headers = original_headers
while True:
req = urllib2.Request(uri, headers={'Accept':'*/*', 'User-Agent':'Mozilla/5.0 (Jenni)'})
req = urllib2.Request(uri, headers=headers)
try: u = urllib2.urlopen(req, None, timeout)
except urllib2.HTTPError, e:
return e.fp
Expand Down

0 comments on commit 0af9faa

Please sign in to comment.