[web] Allow modules to control the headers sent with HTTP requests

sopel-irc · May 28, 2013 · 0af9faa · 0af9faa
1 parent a4f6e5c
commit 0af9faa
Showing 1 changed file with 19 additions and 12 deletions.
diff --git a/willie/web.py b/willie/web.py
@@ -12,7 +12,7 @@
 Copyright © 2008, Sean B. Palmer, inamidst.com
 Copyright © 2009, Michael Yanovich <[email protected]>
 Copyright © 2012, Dimitri Molenaars, Tyrope.nl.
-Copyright © 2012, Elad Alfassa, <[email protected]>
+Copyright © 2012-2013, Elad Alfassa, <[email protected]>
 Licensed under the Eiffel Forum License 2.
 
 More info:
@@ -24,27 +24,28 @@
 from htmlentitydefs import name2codepoint
 
 #HTTP GET
-def get(uri, timeout=20):
+def get(uri, timeout=20, headers=None):
     """
     Execute an HTTP GET query on `uri`, and return the result.
     `timeout` is an optional argument, which represents how much time we should wait before throwing a timeout exception. It defaults to 20, but can be set to higher values if you are communicating with a slow web application.
+    `headers` is a dict of HTTP headers to send with the request.
     """
     if not uri.startswith('http'):
-        return
-    u = get_urllib_object(uri, timeout)
+        uri = "http://" + uri
+    u = get_urllib_object(uri, timeout, headers)
     bytes = u.read()
     u.close()
     return bytes
 
 # Get HTTP headers
-def head(uri, timeout=20):
+def head(uri, timeout=20, headers=None):
     """
     Execute an HTTP GET query on `uri`, and return the headers.
     `timeout` is an optional argument, which represents how much time we should wait before throwing a timeout exception. It defaults to 20, but can be set to higher values if you are communicating with a slow web application.
     """
     if not uri.startswith('http'):
-        return
-    u = get_urllib_object(uri, timeout)
+        uri = "http://" + uri
+    u = get_urllib_object(uri, timeout, headers)
     info = u.info()
     u.close()
     return info
@@ -53,10 +54,11 @@ def head(uri, timeout=20):
 def post(uri, query):
     """
     Execute an HTTP POST query. `uri` is the target URI, and `query` is the POST data.
+    `headers` is a dict of HTTP headers to send with the request.
     """
     if not uri.startswith('http'):
-        return
-    u = urllib2.urlopen(uri, query)
+        uri = "http://" + uri
+    u = urllib2.urlopen(uri, query, headers)
     bytes = u.read()
     u.close()
     return bytes
@@ -78,18 +80,23 @@ def decode(html):
 
 #For internal use in web.py, (modules can use this if they need a urllib object they can execute read() on)
 #Both handles redirects and makes sure input URI is UTF-8
-def get_urllib_object(uri, timeout):
+def get_urllib_object(uri, timeout, headers):
     """
-    Return a urllib2 object for `uri` and `timeout`. This is better than using urrlib2 directly, for it handles redirects, makes sure URI is utf8, and is shorter and easier to use.
+    Return a urllib2 object for `uri`, `timeout`, and `headers`. This is better than using urllib2 directly, for it handles redirects, makes sure URI is utf8, and is shorter and easier to use.
     Modules may use this if they need a urllib2 object to execute .read() on. For more information, refer to the urllib2 documentation.
     """
     redirects = 0
     try:
         uri = uri.encode("utf-8")
     except:
         pass
+    original_headers = {'Accept':'*/*', 'User-Agent':'Mozilla/5.0 (Jenni)'}
+    if headers is not None:
+        headers = dict(original_headers.items(), headers.items())
+    else:
+        headers = original_headers
     while True:
-        req = urllib2.Request(uri, headers={'Accept':'*/*', 'User-Agent':'Mozilla/5.0 (Jenni)'})
+        req = urllib2.Request(uri, headers=headers)
         try: u = urllib2.urlopen(req, None, timeout)
         except urllib2.HTTPError, e:
             return e.fp