Skip to content

Commit

Permalink
url: port to requests
Browse files Browse the repository at this point in the history
Now that web.py is deprecated, we can port url.py to requests.

Originally from pull request #988, committed here with minor bugfixes
and modified commit message.
  • Loading branch information
anarcat authored and Elad Alfassa committed Jan 30, 2016
1 parent f07cc17 commit ccda12e
Showing 1 changed file with 11 additions and 23 deletions.
34 changes: 11 additions & 23 deletions sopel/modules/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
from __future__ import unicode_literals, absolute_import, print_function, division

import re
from contextlib import closing
from sopel import web, tools
from sopel.module import commands, rule, example
from sopel.config.types import ValidatedAttribute, ListAttribute, StaticSection

import requests

url_finder = None
# These are used to clean up the title tag before actually parsing it. Not the
Expand Down Expand Up @@ -150,14 +152,6 @@ def process_urls(bot, trigger, urls):
pass
# First, check that the URL we got doesn't match
matched = check_callbacks(bot, trigger, url, False)
if matched:
continue
# Then see if it redirects anywhere
new_url = follow_redirects(url)
if not new_url:
continue
# Then see if the final URL matches anything
matched = check_callbacks(bot, trigger, new_url, new_url != url)
if matched:
continue
# Finally, actually show the URL
Expand All @@ -167,20 +161,6 @@ def process_urls(bot, trigger, urls):
return results


def follow_redirects(url):
    """Follow HTTP 3xx redirects, and return the actual URL.

    :param url: the URL to resolve
    :return: the final URL after following any redirects, or ``None`` if
        the request fails for any reason (malformed URL, timeout,
        connection error, too many redirects, ...)
    """
    try:
        # stream=True defers downloading the body; we only need the final
        # URL. The 60-second timeout matches the old
        # web.get_urllib_object(url, 60) call this replaces.
        with closing(requests.get(url, stream=True, timeout=60)) as response:
            # requests follows 3xx redirects by default; response.url is
            # the URL of the final response in the chain.
            url = response.url or url
    except requests.exceptions.RequestException:
        # Narrowed from a bare ``except:`` so that programming errors and
        # KeyboardInterrupt are no longer silently swallowed; any
        # network/HTTP problem still yields None as before.
        return None
    return url


def check_callbacks(bot, trigger, url, run=True):
"""
Check the given URL against the callbacks list. If it matches, and ``run``
Expand All @@ -201,10 +181,18 @@ def check_callbacks(bot, trigger, url, run=True):

def find_title(url):
"""Return the title for the given URL."""
response = requests.get(url, stream=True)
try:
content, headers = web.get(url, return_headers=True, limit_bytes=max_bytes)
content = ''
for byte in response.iter_content(chunk_size=512, decode_unicode=True):
content += str(byte)
if '</title>' in content or len(content) > max_bytes:
break
except UnicodeDecodeError:
return # Fail silently when data can't be decoded
finally:
# need to close the connection because we have not read all the data
response.close()

# Some cleanup that I don't really grok, but was in the original, so
# we'll keep it (with the compiled regexes made global) for now.
Expand Down

0 comments on commit ccda12e

Please sign in to comment.