Skip to content

Commit

Permalink
url: port to requests
Browse files Browse the repository at this point in the history
Now that web.py is deprecated, we can port url.py to requests.

Originally from pull request #988, committed here with minor bugfixes
and modified commit message.
  • Loading branch information
anarcat authored and Elad Alfassa committed Jan 30, 2016
1 parent f07cc17 commit ccda12e
Showing 1 changed file with 11 additions and 23 deletions.
34 changes: 11 additions & 23 deletions sopel/modules/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
from __future__ import unicode_literals, absolute_import, print_function, division

import re
from contextlib import closing
from sopel import web, tools
from sopel.module import commands, rule, example
from sopel.config.types import ValidatedAttribute, ListAttribute, StaticSection

import requests

url_finder = None
# These are used to clean up the title tag before actually parsing it. Not the
Expand Down Expand Up @@ -150,14 +152,6 @@ def process_urls(bot, trigger, urls):
pass
# First, check that the URL we got doesn't match
matched = check_callbacks(bot, trigger, url, False)
if matched:
continue
# Then see if it redirects anywhere
new_url = follow_redirects(url)
if not new_url:
continue
# Then see if the final URL matches anything
matched = check_callbacks(bot, trigger, new_url, new_url != url)
if matched:
continue
# Finally, actually show the URL
Expand All @@ -167,20 +161,6 @@ def process_urls(bot, trigger, urls):
return results


def follow_redirects(url):
    """Follow HTTP 3xx redirects, and return the actual URL.

    :param url: the URL to resolve
    :return: the final URL after following any redirects, or ``None`` if
        the request fails for any reason (malformed URL, timeout,
        connection error, too many redirects, ...)
    """
    try:
        # stream=True defers downloading the body; we only need the final
        # URL. The 60-second timeout matches the old
        # web.get_urllib_object(url, 60) call this replaces.
        with closing(requests.get(url, stream=True, timeout=60)) as response:
            # requests follows 3xx redirects by default; response.url is
            # the URL of the final response in the chain.
            url = response.url or url
    except requests.exceptions.RequestException:
        # Narrowed from a bare ``except:`` so that programming errors and
        # KeyboardInterrupt are no longer silently swallowed; any
        # network/HTTP problem still yields None as before.
        return None
    return url


def check_callbacks(bot, trigger, url, run=True):
"""
Check the given URL against the callbacks list. If it matches, and ``run``
Expand All @@ -201,10 +181,18 @@ def check_callbacks(bot, trigger, url, run=True):

def find_title(url):
"""Return the title for the given URL."""
response = requests.get(url, stream=True)
try:
content, headers = web.get(url, return_headers=True, limit_bytes=max_bytes)
content = ''
for byte in response.iter_content(chunk_size=512, decode_unicode=True):
content += str(byte)
if '</title>' in content or len(content) > max_bytes:
break
except UnicodeDecodeError:
return # Fail silently when data can't be decoded
finally:
# need to close the connection because we have not read all the data
response.close()

# Some cleanup that I don't really grok, but was in the original, so
# we'll keep it (with the compiled regexes made global) for now.
Expand Down

0 comments on commit ccda12e

Please sign in to comment.