-
-
Notifications
You must be signed in to change notification settings - Fork 405
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
11 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,10 +8,12 @@ | |
from __future__ import unicode_literals, absolute_import, print_function, division | ||
|
||
import re | ||
from contextlib import closing | ||
from sopel import web, tools | ||
from sopel.module import commands, rule, example | ||
from sopel.config.types import ValidatedAttribute, ListAttribute, StaticSection | ||
|
||
import requests | ||
|
||
url_finder = None | ||
# These are used to clean up the title tag before actually parsing it. Not the | ||
|
@@ -150,14 +152,6 @@ def process_urls(bot, trigger, urls): | |
pass | ||
# First, check that the URL we got doesn't match | ||
matched = check_callbacks(bot, trigger, url, False) | ||
if matched: | ||
continue | ||
# Then see if it redirects anywhere | ||
new_url = follow_redirects(url) | ||
if not new_url: | ||
continue | ||
# Then see if the final URL matches anything | ||
matched = check_callbacks(bot, trigger, new_url, new_url != url) | ||
if matched: | ||
continue | ||
# Finally, actually show the URL | ||
|
@@ -167,20 +161,6 @@ def process_urls(bot, trigger, urls): | |
return results | ||
|
||
|
||
def follow_redirects(url):
    """Follow HTTP 3xx redirects, and return the final URL.

    :param url: the URL to resolve
    :return: the fully-resolved URL, or ``None`` if resolution fails
    """
    try:
        connection = web.get_urllib_object(url, 60)
        try:
            # geturl() reflects the URL after any redirects; fall back to
            # the original URL if the library returns an empty value.
            url = connection.geturl() or url
        finally:
            # Close even if geturl() raises, so the socket is not leaked.
            connection.close()
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # still propagate; any network or URL error means "can't resolve".
        return None
    return url
|
||
|
||
def check_callbacks(bot, trigger, url, run=True): | ||
""" | ||
Check the given URL against the callbacks list. If it matches, and ``run`` | ||
|
@@ -201,10 +181,18 @@ def check_callbacks(bot, trigger, url, run=True): | |
|
||
def find_title(url): | ||
"""Return the title for the given URL.""" | ||
response = requests.get(url, stream=True) | ||
try: | ||
content, headers = web.get(url, return_headers=True, limit_bytes=max_bytes) | ||
content = '' | ||
for byte in response.iter_content(chunk_size=512, decode_unicode=True): | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
elad661
Contributor
|
||
content += str(byte) | ||
if '</title>' in content or len(content) > max_bytes: | ||
break | ||
except UnicodeDecodeError: | ||
return # Fail silently when data can't be decoded | ||
finally: | ||
# need to close the connexion because we have not read all the data | ||
response.close() | ||
|
||
# Some cleanup that I don't really grok, but was in the original, so | ||
# we'll keep it (with the compiled regexes made global) for now. | ||
|
Why hardcode that value here? Shouldn't we rely on requests.CHUNK_SIZE instead?