Skip to content

Commit

Permalink
squashme
Browse files: browse the repository at this point in the history
  • Loading branch information
half-duplex committed Nov 12, 2023
1 parent 51bef91 commit 5e56e07
Showing 1 changed file with 21 additions and 12 deletions.
33 changes: 21 additions & 12 deletions sopel/builtins/url.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,6 +26,7 @@

if TYPE_CHECKING:
from collections.abc import Generator

from sopel.bot import Sopel, SopelWrapper
from sopel.config import Config
from sopel.trigger import Trigger
Expand Down Expand Up @@ -426,10 +427,16 @@ def process_urls(
continue

# Call the URL to get a title, if possible
unsafe_urls = [
url
for url, data in bot.memory.get("safety_cache", {}).items()
if data.get("positives")
]
title_results = find_title(
url,
allow_local=bot.config.url.enable_private_resolution,
memory=bot.memory,
unsafe_urls=unsafe_urls,
unsafe_domains=bot.memory.get("safety_cache_local", {}).values(),
)
if not title_results:
# No title found: don't handle this URL
Expand Down Expand Up @@ -484,13 +491,15 @@ def find_title(
url: str,
verify: bool = True,
allow_local: bool = False,
memory: tools.SopelMemory = {},
) -> Optional[Tuple[str, str]]:
unsafe_urls: list = [],
unsafe_domains: list = [],
) -> Optional[tuple[str, str]]:
"""Fetch the title for the given URL.
:param verify: Whether to require a valid certificate when using https
:param allow_local: Allow requests to non-global addresses (RFC1918, etc.)
:param memory: The bot.memory to search for safety.py caches
:param unsafe_urls: A list of URLs to consider malicious and ignore
:param unsafe_domains: A list of domains to consider malicious and ignore
:return: A tuple of the (title, final_hostname) that were found, or None
"""
original_url = url
Expand All @@ -501,14 +510,14 @@ def find_title(
while redirects_left > 0:
redirects_left -= 1
parsed_url = urlparse(url)
if not parsed_url.hostname:
return None

# Avoid fetching known malicious links
safety_cache = bot.memory.get("safety_cache", {})
safety_cache_local = bot.memory.get("safety_cache_local", {})
if url in safety_cache and safety_cache[url]["positives"] > 0:
LOGGER.debug("Ignoring unsafe URL: %r", url)
return None
if parsed_url.hostname.lower() in safety_cache_local:
if url in unsafe_urls:
LOGGER.debug("Ignoring unsafe URL: %r", url)
return None
if parsed_url.hostname.lower() in unsafe_domains:
LOGGER.debug("Ignoring unsafe domain: %r", url)
return None

Expand Down Expand Up @@ -548,9 +557,9 @@ def find_title(
LOGGER.debug(
"URL %r redirected to %r", url, response.headers.get("Location")
)
url = response.headers.get("Location")
if url is None:
if "Location" not in response.headers:
return None
url = response.headers["Location"]
continue

content_bytes = b''
Expand Down

0 comments on commit 5e56e07

Please sign in to comment.