From 42dff140b7c6199771ec4fec132eb9ac4cf154fe Mon Sep 17 00:00:00 2001 From: James Gerity Date: Sat, 25 Mar 2023 01:08:37 -0400 Subject: [PATCH 1/2] url: fix premature loop termination --- sopel/modules/url.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sopel/modules/url.py b/sopel/modules/url.py index 7f16d1879..d50253c36 100644 --- a/sopel/modules/url.py +++ b/sopel/modules/url.py @@ -389,10 +389,10 @@ def process_urls( parsed_url = urlparse(url) - # Check the URL does not match an existing URL callback if check_callbacks(bot, url, use_excludes=not requested): + # URL matches a callback OR is excluded, ignore yield (url, None, None, None, True) - return + continue # Prevent private addresses from being queried if enable_private_resolution is False # FIXME: This does nothing when an attacker knows how to host a 302 From 2051f95235f5f6dd4066149363c65f8532d4b6c2 Mon Sep 17 00:00:00 2001 From: James Gerity Date: Fri, 24 Mar 2023 22:34:33 -0400 Subject: [PATCH 2/2] url: clarify redirection behavior, tidy up docs Co-authored-by: Florian Strzelecki Co-authored-by: dgw --- sopel/modules/url.py | 69 +++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/sopel/modules/url.py b/sopel/modules/url.py index d50253c36..1d216c42c 100644 --- a/sopel/modules/url.py +++ b/sopel/modules/url.py @@ -13,7 +13,7 @@ from ipaddress import ip_address import logging import re -from typing import Generator, List, Optional, Tuple, TYPE_CHECKING +from typing import Generator, List, NamedTuple, Optional, TYPE_CHECKING from urllib.parse import urlparse import dns.resolver @@ -289,10 +289,10 @@ def title_command(bot: SopelWrapper, trigger: Trigger): # needs to be a list so len() can be checked later urls = list(web.search_urls(trigger)) - for url, title, domain, tinyurl, dispatched in process_urls( + for url, title, domain, tinyurl, ignored in process_urls( bot, trigger, urls, requested=True ): - if dispatched: + if ignored: result_count += 1 continue message = "%s | %s" % (title, domain) @@ -317,8 +317,13 @@ def title_command(bot: SopelWrapper, trigger: Trigger): def title_auto(bot: SopelWrapper, trigger: Trigger): """ Automatically show titles for URLs. For shortened URLs/redirects, find - where the URL redirects to and show the title for that (or call a function - from another plugin to give more information). + where the URL redirects to and show the title for that. + + .. note:: + + URLs that match (before redirection) any other registered callbacks + will *not* have their titles shown. + """ # Enabled or disabled by feature flag if not bot.settings.url.enable_auto_title: @@ -342,8 +347,8 @@ def title_auto(bot: SopelWrapper, trigger: Trigger): continue urls.append(url) - for url, title, domain, tinyurl, dispatched in process_urls(bot, trigger, urls): - if not dispatched: + for url, title, domain, tinyurl, ignored in process_urls(bot, trigger, urls): + if not ignored: message = '%s | %s' % (title, domain) if tinyurl: message += ' ( %s )' % tinyurl @@ -353,16 +358,30 @@ def title_auto(bot: SopelWrapper, trigger: Trigger): bot.memory["last_seen_url"][trigger.sender] = url +class URLInfo(NamedTuple): + """Helper class for information about a URL handled by this plugin.""" + + url: str + + title: Optional[str] + """The title associated with ``url``, if appropriate.""" + + hostname: Optional[str] + """The hostname associated with ``url``, if appropriate.""" + + tinyurl: Optional[str] + """A shortened form of ``url``, if appropriate.""" + + ignored: bool + """Whether or not this URL matches any registered callbacks or is explicitly excluded.""" + + def process_urls( bot: SopelWrapper, trigger: Trigger, urls: List[str], requested: bool = False, -) -> Generator[ - Tuple[str, Optional[str], Optional[str], Optional[str], bool], - None, - None, -]: +) -> Generator[URLInfo, None, None]: """ For each URL in the list, ensure it should be titled, and do so. @@ -371,15 +390,25 @@ def process_urls( :param urls: The URLs detected in the triggering message :param requested: Whether the title was explicitly requested (vs automatic) - See if it's handled by another plugin. If not, find where it redirects to, - if anywhere. If that redirected URL should be handled by another plugin, - dispatch the callback for it. Return a list of - (url, title, hostname, tinyurl, dispatched) tuples for each URL. + Yields a tuple ``(url, title, hostname, tinyurl, ignored)`` for each URL. + + .. note: + + If a URL in ``urls`` has any registered callbacks, this function will NOT + retrieve the title, and considers the URL as dispatched to those callbacks. + In this case, only the ``url`` and ``ignored=True`` will be set; all + other values will be ``None``. + + .. note: + + For titles explicitly requested by the user, ``exclusion_char`` and + exclusions from the ``.urlban``/``.urlpban`` commands are skipped. + + .. versionchanged:: 8.0 - If a callback was dispatched, only the url and dispatched=True will be set. + This function **does not** notify callbacks registered for URLs + redirected to from URLs passed to this function. See #2432, #2230. - For titles explicitly requested by the user, exclusion_char and excludes - are skipped. """ shorten_url_length = bot.config.url.shorten_url_length for url in urls: @@ -391,7 +420,7 @@ def process_urls( if check_callbacks(bot, url, use_excludes=not requested): # URL matches a callback OR is excluded, ignore - yield (url, None, None, None, True) + yield URLInfo(url, None, None, None, True) continue # Prevent private addresses from being queried if enable_private_resolution is False