Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: fix premature loop termination, docstring cleanup & slight refactoring #2433

Merged
merged 2 commits into from
Mar 28, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 51 additions & 22 deletions sopel/modules/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ipaddress import ip_address
import logging
import re
from typing import Generator, List, Optional, Tuple, TYPE_CHECKING
from typing import Generator, List, NamedTuple, Optional, TYPE_CHECKING
from urllib.parse import urlparse

import dns.resolver
Expand Down Expand Up @@ -289,10 +289,10 @@ def title_command(bot: SopelWrapper, trigger: Trigger):
# needs to be a list so len() can be checked later
urls = list(web.search_urls(trigger))

for url, title, domain, tinyurl, dispatched in process_urls(
for url, title, domain, tinyurl, ignored in process_urls(
bot, trigger, urls, requested=True
):
if dispatched:
if ignored:
result_count += 1
continue
message = "%s | %s" % (title, domain)
Expand All @@ -317,8 +317,13 @@ def title_command(bot: SopelWrapper, trigger: Trigger):
def title_auto(bot: SopelWrapper, trigger: Trigger):
"""
Automatically show titles for URLs. For shortened URLs/redirects, find
where the URL redirects to and show the title for that (or call a function
from another plugin to give more information).
where the URL redirects to and show the title for that.

.. note::
SnoopJ marked this conversation as resolved.
Show resolved Hide resolved

URLs that match (before redirection) any other registered callbacks
will *not* have their titles shown.

"""
# Enabled or disabled by feature flag
if not bot.settings.url.enable_auto_title:
Expand All @@ -342,8 +347,8 @@ def title_auto(bot: SopelWrapper, trigger: Trigger):
continue
urls.append(url)

for url, title, domain, tinyurl, dispatched in process_urls(bot, trigger, urls):
if not dispatched:
for url, title, domain, tinyurl, ignored in process_urls(bot, trigger, urls):
if not ignored:
message = '%s | %s' % (title, domain)
if tinyurl:
message += ' ( %s )' % tinyurl
Expand All @@ -353,16 +358,30 @@ def title_auto(bot: SopelWrapper, trigger: Trigger):
bot.memory["last_seen_url"][trigger.sender] = url


class URLInfo(NamedTuple):
"""Helper class for information about a URL handled by this plugin."""

url: str

title: Optional[str]
"""The title associated with ``url``, if appropriate."""

hostname: Optional[str]
"""The hostname associated with ``url``, if appropriate."""

tinyurl: Optional[str]
"""A shortened form of ``url``, if appropriate."""
dgw marked this conversation as resolved.
Show resolved Hide resolved

ignored: bool
"""Whether or not this URL matches any registered callbacks or is explicitly excluded."""


def process_urls(
bot: SopelWrapper,
trigger: Trigger,
urls: List[str],
requested: bool = False,
) -> Generator[
Tuple[str, Optional[str], Optional[str], Optional[str], bool],
None,
None,
]:
) -> Generator[URLInfo, None, None]:
"""
For each URL in the list, ensure it should be titled, and do so.

Expand All @@ -371,15 +390,25 @@ def process_urls(
:param urls: The URLs detected in the triggering message
:param requested: Whether the title was explicitly requested (vs automatic)

See if it's handled by another plugin. If not, find where it redirects to,
if anywhere. If that redirected URL should be handled by another plugin,
dispatch the callback for it. Return a list of
(url, title, hostname, tinyurl, dispatched) tuples for each URL.
Yields a tuple ``(url, title, hostname, tinyurl, ignored)`` for each URL.

.. note:
SnoopJ marked this conversation as resolved.
Show resolved Hide resolved

If a URL in ``urls`` has any registered callbacks, this function will NOT
retrieve the title, and considers the URL as dispatched to those callbacks.
In this case, only the ``url`` and ``ignored=True`` will be set; all
other values will be ``None``.

.. note:
SnoopJ marked this conversation as resolved.
Show resolved Hide resolved

If a callback was dispatched, only the url and dispatched=True will be set.
For titles explicitly requested by the user, ``exclusion_char`` and
exclusions from the ``.urlban``/``.urlpban`` commands are skipped.

.. versionchanged:: 8.0

This function **does not** notify callbacks registered for URLs
redirected to from URLs passed to this function. See #2432, #2230.

For titles explicitly requested by the user, exclusion_char and excludes
are skipped.
"""
shorten_url_length = bot.config.url.shorten_url_length
for url in urls:
Expand All @@ -389,10 +418,10 @@ def process_urls(

parsed_url = urlparse(url)

# Check the URL does not match an existing URL callback
if check_callbacks(bot, url, use_excludes=not requested):
yield (url, None, None, None, True)
return
# URL matches a callback OR is excluded, ignore
yield URLInfo(url, None, None, None, True)
continue

# Prevent private addresses from being queried if enable_private_resolution is False
# FIXME: This does nothing when an attacker knows how to host a 302
Expand Down