From 8977dbe341b189fd3870e6279f30d06de27c90b1 Mon Sep 17 00:00:00 2001
From: mal
Date: Wed, 20 Nov 2024 12:52:32 -0500
Subject: [PATCH] safety: fix safeify_url() exception on python 3.11

---
 sopel/builtins/safety.py               | 19 +++++++++++--------
 test/builtins/test_builtins_safety.py  | 27 +++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 8 deletions(-)
 create mode 100644 test/builtins/test_builtins_safety.py

diff --git a/sopel/builtins/safety.py b/sopel/builtins/safety.py
index bf47e7d6f..f1adcdff4 100644
--- a/sopel/builtins/safety.py
+++ b/sopel/builtins/safety.py
@@ -126,11 +126,15 @@ def setup(bot: Sopel) -> None:
 
 def safeify_url(url: str) -> str:
     """Replace bits of a URL to make it hard to browse to."""
-    parts = urlparse(url)
-    scheme = "hxx" + parts.scheme[3:]  # hxxp
-    netloc = parts.netloc.replace(".", "[.]")  # google[.]com and IPv4
-    netloc = netloc.replace(":", "[:]")  # IPv6 addresses (bad lazy method)
-    return urlunparse((scheme, netloc) + parts[2:])
+    try:
+        parts = urlparse(url)
+        scheme = parts.scheme.replace("t", "x")  # hxxp
+        netloc = parts.netloc.replace(".", "[.]")  # google[.]com and IPv4
+        netloc = netloc.replace(":", "[:]")  # IPv6 addresses (bad lazy method)
+        return urlunparse((scheme, netloc) + parts[2:])
+    except ValueError:
+        # Still try to defang URLs that fail parsing
+        return url.replace(":", "[:]").replace(".", "[.]")
 
 
 def download_domain_list(bot: Sopel, path: str) -> bool:
@@ -224,7 +228,6 @@ def url_handler(bot: SopelWrapper, trigger: Trigger) -> None:
     strict = "strict" in mode
 
     for url in tools.web.search_urls(trigger):
-        safe_url = safeify_url(url)
         positives = 0  # Number of engines saying it's malicious
         total = 0  # Number of total engines
 
@@ -249,6 +252,7 @@ def url_handler(bot: SopelWrapper, trigger: Trigger) -> None:
 
         if positives >= 1:
             # Possibly malicious URL detected!
+            safe_url = safeify_url(url)
             LOGGER.info(
                 "Possibly malicious link (%s/%s) posted in %s by %s: %r",
                 positives,
@@ -258,11 +262,10 @@ def url_handler(bot: SopelWrapper, trigger: Trigger) -> None:
                 safe_url,
             )
             bot.say(
-                "{} {} of {} engine{} flagged a link {} posted as malicious".format(
+                "{} {} of {} engines flagged a link {} posted as malicious".format(
                     bold(color("WARNING:", colors.RED)),
                     positives,
                     total,
-                    "" if total == 1 else "s",
                     bold(trigger.nick),
                 )
             )
diff --git a/test/builtins/test_builtins_safety.py b/test/builtins/test_builtins_safety.py
new file mode 100644
index 000000000..7c9853bf3
--- /dev/null
+++ b/test/builtins/test_builtins_safety.py
@@ -0,0 +1,27 @@
+"""Tests for Sopel's ``safety`` plugin"""
+
+from __future__ import annotations
+
+import pytest
+
+from sopel.builtins.safety import safeify_url
+
+URL_TESTS = (
+    # Valid URLs
+    ("http://example.com", ("hxxp://example[.]com")),
+    ("http://1.2.3.4/mgr.cgi", ("hxxp://1[.]2[.]3[.]4/mgr.cgi")),
+    ("http://[fd00:1234::4321]/", ("hxxp://[fd00[:]1234[:][:]4321]/")),
+    ("ftp://1.2.3.4/", ("fxp://1[.]2[.]3[.]4/")),
+    # Invalid, but parsed anyway
+    ("http:///", ("hxxp:///")),
+    ("http://1.2.3.4.5/", ("hxxp://1[.]2[.]3[.]4[.]5/")),
+    ("http://555.555.555.555/", ("hxxp://555[.]555[.]555[.]555/")),
+    # urllib.urlparse() works on these in python <=3.10 but fails in 3.11
+    ("http://[fd00:::]/", ("hxxp://[fd00[:][:][:]]/", "http[:]//[fd00[:][:][:]]/")),
+    ("http://[placeholder]/", ("hxxp://[placeholder]/", "http[:]//[placeholder]/")),
+)
+
+
+@pytest.mark.parametrize("original, safed_options", URL_TESTS)
+def test_safeify_url(original, safed_options):
+    assert safeify_url(original) in safed_options