From 4b4b118417b5cce5f6d82703a89b674c556d61a6 Mon Sep 17 00:00:00 2001 From: James Gerity Date: Wed, 29 Nov 2023 18:08:53 -0500 Subject: [PATCH] wikipedia: fix unreliable urlparse() usage --- sopel/builtins/wikipedia.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sopel/builtins/wikipedia.py b/sopel/builtins/wikipedia.py index f55d8c3b2..36e56d819 100644 --- a/sopel/builtins/wikipedia.py +++ b/sopel/builtins/wikipedia.py @@ -327,12 +327,16 @@ def mw_image_description(server, image): def mw_info(bot, trigger, match=None): """Retrieves and outputs a snippet of the linked page.""" server = match.group(1) - page_info = urlparse(match.group(2)) - article = unquote(page_info.path) + page_info = urlparse(match.match.string) + # in Python 3.9+ this can be str.removeprefix() instead, but we're confident that + # "/wiki/" is at the start of the path anyway since it's part of the pattern + trim_offset = len("/wiki/") + article = unquote(page_info.path)[trim_offset:] section = unquote(page_info.fragment) if section: - if section.startswith('cite_note-'): # Don't bother trying to retrieve a snippet when cite-note is linked + if section.startswith('cite_note-'): + # Don't bother trying to retrieve a snippet when cite-note is linked say_snippet(bot, trigger, server, article, show_url=False) elif section.startswith('/media'): # gh2316: media fragments are usually images; try to get an image description