From 4b4b118417b5cce5f6d82703a89b674c556d61a6 Mon Sep 17 00:00:00 2001
From: James Gerity <snoopjedi@gmail.com>
Date: Wed, 29 Nov 2023 18:08:53 -0500
Subject: [PATCH] wikipedia: fix unreliable urlparse() usage

---
 sopel/builtins/wikipedia.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/sopel/builtins/wikipedia.py b/sopel/builtins/wikipedia.py
index f55d8c3b2..36e56d819 100644
--- a/sopel/builtins/wikipedia.py
+++ b/sopel/builtins/wikipedia.py
@@ -327,12 +327,16 @@ def mw_image_description(server, image):
 def mw_info(bot, trigger, match=None):
     """Retrieves and outputs a snippet of the linked page."""
     server = match.group(1)
-    page_info = urlparse(match.group(2))
-    article = unquote(page_info.path)
+    page_info = urlparse(match.match.string)
+    # in Python 3.9+ this can be str.removeprefix() instead, but we're confident that
+    # "/wiki/" is at the start of the path anyway since it's part of the pattern
+    trim_offset = len("/wiki/")
+    article = unquote(page_info.path)[trim_offset:]
     section = unquote(page_info.fragment)
 
     if section:
-        if section.startswith('cite_note-'):  # Don't bother trying to retrieve a snippet when cite-note is linked
+        if section.startswith('cite_note-'):
+            # Don't bother looking up a section snippet when a cite-note is linked
             say_snippet(bot, trigger, server, article, show_url=False)
         elif section.startswith('/media'):
             # gh2316: media fragments are usually images; try to get an image description