From ee3540ef1385164f582113319aed30be8bca5955 Mon Sep 17 00:00:00 2001
From: Nico Albers <nico.albers@aboutyou.com>
Date: Fri, 4 Nov 2022 14:24:27 +0100
Subject: [PATCH 1/2] ENH: refactor gitlab parsing in ShortenLinkTransform
 (#1039)

---
 src/pydata_sphinx_theme/__init__.py | 50 ++++++++++++++++++-----------
 tests/sites/base/page1.rst          |  3 ++
 tests/test_build/gitlab_links.html  |  9 ++++++
 3 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/src/pydata_sphinx_theme/__init__.py b/src/pydata_sphinx_theme/__init__.py
index 13ba74f78..865c89bf0 100644
--- a/src/pydata_sphinx_theme/__init__.py
+++ b/src/pydata_sphinx_theme/__init__.py
@@ -5,7 +5,7 @@
 from pathlib import Path
 from functools import lru_cache
 import json
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlunparse
 
 import jinja2
 from bs4 import BeautifulSoup as bs
@@ -925,38 +925,59 @@ def run(self, **kwargs):
                 self.platform = self.supported_platform.get(uri.netloc)
                 if self.platform is not None:
                     node.attributes["classes"].append(self.platform)
-                    node.children[0] = nodes.Text(self.parse_url(uri.path))
+                    node.children[0] = nodes.Text(self.parse_url(uri))
 
-    def parse_url(self, path):
+    def parse_url(self, uri):
         """
         parse the content of the url with respect to the selected platform
+
+        ``uri`` is the parsed link target; it has to provide ``path`` and
+        ``_replace`` like the result of ``urllib.parse.urlparse`` does. The
+        shortened link text is returned; a url without any path yields the name
+        of the platform only.
         """
+        path = uri.path
 
-        # split the url content
-        # be careful the first one is a "/"
-        parts = path.split("/")
+        if not path.strip("/"):
+            # plain url passed, return platform only
+            return self.platform
+
+        # the leading "/" is removed
+        path = path.lstrip("/")
 
         # check the platform name and read the information accordingly
         # as "<organisation>/<repository>#<element number>"
+        # or "<group>/<subgroup 1>/…/<subgroup N>/<repository>#<element number>"
         if self.platform == "github":
-            text = "github"
+            # split the url content, empty parts (e.g. a trailing "/") are dropped
+            parts = [part for part in path.split("/") if part]
+            text = parts[0]  # organisation
             if len(parts) > 1:
-                text = parts[1]  # organisation
-            if len(parts) > 2:
-                text += f"/{parts[2]}"  # repository
-            if len(parts) > 3:
-                if parts[3] in ["issues", "pull", "discussions"]:
-                    text += f"#{parts[-1]}"  # element number
+                text += f"/{parts[1]}"  # repository
+            # the element number directly follows its type (the last part may be
+            # something else, e.g. ".../pull/1/files") and ".../issues" has none
+            if len(parts) > 3 and parts[2] in ["issues", "pull", "discussions"]:
+                if parts[3].isdigit():
+                    text += f"#{parts[3]}"  # element number
 
         elif self.platform == "gitlab":
-            text = "gitlab"
-            if len(parts) > 1:
-                text = parts[1]  # organisation
-            if len(parts) > 2:
-                text += f"/{parts[2]}"  # repository
-            if len(parts) > 4:
-                if parts[4] in ["issues", "merge_requests"]:
-                    text += f"#{parts[-1]}"  # element number
+            # cp. https://docs.gitlab.com/ee/user/markdown.html#gitlab-specific-references
+            # element urls look like "<project path>/-/<url type>/<element number>"
+            project, _, element = path.partition("/-/")
+            url_type, _, element_number = element.partition("/")
+            element_number = element_number.split("/")[0]
+            symbol = {"issues": "#", "merge_requests": "!"}.get(url_type)
+            if symbol is not None and element_number.isdigit():
+                text = f"{project}{symbol}{element_number}"
+            else:
+                # display the whole uri (after "gitlab.com/") including parameters
+                # for example "<group>/<subgroup1>/<subgroup2>/<repository>"
+                text = uri._replace(netloc="", scheme="")  # remove platform
+                text = urlunparse(text)[1:]  # combine to string and strip leading "/"
+
+        else:
+            # no shortening rule for this platform, display the path as it is
+            text = path
 
         return text
 
diff --git a/tests/sites/base/page1.rst b/tests/sites/base/page1.rst
index b05b07d6f..a5424441d 100644
--- a/tests/sites/base/page1.rst
+++ b/tests/sites/base/page1.rst
@@ -22,3 +22,6 @@ Page 1
     https://gitlab.com/gitlab-org
     https://gitlab.com/gitlab-org/gitlab
     https://gitlab.com/gitlab-org/gitlab/-/issues/375583
+    https://gitlab.com/gitlab-org/gitlab/-/merge_requests/84669
+    https://gitlab.com/gitlab-org/gitlab/-/pipelines/511894707
+    https://gitlab.com/gitlab-com/gl-infra/production/-/issues/6788
diff --git a/tests/test_build/gitlab_links.html b/tests/test_build/gitlab_links.html
index 86f023af7..e93a4db50 100644
--- a/tests/test_build/gitlab_links.html
+++ b/tests/test_build/gitlab_links.html
@@ -12,5 +12,14 @@
   <a class="gitlab reference external" href="https://gitlab.com/gitlab-org/gitlab/-/issues/375583">
    gitlab-org/gitlab#375583
   </a>
+  <a class="gitlab reference external" href="https://gitlab.com/gitlab-org/gitlab/-/merge_requests/84669">
+   gitlab-org/gitlab!84669
+  </a>
+  <a class="gitlab reference external" href="https://gitlab.com/gitlab-org/gitlab/-/pipelines/511894707">
+   gitlab-org/gitlab/-/pipelines/511894707
+  </a>
+  <a class="gitlab reference external" href="https://gitlab.com/gitlab-com/gl-infra/production/-/issues/6788">
+   gitlab-com/gl-infra/production#6788
+  </a>
  </p>
 </div>

From 38dff96ae19187130cd94d4e9f2366daba33ca62 Mon Sep 17 00:00:00 2001
From: Nico Albers <nico.albers@aboutyou.com>
Date: Mon, 7 Nov 2022 11:12:17 +0100
Subject: [PATCH 2/2] improve comments in ShortenLinkTransform (#1039)

---
 src/pydata_sphinx_theme/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pydata_sphinx_theme/__init__.py b/src/pydata_sphinx_theme/__init__.py
index 865c89bf0..1dcbbf3bd 100644
--- a/src/pydata_sphinx_theme/__init__.py
+++ b/src/pydata_sphinx_theme/__init__.py
@@ -937,7 +937,8 @@ def parse_url(self, uri):
             # plain url passed, return platform only
             return self.platform
 
-        # the leading "/" is removed
+        # if the path is not empty it contains a leading "/", which we don't want to
+        # include in the parsed content
         path = path.lstrip("/")
 
         # check the platform name and read the information accordingly