From cd70092b8e6b7d9064e9893b10bfa7506269eb76 Mon Sep 17 00:00:00 2001 From: Santeri Hurnanen Date: Mon, 4 Sep 2023 10:55:25 +0300 Subject: [PATCH 1/3] UHF-8837: remove pages with X-Robots-Tag: noindex from sitemap --- helfi_proxy.module | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/helfi_proxy.module b/helfi_proxy.module index fa91826..1c0364c 100644 --- a/helfi_proxy.module +++ b/helfi_proxy.module @@ -7,6 +7,7 @@ use Drupal\Core\Asset\AttachedAssetsInterface; use Drupal\Core\Entity\EntityInterface; +use Drupal\helfi_proxy\ProxyManagerInterface; /** * Implements hook_module_implements_alter(). @@ -82,3 +83,38 @@ function helfi_proxy_page_attachments_alter(array &$attachments) { $attachments['#attached']['html_head'][] = [$helfi_content_type, $tag_name]; } } + +/** + * Implements hook_simple_sitemap_links_alter(). + */ +function helfi_proxy_simple_sitemap_links_alter(array &$links, $sitemap_variant) { + /** @var \Drupal\Core\Config\ImmutableConfig $config */ + $config = \Drupal::service('config.factory')->get('helfi_proxy.settings'); + + if (!$paths = implode("\n", $config->get(ProxyManagerInterface::ROBOTS_PATHS) ?? [])) { + return; + } + + /** @var \Drupal\helfi_api_base\Environment\Environment $environment */ + $environment = \Drupal::service('helfi_api_base.environment_resolver')->getActiveEnvironment(); + + /** @var \Drupal\Core\Path\PathMatcherInterface $pathMatcher */ + $pathMatcher = \Drupal::service('path.matcher'); + + // helfi_proxy module sets "X-Robots-Tag: noindex" header for configured + // paths. These url should not be included in the sitemap.xml file. + foreach ($links as $key => $link) { + $baseUrl = $environment->getUrl($link['langcode']); + $url = $link['url']; + + if (str_starts_with($url, $baseUrl)) { + $path = substr($url, strlen($baseUrl)); + + // Remove matched paths from sitemap.xml file. + if ($pathMatcher->matchPath($path, $paths)) { + unset($links[$key]); + } + } + } + +} From 9ba62298bcfb025f2b84dd9fb337cbddd719795c Mon Sep 17 00:00:00 2001 From: Santeri Hurnanen Date: Mon, 4 Sep 2023 12:59:18 +0300 Subject: [PATCH 2/3] UHF-8837: handle environment_resolver exceptions --- helfi_proxy.module | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/helfi_proxy.module b/helfi_proxy.module index 1c0364c..d9c6bcf 100644 --- a/helfi_proxy.module +++ b/helfi_proxy.module @@ -95,8 +95,12 @@ function helfi_proxy_simple_sitemap_links_alter(array &$links, $sitemap_variant) return; } - /** @var \Drupal\helfi_api_base\Environment\Environment $environment */ - $environment = \Drupal::service('helfi_api_base.environment_resolver')->getActiveEnvironment(); + try { + /** @var \Drupal\helfi_api_base\Environment\Environment $environment */ + $environment = \Drupal::service('helfi_api_base.environment_resolver')->getActiveEnvironment(); + } catch (\InvalidArgumentException) { + return; + } /** @var \Drupal\Core\Path\PathMatcherInterface $pathMatcher */ $pathMatcher = \Drupal::service('path.matcher'); @@ -104,7 +108,13 @@ function helfi_proxy_simple_sitemap_links_alter(array &$links, $sitemap_variant) // helfi_proxy module sets "X-Robots-Tag: noindex" header for configured // paths. These url should not be included in the sitemap.xml file. foreach ($links as $key => $link) { - $baseUrl = $environment->getUrl($link['langcode']); + try { + $baseUrl = $environment->getUrl($link['langcode']); + } catch (\InvalidArgumentException) { + // Base url not found for given langcode. + continue; + } + $url = $link['url']; if (str_starts_with($url, $baseUrl)) { From dc19d76cad07575b4b89b58cdd976a02c5595fe2 Mon Sep 17 00:00:00 2001 From: Santeri Hurnanen Date: Mon, 4 Sep 2023 13:47:16 +0300 Subject: [PATCH 3/3] UHF-8837: phpcs fixes --- helfi_proxy.module | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/helfi_proxy.module b/helfi_proxy.module index d9c6bcf..3233c01 100644 --- a/helfi_proxy.module +++ b/helfi_proxy.module @@ -98,7 +98,8 @@ function helfi_proxy_simple_sitemap_links_alter(array &$links, $sitemap_variant) try { /** @var \Drupal\helfi_api_base\Environment\Environment $environment */ $environment = \Drupal::service('helfi_api_base.environment_resolver')->getActiveEnvironment(); - } catch (\InvalidArgumentException) { + } + catch (\InvalidArgumentException) { return; } @@ -110,7 +111,8 @@ function helfi_proxy_simple_sitemap_links_alter(array &$links, $sitemap_variant) foreach ($links as $key => $link) { try { $baseUrl = $environment->getUrl($link['langcode']); - } catch (\InvalidArgumentException) { + } + catch (\InvalidArgumentException) { // Base url not found for given langcode. continue; }