From 6da0b037025a1dc9ae56eb23cf74f69a5f39c2a7 Mon Sep 17 00:00:00 2001 From: tuutti Date: Mon, 28 Oct 2024 15:34:13 +0200 Subject: [PATCH] UHF-10891: Better url validation --- .../Commands/TransliterateFilesCommands.php | 45 +++++++++---------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/Drush/Commands/TransliterateFilesCommands.php b/src/Drush/Commands/TransliterateFilesCommands.php index c327047..a87fec5 100644 --- a/src/Drush/Commands/TransliterateFilesCommands.php +++ b/src/Drush/Commands/TransliterateFilesCommands.php @@ -102,6 +102,24 @@ private function processEntityType(string $entityType, array $fields) : void { } } + /** + * Checks if the given link is valid. + * + * @param string $url + * The URL. + * + * @return bool + * TRUE if link is valid, FALSE if not. + */ + private function isValidLink(string $url) : bool { + $validLinks = [ + 'blob.core.windows.net', + '/sites/default/files/', + ]; + + return (bool) array_filter($validLinks, fn ($link) => str_contains($url, $link)); + } + /** * Checks if the given remote file exists. * @@ -112,34 +130,11 @@ private function processEntityType(string $entityType, array $fields) : void { * TRUE if remote file exists, FALSE if not. */ private function remoteFileExists(string $url) : bool { - // Skip wps since it seems to require a VPN. - if (str_contains('https://www.hel.fi/wps/', $url)) { - return TRUE; - } - try { $this->httpClient->request('HEAD', $url, ['timeout' => 15]); return TRUE; } - catch (ClientException $e) { - $response = $e->getResponse(); - - // Skip non-404 responses. - if ($response->getStatusCode() !== 404) { - return TRUE; - } - $skip = [ - 'text/html', - 'text/plain', - ]; - foreach ($skip as $type) { - // Skip html content. 
- if (str_contains($response->getHeaderLine('Content-Type'), $type)) { - return TRUE; - } - } - } catch (GuzzleException) { } return FALSE; @@ -168,8 +163,8 @@ private function processFieldLinks(ContentEntityInterface $entity, string $field } $href = trim($href); - // Do nothing if file exists already. - if ($this->remoteFileExists($href)) { + // Skip invalid links or links that do not result in a 404 error. + if (!$this->isValidLink($href) || $this->remoteFileExists($href)) { continue; } $this->io()->note(sprintf('Found a broken link "%s"', $href));