From 065eab6ef5a844617b10ba42872d0191c938e72c Mon Sep 17 00:00:00 2001
From: tuutti
Date: Mon, 28 Oct 2024 13:15:08 +0200
Subject: [PATCH] UHF-10891: Skip non-404 responses, skip URL that seems to timeout without VPN

---
Reviewer notes (text between "---" and the first "diff --git" line is not
part of the change and is ignored when the patch is applied):

* remoteFileExists(): str_contains() takes the haystack first and the needle
  second. The wps check originally passed the fixed prefix as the haystack,
  so it never matched a real wps URL. The arguments are now in the right
  order.
* This copy was rebuilt from a whitespace-collapsed version of the patch.
  Leading indentation and whitespace-only context lines are a best-effort
  reconstruction; verify them against the target file before applying.

 .../Commands/TransliterateFilesCommands.php | 34 ++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/Drush/Commands/TransliterateFilesCommands.php b/src/Drush/Commands/TransliterateFilesCommands.php
index 2ac0a9f..c327047 100644
--- a/src/Drush/Commands/TransliterateFilesCommands.php
+++ b/src/Drush/Commands/TransliterateFilesCommands.php
@@ -19,6 +19,7 @@
 use Drush\Commands\DrushCommands;
 use GuzzleHttp\ClientInterface;
 use GuzzleHttp\Exception\ClientException;
+use GuzzleHttp\Exception\GuzzleException;
 use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
 
 /**
@@ -111,12 +112,35 @@ private function processEntityType(string $entityType, array $fields) : void {
    *   TRUE if remote file exists, FALSE if not.
    */
   private function remoteFileExists(string $url) : bool {
+    // Skip wps since it seems to require a VPN.
+    if (str_contains($url, 'https://www.hel.fi/wps/')) {
+      return TRUE;
+    }
+
     try {
-      $this->httpClient->request('HEAD', $url);
+      $this->httpClient->request('HEAD', $url, ['timeout' => 15]);
 
       return TRUE;
     }
-    catch (ClientException) {
+    catch (ClientException $e) {
+      $response = $e->getResponse();
+
+      // Skip non-404 responses.
+      if ($response->getStatusCode() !== 404) {
+        return TRUE;
+      }
+      $skip = [
+        'text/html',
+        'text/plain',
+      ];
+      foreach ($skip as $type) {
+        // Skip html content.
+        if (str_contains($response->getHeaderLine('Content-Type'), $type)) {
+          return TRUE;
+        }
+      }
+    }
+    catch (GuzzleException) {
     }
     return FALSE;
   }
@@ -142,11 +166,13 @@ private function processFieldLinks(ContentEntityInterface $entity, string $field
       if (!$href = $node->getAttribute('href')) {
         continue;
       }
+      $href = trim($href);
+
       // Do nothing if file exists already.
       if ($this->remoteFileExists($href)) {
         continue;
       }
-      $this->io()->note(sprintf('Found a broken link [%s]: "%s"', $entity->toUrl()->toString(), $href));
+      $this->io()->note(sprintf('Found a broken link "%s"', $href));
 
       $basename = basename($href);
       // Test sanitized filename and urldecoded+sanitized filename.
@@ -166,7 +192,7 @@ private function processFieldLinks(ContentEntityInterface $entity, string $field
       }
 
       if (!$newUrl) {
-        $this->io()->warning(sprintf('Failed to process [%s]: "%s"', $entity->toUrl()->toString(), $href));
+        $this->io()->warning(sprintf('Failed to process: "%s"', $href));
         continue;
       }
 