Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UHF-10891: Initial command to transliterate filenames embedded in text fields #33

Merged
merged 11 commits into from
Oct 28, 2024
181 changes: 173 additions & 8 deletions src/Drush/Commands/TransliterateFilesCommands.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,22 @@

namespace Drupal\helfi_azure_fs\Drush\Commands;

use Drupal\Component\Utility\Html;
use Drupal\Core\Entity\ContentEntityInterface;
use Drupal\Core\Entity\EntityFieldManagerInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Entity\TranslatableInterface;
use Drupal\Core\File\Event\FileUploadSanitizeNameEvent;
use Drupal\Core\File\Exception\FileException;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface;
use Drupal\file\Entity\File;
use Drupal\file\FileInterface;
use Drush\Attributes\Command;
use Drush\Commands\AutowireTrait;
use Drush\Commands\DrushCommands;
use Symfony\Component\Console\Style\SymfonyStyle;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;

/**
Expand All @@ -38,34 +43,194 @@ public function __construct(
private readonly EventDispatcherInterface $eventDispatcher,
private readonly EntityTypeManagerInterface $entityTypeManager,
private readonly FileSystemInterface $fileSystem,
private readonly EntityFieldManagerInterface $entityFieldManager,
private readonly ClientInterface $httpClient,
) {
$this->io = new SymfonyStyle($this->input(), $this->output());
parent::__construct();
}

/**
 * Gets the sanitized filename.
 *
 * Dispatches a FileUploadSanitizeNameEvent for the given filename and
 * returns whatever filename the event holds after the subscribers have run.
 *
 * @param string $filename
 *   The filename to sanitize.
 *
 * @return string
 *   The sanitized filename.
 */
private function getSanitizedFilename(string $filename): string {
  // The second constructor argument is passed as an empty string here, the
  // same way the original code did; only the filename is of interest.
  $event = new FileUploadSanitizeNameEvent($filename, '');
  $this->eventDispatcher->dispatch($event);

  return $event->getFilename();
}

/**
 * Processes all fields for given entity type.
 *
 * For every field, queries the entities whose field value contains a link
 * to MS blob storage or to the local files directory, and then processes
 * the field links of every translation of each matching entity.
 *
 * @param string $entityType
 *   The entity type to process.
 * @param array $fields
 *   The fields to process, keyed by field name.
 */
private function processEntityType(string $entityType, array $fields) : void {
  // The storage does not change between fields, so look it up only once.
  $storage = $this->entityTypeManager->getStorage($entityType);

  foreach (array_keys($fields) as $name) {
    $query = $storage->getQuery();
    // Only load entities that have a link to a local or MS blob storage
    // file. Both patterns need a leading wildcard because the link is
    // embedded somewhere inside the markup, not at the start of the value.
    $conditionGroup = $query->orConditionGroup();
    $conditionGroup
      ->condition($name, '%blob.core.windows.net%', 'LIKE');
    $conditionGroup
      ->condition($name, '%/sites/default/files/%', 'LIKE');
    $query->exists($name)
      ->condition($conditionGroup);
    $query->accessCheck(FALSE);
    $ids = $query->execute();

    foreach ($ids as $id) {
      $entity = $storage->load($id);

      // Skip entities that failed to load (or that cannot carry
      // translatable fields) instead of relying on assert(), which is
      // usually disabled in production.
      if (!$entity instanceof ContentEntityInterface) {
        continue;
      }

      foreach ($entity->getTranslationLanguages() as $language) {
        $this->processFieldLinks($entity->getTranslation($language->getId()), $name);
      }
    }
  }
}

/**
 * Tells whether the URL points to a file location this command handles.
 *
 * A URL qualifies when it contains either the MS blob storage host name or
 * the local public files path.
 *
 * @param string $url
 *   The URL.
 *
 * @return bool
 *   TRUE if link is valid, FALSE if not.
 */
private function isValidLink(string $url) : bool {
  foreach (['blob.core.windows.net', '/sites/default/files/'] as $needle) {
    if (str_contains($url, $needle)) {
      return TRUE;
    }
  }

  return FALSE;
}

/**
 * Checks if the given remote file exists.
 *
 * Sends a HEAD request with a 15 second timeout. Any Guzzle exception
 * raised by the request is treated as "file does not exist".
 *
 * @param string $url
 *   The url to check.
 *
 * @return bool
 *   TRUE if remote file exists, FALSE if not.
 */
private function remoteFileExists(string $url) : bool {
  try {
    $this->httpClient->request('HEAD', $url, ['timeout' => 15]);
  }
  catch (GuzzleException) {
    // The request failed, so report the file as missing.
    return FALSE;
  }

  return TRUE;
}

/**
 * Sanitize filenames inside text fields.
 *
 * Every item (delta) of the field is scanned for broken file links. The
 * entity is saved once, and only when at least one item actually changed.
 *
 * @param \Drupal\Core\Entity\ContentEntityInterface $entity
 *   The entity translation to process.
 * @param string $fieldName
 *   The field name.
 */
private function processFieldLinks(ContentEntityInterface $entity, string $fieldName) : void {
  $hasChanges = FALSE;

  foreach ($entity->get($fieldName) as $item) {
    $value = $item->value;

    if (!is_string($value) || $value === '') {
      continue;
    }
    $updated = $this->replaceBrokenLinks($value);

    if ($updated === $value) {
      continue;
    }
    // Write only the 'value' property of this item. Setting a plain string
    // on the whole field would replace the item list and drop the text
    // format, the summary and any additional deltas.
    $item->value = $updated;
    $hasChanges = TRUE;
  }

  if ($hasChanges) {
    $entity->save();
  }
}

/**
 * Replaces broken file links in the given markup with sanitized ones.
 *
 * The markup is parsed only to discover the links; the replacement itself
 * is done on the raw string so the rest of the markup stays untouched.
 *
 * @param string $value
 *   The markup to process.
 *
 * @return string
 *   The markup with every fixable link replaced. Identical to the input
 *   when nothing could be fixed.
 */
private function replaceBrokenLinks(string $value) : string {
  $dom = Html::load($value);

  /** @var \DOMElement $node */
  foreach ($dom->getElementsByTagName('a') as $node) {
    $href = trim($node->getAttribute('href'));

    // Skip links without href, links that do not point to a handled file
    // location, and links that still work.
    if ($href === '' || !$this->isValidLink($href) || $this->remoteFileExists($href)) {
      continue;
    }
    $this->io()->note(sprintf('Found a broken link "%s"', $href));
    $basename = basename($href);
    $position = $basename === '' ? FALSE : strrpos($href, $basename);

    // Test sanitized filename and urldecoded+sanitized filename. Identical
    // candidates are collapsed so each URL is requested only once.
    $candidates = array_unique([
      $this->getSanitizedFilename($basename),
      $this->getSanitizedFilename(urldecode($basename)),
    ]);

    $newUrl = NULL;
    foreach ($candidates as $candidate) {
      // An unchanged filename yields the URL already known to be broken.
      if ($position === FALSE || $candidate === $basename) {
        continue;
      }
      // Replace only the last occurrence, i.e. the filename itself, so an
      // identically named directory segment is left alone.
      $sanitizedUrl = substr_replace($href, $candidate, $position, strlen($basename));

      if (!$this->remoteFileExists($sanitizedUrl)) {
        continue;
      }
      $newUrl = $sanitizedUrl;
    }

    $count = 0;
    if ($newUrl !== NULL) {
      $value = str_replace($href, $newUrl, $value, $count);
    }

    // Either no working candidate was found, or the href as read from the
    // DOM does not occur verbatim in the raw markup (for example because
    // it is entity-encoded there), so nothing was replaced.
    if ($count === 0) {
      $this->io()->warning(sprintf('Failed to process: "%s"', $href));
    }
  }

  return $value;
}

/**
 * Transliterates all files embedded in text fields.
 *
 * Walks the field map of every supported text field type and processes
 * the listed fields one entity type at a time.
 *
 * @return int
 *   The exit code.
 */
#[Command(name: 'helfi:transliterate:fields')]
public function transliterateTextFields() : int {
  foreach (['text_with_summary', 'text', 'text_long'] as $type) {
    $map = $this->entityFieldManager->getFieldMapByFieldType($type);

    foreach ($map as $entityTypeId => $fieldDefinitions) {
      $this->processEntityType($entityTypeId, $fieldDefinitions);
    }
  }

  return DrushCommands::EXIT_SUCCESS;
}

/**
* Transliterates the existing filenames.
*
* @return int
* The exit code.
*/
#[Command(name: 'helfi:files:transliterate')]
#[Command(name: 'helfi:transliterate:files')]
public function transliterate() : int {
$ids = $this->entityTypeManager
->getStorage('file')
Expand All @@ -78,7 +243,7 @@ public function transliterate() : int {
continue;
}

$sanitizedFilename = $this->getSanitizedFilename($file);
$sanitizedFilename = $this->getSanitizedFilename($file->getFilename());

if ($sanitizedFilename === $file->getFilename()) {
continue;
Expand Down
7 changes: 1 addition & 6 deletions tests/src/Kernel/TransliterateFilesCommandsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,7 @@ public function testTransliterateFilesCommand() : void {
'replacement_character' => '_',
])->save();

$command = new TransliterateFilesCommands(
$this->container->get('stream_wrapper_manager'),
$this->container->get('event_dispatcher'),
$this->container->get('entity_type.manager'),
$fileSystem,
);
$command = TransliterateFilesCommands::create($this->container);
$command->transliterate();

foreach ($files as $expected) {
Expand Down
Loading