diff --git a/EMS/helpers/src/File/File.php b/EMS/helpers/src/File/File.php index ee9aa2f51..6145c2282 100644 --- a/EMS/helpers/src/File/File.php +++ b/EMS/helpers/src/File/File.php @@ -10,14 +10,16 @@ class File { public string $name; + public string $extension; /* extension reported by \SplFileInfo::getExtension() for the wrapped file */ public string $mimeType; public int $size; public const DEFAULT_CHUNK_SIZE = 8 * 1024 * 1024; - private function __construct(private readonly \SplFileInfo $file) + public function __construct(private readonly \SplFileInfo $file) { $this->name = $this->file->getFilename(); + $this->extension = $this->file->getExtension(); $this->size = Type::integer($this->file->getSize()); $this->mimeType = MimeTypes::getDefault()->guessMimeType($file->getPathname()) ?? 'application/octet-stream'; } diff --git a/elasticms-cli/src/Client/Audit/Report.php b/elasticms-cli/src/Client/Audit/Report.php index 1cbf4466a..9cbe804a0 100644 --- a/elasticms-cli/src/Client/Audit/Report.php +++ b/elasticms-cli/src/Client/Audit/Report.php @@ -5,12 +5,10 @@ namespace App\CLI\Client\Audit; use App\CLI\Client\HttpClient\UrlReport; +use App\CLI\Client\Report\AbstractReport; use App\CLI\Client\WebToElasticms\Helper\Url; -use EMS\CommonBundle\Common\SpreadsheetGeneratorService; -use EMS\CommonBundle\Contracts\SpreadsheetGeneratorServiceInterface; -use Symfony\Component\HttpFoundation\HeaderUtils; -class Report +class Report extends AbstractReport { /** @var string[][] */ private array $accessibilityErrors = [['URL', 'WCAG2AA', 'Accessibility\'s score']]; @@ -22,50 +20,6 @@ class Report private array $ignoredLinks = [['URL', 'Error message', 'Referrers']]; /** @var string[][] */ private array $warnings = [['URL', 'Warning message', 'Referrer']]; - private readonly SpreadsheetGeneratorService $spreadsheetGeneratorService; - - public function __construct() - { - $this->spreadsheetGeneratorService = new SpreadsheetGeneratorService(); - } - - public function generateXslxReport(): string - { - $config = [ - 
SpreadsheetGeneratorServiceInterface::CONTENT_DISPOSITION => HeaderUtils::DISPOSITION_ATTACHMENT, - SpreadsheetGeneratorServiceInterface::WRITER => SpreadsheetGeneratorServiceInterface::XLSX_WRITER, - SpreadsheetGeneratorServiceInterface::CONTENT_FILENAME => 'Audit-Report.xlsx', - SpreadsheetGeneratorServiceInterface::SHEETS => [ - [ - 'name' => 'Broken links', - 'rows' => \array_values($this->brokenLinks), - ], - [ - 'name' => 'Ignored links', - 'rows' => \array_values($this->ignoredLinks), - ], - [ - 'name' => 'Warnings', - 'rows' => \array_values($this->warnings), - ], - [ - 'name' => 'Accessibility', - 'rows' => \array_values($this->accessibilityErrors), - ], - [ - 'name' => 'Security', - 'rows' => \array_values($this->securityErrors), - ], - ], - ]; - $tmpFilename = \tempnam(\sys_get_temp_dir(), 'WebReport'); - if (!\is_string($tmpFilename)) { - throw new \RuntimeException('Not able to generate a temporary filename'); - } - $this->spreadsheetGeneratorService->generateSpreadsheetFile($config, $tmpFilename); - - return $tmpFilename; - } public function addAccessibilityError(string $url, int $errorCount, ?float $score): void { @@ -211,4 +165,33 @@ public function setIgnoredLinks(array $ignoredLinks): void { $this->ignoredLinks = $ignoredLinks; } + + /** + * @return array{array{name: string, rows: string[][]}} one entry per worksheet: its tab name and the rows to write + */ + protected function getSheets(): array + { + return [ + [ + 'name' => 'Broken links', + 'rows' => \array_values($this->brokenLinks), + ], + [ + 'name' => 'Ignored links', + 'rows' => \array_values($this->ignoredLinks), + ], + [ + 'name' => 'Warnings', + 'rows' => \array_values($this->warnings), + ], + [ + 'name' => 'Accessibility', + 'rows' => \array_values($this->accessibilityErrors), + ], + [ + 'name' => 'Security', + 'rows' => \array_values($this->securityErrors), + ], + ]; + } } diff --git a/elasticms-cli/src/Client/File/Report.php b/elasticms-cli/src/Client/File/Report.php new file mode 100644 index 000000000..b4f51d9f7 --- /dev/null +++ 
b/elasticms-cli/src/Client/File/Report.php @@ -0,0 +1,43 @@ +warnings[] = [ + $type, + $filename, + $message, + ]; + } + + /** + * @return string[][] + */ + public function getWarnings(): array + { + return $this->warnings; + } + + /** + * @return array{array{name: string, rows: string[][]}} + */ + protected function getSheets(): array + { + return [ + [ + 'name' => 'Warnings', + 'rows' => \array_values($this->warnings), + ], + ]; + } +} diff --git a/elasticms-cli/src/Client/Report/AbstractReport.php b/elasticms-cli/src/Client/Report/AbstractReport.php new file mode 100644 index 000000000..2b2dbfc25 --- /dev/null +++ b/elasticms-cli/src/Client/Report/AbstractReport.php @@ -0,0 +1,54 @@ +spreadsheetGeneratorService = new SpreadsheetGeneratorService(); + } + + /** + * Writes the sheets returned by getSheets() to an XLSX file in the system temp directory. + * + * @return string path of the generated .xlsx file + * + * @throws \RuntimeException when the temporary file cannot be created or renamed + */ + public function generateXslxReport(): string + { + $config = [ + SpreadsheetGeneratorServiceInterface::CONTENT_DISPOSITION => HeaderUtils::DISPOSITION_ATTACHMENT, + SpreadsheetGeneratorServiceInterface::WRITER => SpreadsheetGeneratorServiceInterface::XLSX_WRITER, + SpreadsheetGeneratorServiceInterface::CONTENT_FILENAME => 'Audit-Report.xlsx', + SpreadsheetGeneratorServiceInterface::SHEETS => $this->getSheets(), + ]; + /* tempnam() returns false on failure: check it before appending the extension, otherwise false.'.xlsx' becomes the relative path ".xlsx". */ + $tmpFilename = \tempnam(\sys_get_temp_dir(), 'Audit-Report-'); + if (!\is_string($tmpFilename)) { + throw new \RuntimeException('Not able to generate a temporary filename'); + } + /* tempnam() already created an empty file: rename it so no orphan placeholder is left next to the report. */ + $xlsxFilename = $tmpFilename.'.xlsx'; + if (!\rename($tmpFilename, $xlsxFilename)) { + throw new \RuntimeException(\sprintf('Not able to rename the temporary file %s', $tmpFilename)); + } + $this->spreadsheetGeneratorService->generateSpreadsheetFile($config, $xlsxFilename); + + return $xlsxFilename; + } + + /** + * @return array{array{name: string, rows: string[][]}} + */ + abstract protected function getSheets(): array; +} diff --git a/elasticms-cli/src/Client/WebToElasticms/Config/ConfigManager.php b/elasticms-cli/src/Client/WebToElasticms/Config/ConfigManager.php index c5d10781d..5ab27e63d 100644 --- a/elasticms-cli/src/Client/WebToElasticms/Config/ConfigManager.php +++ b/elasticms-cli/src/Client/WebToElasticms/Config/ConfigManager.php @@ -48,6 +48,8 @@ class ConfigManager /** @var string[] */ 
private array $validClasses = []; /** @var string[] */ + private array $styleValidTags = []; + /** @var string[] */ private array $locales = []; /** @var string[] */ private array $linkToClean = []; @@ -264,6 +266,22 @@ public function setValidClasses(array $validClasses): void $this->validClasses = $validClasses; } + /** + * @return string[] tag names whose style attribute the StyleCleaner filter leaves untouched + */ + public function getStyleValidTags(): array + { + return $this->styleValidTags; + } + + /** + * @param string[] $styleValidTags tag names whose style attribute the StyleCleaner filter leaves untouched + */ + public function setStyleValidTags(array $styleValidTags): void + { + $this->styleValidTags = $styleValidTags; + } + public function findInDocuments(Url $url): ?string { foreach ($this->documents as $document) { @@ -435,6 +453,11 @@ public function getExpressionLanguage(): ExpressionLanguage fn ($arguments, $pattern, $str, $limit = -1, $flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) => (null === $pattern || null === $str) ? null : \preg_split($pattern, (string) $str, $limit, $flags) ); + $this->expressionLanguage->register('match', /* compiled form calls matches() like the evaluator: preg_match() takes $matches by reference as 3rd argument, not flags */ + fn ($pattern, $str, $flags = 0) => \sprintf('((null === %1$s || null === %2$s) ? null : $this->matches(%1$s, (string) %2$s, %3$d))', $pattern, $str, $flags), + fn ($arguments, $pattern, $str, $flags = 0) => (null === $pattern || null === $str) ? null : $this->matches($pattern, (string) $str, $flags) + ); + $this->expressionLanguage->register('datalinks', fn ($value, $type) => \sprintf('((null === %1$s || null === %2$s) ? null : (is_array($value) ? \\$this->findDataLinksArray(%1$s, %2$s): $this->findDataLinkString(%1$s, %2$s)))', \strval($value), $type), fn ($arguments, $value, $type) => (null === $value || null === $type) ? null : (\is_array($value) ? 
$this->findDataLinksArray($value, $type) : $this->findDataLinkString($value, $type)) @@ -458,6 +481,22 @@ public function getExpressionLanguage(): ExpressionLanguage return $this->expressionLanguage; } + /** + * Runs preg_match_all() and returns the captures of the named group "matches" when the pattern defines one, otherwise the full-pattern matches (empty array when there is none). + * + * @return string[] + * + * @throws \RuntimeException when preg_match_all() reports a failure + */ + public function matches(string $pattern, string $str, int $flags): array + { + if (false === \preg_match_all($pattern, $str, $matches, $flags)) { + throw new \RuntimeException(\sprintf('Not able to match the pattern %s: %s', $pattern, \preg_last_error_msg())); + } + + return $matches['matches'] ?? $matches[0] ?? []; + } + public function getHashResourcesField(): string { return $this->hashResourcesField; diff --git a/elasticms-cli/src/Client/WebToElasticms/Extract/Html.php b/elasticms-cli/src/Client/WebToElasticms/Extract/Html.php index 4ab381edc..ec1dbbfe6 100644 --- a/elasticms-cli/src/Client/WebToElasticms/Extract/Html.php +++ b/elasticms-cli/src/Client/WebToElasticms/Extract/Html.php @@ -124,7 +124,7 @@ private function applyFilters(WebResource $resource, Crawler $content, Extractor $filter = new InternalLink($this->config, $rapport, $resource->getUrl()); break; case StyleCleaner::TYPE: - $filter = new StyleCleaner(); + $filter = new StyleCleaner($this->config); break; case ClassCleaner::TYPE: $filter = new ClassCleaner($this->config); diff --git a/elasticms-cli/src/Client/WebToElasticms/Filter/Html/StyleCleaner.php b/elasticms-cli/src/Client/WebToElasticms/Filter/Html/StyleCleaner.php index e14f0fb9d..87e6a71e1 100644 --- a/elasticms-cli/src/Client/WebToElasticms/Filter/Html/StyleCleaner.php +++ b/elasticms-cli/src/Client/WebToElasticms/Filter/Html/StyleCleaner.php @@ -4,6 +4,7 @@ namespace App\CLI\Client\WebToElasticms\Filter\Html; +use App\CLI\Client\WebToElasticms\Config\ConfigManager; use App\CLI\Client\WebToElasticms\Config\WebResource; use Symfony\Component\DomCrawler\Crawler; @@ -11,12 +12,22 @@ class StyleCleaner implements HtmlInterface { final public const TYPE = 'style-cleaner'; + public function __construct(private readonly ConfigManager $config) + { + } + public function process(WebResource $resource, Crawler $content): void { foreach 
($content->filter('[style]') as $item) { if (!$item instanceof \DOMElement) { throw new \RuntimeException('Unexpected non DOMElement object'); } + + /* Tags returned by ConfigManager::getStyleValidTags() keep their style attribute; strict comparison avoids loose type juggling on configured values. */ + if (\in_array($item->nodeName, $this->config->getStyleValidTags(), true)) { + continue; + } + $item->removeAttribute('style'); } } diff --git a/elasticms-cli/src/Command/File/AuditFileCommand.php b/elasticms-cli/src/Command/File/AuditFileCommand.php new file mode 100644 index 000000000..11bf08ad1 --- /dev/null +++ b/elasticms-cli/src/Command/File/AuditFileCommand.php @@ -0,0 +1,97 @@ +mimeTypes = new MimeTypes(); + $this->report = new Report(); + parent::__construct(); + } + + protected function configure(): void + { + $this + ->setDescription('Audit files in a folder structure') + ->addArgument( + self::ARG_FOLDER, + InputArgument::REQUIRED, + 'Path of the folder structure' + ); + } + + protected function initialize(InputInterface $input, OutputInterface $output): void + { + parent::initialize($input, $output); + $this->logger = new ConsoleLogger($output); + $this->folder = $this->getArgumentString(self::ARG_FOLDER); + } + + /** + * Checks every file found under the folder argument for an uppercase extension and for an extension that does not belong to its guessed mime type, then writes the XLSX report. + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $this->io->title(\sprintf('Audit files in %s', $this->folder)); + $finder = new Finder(); + $finder->files()->in($this->folder); + + if (!$finder->hasResults()) { + throw new \RuntimeException('No files found!'); + } + $this->io->comment(\sprintf('%d files located', $finder->count())); + $progressBar = $this->io->createProgressBar($finder->count()); + foreach ($finder as $file) { + $pathInStructure = \substr($file->getPathname(), \strlen($this->folder)); + $info = new File($file); + $extension = \strtolower($info->extension); + if ($extension !== $info->extension) { + $this->log(self::UPPERCASE_EXTENSION, $pathInStructure, \sprintf('The extension %s contains uppercase', $info->extension)); + } + if (!\in_array($extension, $this->mimeTypes->getExtensions($info->mimeType), true)) { + $this->log(self::EXTENSION_MISMATCH, 
$pathInStructure, \sprintf('The extension %s mismatch with the mime type %s', $info->extension, $info->mimeType)); + } + $progressBar->advance(); + } + + $progressBar->finish(); + $this->io->newLine(); + $this->io->writeln(\sprintf('Audit report: %s', $this->report->generateXslxReport())); + + return self::EXECUTE_SUCCESS; + } + + /** + * Records the warning in the report and sends it to the console logger. + */ + private function log(string $type, string $filename, string $message): void + { + $this->report->addWarning($type, $filename, $message); + $this->logger->warning(\sprintf('%s: %s', $filename, $message)); + } +} diff --git a/elasticms-cli/src/Commands.php b/elasticms-cli/src/Commands.php index ed23eecac..da5760bdf 100644 --- a/elasticms-cli/src/Commands.php +++ b/elasticms-cli/src/Commands.php @@ -8,6 +8,7 @@ class Commands { final public const WEB_MIGRATION = 'emscli:web:migrate'; final public const APPLE_PHOTOS_MIGRATION = 'emscli:apple-photos:migrate'; + final public const FILE_AUDIT = 'emscli:file:audit'; final public const WEB_AUDIT = 'emscli:web:audit'; final public const DOCUMENTS_UPDATE = 'emscli:documents:update'; final public const MEDIA_LIBRARY_SYNC = 'emscli:media-library:synchronize';