Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cli/file-structure): improve push command (save_hash_file, chunk size) #1076

Merged
merged 11 commits into from
Nov 25, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use EMS\CommonBundle\Contracts\File\FileManagerInterface;
use EMS\CommonBundle\Storage\Archive;
use EMS\CommonBundle\Storage\StorageManager;
use EMS\Helpers\File\File;
use EMS\Helpers\Html\MimeTypes;
use EMS\Helpers\Standard\Json;
use Symfony\Component\Console\Input\InputArgument;
Expand All @@ -22,8 +23,16 @@ class FileStructurePushCommand extends AbstractCommand
protected static $defaultName = Commands::FILE_STRUCTURE_PUSH;
private const ARGUMENT_FOLDER = 'folder';
private const OPTION_ADMIN = 'admin';
private const OPTION_QUIET = 'quiet';
private const OPTION_CHUNK_SIZE = 'chunk-size';
private const OPTION_QUIET_SHORTCUT = 'q';
private const OPTION_SAVE_HASH_FILENAME = 'save-hash-filename';
private const DEFAULT_SAVE_HASH_FILE = '.hash';
private string $folderPath;
private FileManagerInterface $fileManager;
private bool $quiet;
private int $chunkSize;
private string $saveHashFilename;

public function __construct(
private readonly AdminHelper $adminHelper,
Expand All @@ -39,6 +48,9 @@ protected function configure(): void
->setDescription('Push an EMS Archive file structure into a EMS Admin storage services (via the API)')
->addArgument(self::ARGUMENT_FOLDER, InputArgument::REQUIRED, 'Source folder')
->addOption(self::OPTION_ADMIN, null, InputOption::VALUE_NONE, 'Push to admin')
->addOption(self::OPTION_QUIET, self::OPTION_QUIET_SHORTCUT, InputOption::VALUE_NONE, 'only displays the archive hash (if succeed)')
->addOption(self::OPTION_CHUNK_SIZE, null, InputOption::VALUE_OPTIONAL, 'Set the heads method chunk size', FileManagerInterface::HEADS_CHUNK_SIZE)
->addOption(self::OPTION_SAVE_HASH_FILENAME, null, InputOption::VALUE_OPTIONAL, 'File where to save the structure hash within the source folder (used to avoid head request). Delete the file to force recheck all files.', self::DEFAULT_SAVE_HASH_FILE)
;
}

Expand All @@ -50,30 +62,69 @@ protected function initialize(InputInterface $input, OutputInterface $output): v
true => $this->adminHelper->getCoreApi()->file(),
false => $this->storageManager,
};
$this->quiet = $this->getOptionBool(self::OPTION_QUIET);
$this->chunkSize = $this->getOptionInt(self::OPTION_CHUNK_SIZE);
$this->saveHashFilename = $this->getOptionString(self::OPTION_SAVE_HASH_FILENAME);
}

/**
 * Pushes the source folder to the configured file manager as an EMS archive.
 *
 * Steps: build an archive from the folder, leave out of the HEAD requests the hashes already
 * listed in the previously pushed archive (when the hash file exists), upload every file the
 * storage reports as missing, upload the archive structure itself and, only when every file
 * went through, save the archive hash in the hash file.
 *
 * @return int self::EXECUTE_ERROR when at least one file failed to upload, self::EXECUTE_SUCCESS otherwise
 *
 * @throws \RuntimeException when the chunk size is lower than 1
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
    // Reject an invalid chunk size up front, before the archive is built or anything is printed
    if ($this->chunkSize < 1) {
        throw new \RuntimeException(\sprintf('Chunk size must be greater than 0, %d given', $this->chunkSize));
    }

    if (!$this->quiet) {
        $this->io->title('EMS - File structure - Push');
    }
    $algo = $this->fileManager->getHashAlgo();

    if (!$this->quiet) {
        $this->io->section('Building archive');
    }
    $archive = Archive::fromDirectory($this->folderPath, $algo);

    // The hash file holds the hash of the last pushed archive; when present, that archive is
    // loaded so the hashes it already contains are not sent to heads() again
    $previousArchive = null;
    $hashFilename = \implode(DIRECTORY_SEPARATOR, [$this->folderPath, $this->saveHashFilename]);
    if (\file_exists($hashFilename)) {
        $previousArchive = Archive::fromStructure($this->fileManager->getContents(File::fromFilename($hashFilename)->getContents()), $algo);
    }

    if (!$this->quiet) {
        $this->io->section('Pushing archive');
    }
    $progressBar = $this->io->createProgressBar($archive->getCount());
    $failedCount = 0;
    $this->fileManager->setHeadChunkSize($this->chunkSize);
    foreach ($this->fileManager->heads(...$archive->getHashes($previousArchive)) as $hash) {
        // heads() yields true for a hash already stored, and the hash itself when it is missing
        if (true === $hash) {
            $progressBar->advance();
            continue;
        }
        $file = $archive->getFirstFileByHash($hash);
        try {
            $uploadHash = $this->fileManager->uploadFile($this->folderPath.DIRECTORY_SEPARATOR.$file->filename);
            if ($uploadHash !== $hash) {
                throw new \RuntimeException(\sprintf('Mismatched between the computed hash (%s) and the hash of the uploaded file (%s) for the file %s', $hash, $uploadHash, $file->filename));
            }
        } catch (\Throwable $e) {
            // A failing file must not abort the whole push: report the reason, count it and go on
            $this->io->error(\sprintf('Error while saving the file %s: %s', $file->filename, $e->getMessage()));
            ++$failedCount;
        }
        $progressBar->advance();
    }
    $progressBar->finish();
    $hash = $this->fileManager->uploadContents(Json::encode($archive), 'archive.json', MimeTypes::APPLICATION_JSON->value);
    $this->io->newLine();
    if (0 !== $failedCount) {
        $this->io->error(\sprintf('%d files faced an issue while uploading, please retry.', $failedCount));

        return self::EXECUTE_ERROR;
    }

    // The hash file is only an optimisation for the next run, so a write failure is reported
    // without failing a push that otherwise succeeded
    if (false === \file_put_contents($hashFilename, $hash)) {
        $this->io->warning(\sprintf('The archive hash could not be saved in %s', $hashFilename));
    }

    if ($this->quiet) {
        // NOTE(review): assumes $this->io is a SymfonyStyle. Symfony's own --quiet/-q switch puts
        // the output in quiet verbosity, where normal-level messages are dropped; the hash is
        // therefore written at quiet level so it is still printed.
        $this->io->write($hash, false, OutputInterface::VERBOSITY_QUIET);
    } else {
        $this->io->success(\sprintf('Archive %s have been uploaded with the directory content of %s', $hash, $this->folderPath));
    }

    return self::EXECUTE_SUCCESS;
}
Expand Down
15 changes: 14 additions & 1 deletion EMS/common-bundle/src/Common/CoreApi/Endpoint/File/File.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@

final class File implements FileInterface
{
/**
* @var int<1, max>
*/
private int $headChunkSize = self::HEADS_CHUNK_SIZE;

/**
 * Both collaborators are kept as read-only promoted properties; the constructor does nothing else.
 */
public function __construct(
    private readonly Client $client,
    private readonly StorageManager $storageManager,
) {
}
Expand Down Expand Up @@ -174,7 +179,7 @@ public function headHash(string $hash): bool
public function heads(string ...$fileHashes): \Traversable
{
$uniqueFileHashes = \array_unique($fileHashes);
$pagedHashes = \array_chunk($uniqueFileHashes, self::HEADS_CHUNK_SIZE, true);
$pagedHashes = \array_chunk($uniqueFileHashes, $this->headChunkSize, true);
foreach ($pagedHashes as $hashes) {
foreach ($this->client->post('/api/file/heads', $hashes)->getData() as $hash) {
yield $hash;
Expand All @@ -191,4 +196,12 @@ public function getStream(string $hash): StreamInterface
{
return $this->client->download($this->downloadLink($hash));
}

/**
* Sets the chunk size used by heads() when it splits the hashes into chunks.
*
* @param int<1, max> $chunkSize
*/
public function setHeadChunkSize(int $chunkSize): void
{
$this->headChunkSize = $chunkSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

interface FileManagerInterface
{
public const HEADS_CHUNK_SIZE = 1000;
public const HEADS_CHUNK_SIZE = 256;

public function downloadFile(string $hash): string;

Expand All @@ -19,11 +19,16 @@ public function getHashAlgo(): string;
public function getStream(string $hash): StreamInterface;

/**
* @return \Traversable<int, string>
* @return \Traversable<int, string|true>
*/
public function heads(string ...$fileHashes): \Traversable;

public function uploadContents(string $contents, string $filename, string $mimeType): string;

public function uploadFile(string $realPath, ?string $mimeType = null, ?string $filename = null, ?callable $callback = null): string;

/**
* @param int<1, max> $chunkSize
*/
public function setHeadChunkSize(int $chunkSize): void;
}
16 changes: 15 additions & 1 deletion EMS/common-bundle/src/Storage/Archive.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ public static function fromStructure(string $structure, string $hashAlgo): self
/**
 * Lazily yields the hash of each file of this archive.
 *
 * @param Archive|null $previousArchive when given, the hashes it already contains are left out
 *
 * @return iterable<string>
 */
public function getHashes(?Archive $previousArchive = null): iterable
{
    foreach ($this->files as $file) {
        // Nothing to report for a file whose hash was already part of the previous archive
        if (null !== $previousArchive && $previousArchive->containsByHash($file->hash)) {
            continue;
        }
        yield $file->hash;
    }
}
Expand Down Expand Up @@ -143,4 +146,15 @@ private function resolveFile(array $file): array

return $resolved;
}

/**
 * Tells whether at least one file of this archive has exactly the given hash.
 */
private function containsByHash(string $hash): bool
{
    $found = false;
    foreach ($this->files as $candidate) {
        if ($candidate->hash === $hash) {
            // First match is enough, no need to look at the remaining files
            $found = true;
            break;
        }
    }

    return $found;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public function head(string $hash): bool

/**
 * Checks each given hash with head().
 *
 * @return array<int, string|true> one entry per given hash, in the given order: true when head()
 *                                 reports the hash as present, the hash itself otherwise
 */
public function heads(string ...$hashes): array
{
    $results = [];
    foreach ($hashes as $hash) {
        // Appending keeps the result a plain list, whatever keys the variadic carried
        $results[] = $this->head($hash) ? true : $hash;
    }

    return $results;
}

public function create(string $hash, string $filename): bool
Expand Down
2 changes: 1 addition & 1 deletion EMS/common-bundle/src/Storage/Service/EntityStorage.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public function head(string $hash): bool

/**
 * Checks each given hash with head().
 *
 * @return array<int, string|true> one entry per given hash, in the given order: true when head()
 *                                 reports the hash as present, the hash itself otherwise
 */
public function heads(string ...$hashes): array
{
    $results = [];
    foreach ($hashes as $hash) {
        // Appending keeps the result a plain list, whatever keys the variadic carried
        $results[] = $this->head($hash) ? true : $hash;
    }

    return $results;
}

public function getSize(string $hash): int
Expand Down
4 changes: 3 additions & 1 deletion EMS/common-bundle/src/Storage/Service/S3Storage.php
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,9 @@ public function heads(string ...$hashes): array
yield $client->headObjectAsync([
'Bucket' => $this->bucket,
'Key' => \implode('/', [\substr($hash, 0, 3), $hash]),
])->then(onRejected: function (AwsException $exception) use (&$notFound, $hash) {
])->then(onFulfilled: function () use (&$notFound) {
$notFound[] = true;
}, onRejected: function (AwsException $exception) use (&$notFound, $hash) {
if ('NotFound' === $exception->getAwsErrorCode()) {
$notFound[] = $hash;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ interface StorageInterface
public function head(string $hash): bool;

/**
* @return string[]
* @return array<int, string|true>
*/
public function heads(string ...$hashes): array;

Expand Down
14 changes: 13 additions & 1 deletion EMS/common-bundle/src/Storage/StorageManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ class StorageManager implements FileManagerInterface
private array $adapters = [];
/** @var StorageFactoryInterface[] */
private array $factories = [];
/**
* @var int<1, max>
*/
private int $headChunkSize = FileManagerInterface::HEADS_CHUNK_SIZE;

/**
* @param iterable<StorageFactoryInterface> $factories
Expand Down Expand Up @@ -94,7 +98,7 @@ public function head(string $hash): bool
public function heads(string ...$fileHashes): \Traversable
{
$uniqueFileHashes = \array_unique($fileHashes);
$pagedHashes = \array_chunk($uniqueFileHashes, self::HEADS_CHUNK_SIZE, true);
$pagedHashes = \array_chunk($uniqueFileHashes, $this->headChunkSize, true);

foreach ($pagedHashes as $hashes) {
foreach ($this->adapters as $adapter) {
Expand Down Expand Up @@ -680,4 +684,12 @@ public function downloadFile(string $hash): string
{
return $this->getFile($hash)->getFilename();
}

/**
* Sets the chunk size used by heads() when it splits the hashes into chunks.
*
* @param int<1, max> $chunkSize
*/
public function setHeadChunkSize(int $chunkSize): void
{
$this->headChunkSize = $chunkSize;
}
}
2 changes: 1 addition & 1 deletion EMS/core-bundle/src/Service/FileService.php
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ public function getImages(): iterable
}

/**
* @return \Traversable<int, string>
* @return \Traversable<int, string|true>
*/
public function heads(string ...$hashes): \Traversable
{
Expand Down
Loading