Skip to content

Commit

Permalink
move the storage preparation responsibility
Browse files Browse the repository at this point in the history
  • Loading branch information
WengerK committed Apr 25, 2024
1 parent 491ee94 commit a6a0410
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 28 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ $body = \Drupal::service('entity_to_text_tika.extractor.file_to_text')->fromFile
or, for advanced usage that avoids multiple calls to Tika:

```php
// Call this at least once somewhere in the code (e.g. in module.install) to prepare the storage.
\Drupal::service('entity_to_text_tika.storage.local_file')->prepareStorage();

// Load the already OCR'ed file if possible to avoid unnecessary calls to Tika.
$body = \Drupal::service('entity_to_text_tika.storage.local_file')->load($file, 'eng+fra');

Expand Down
24 changes: 13 additions & 11 deletions modules/entity_to_text_tika/src/Commands/OcrWarmupCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,20 @@ public function __construct(EntityTypeManagerInterface $entity_type_manager, Fil
* @usage drush e2t:t:w --fid=2
* Warmup the file with FID 2.
*/
public function warmup(array $options = [
'fid' => NULL,
'filemime' => [
'application/pdf', 'image/jpeg', 'image/png', 'image/tiff',
'application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
public function warmup(
array $options = [
'fid' => NULL,
'filemime' => [
'application/pdf', 'image/jpeg', 'image/png', 'image/tiff',
'application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
],
'stop-on-failure' => FALSE,
'force' => FALSE,
'no-progress' => FALSE,
'dry-run' => FALSE,
],
'stop-on-failure' => FALSE,
'force' => FALSE,
'no-progress' => FALSE,
'dry-run' => FALSE,
]): void {
): void {
$fid = $options['fid'];
$filemime = (array) $options['filemime'];
$stop_on_failure = (bool) $options['stop-on-failure'];
Expand Down
6 changes: 2 additions & 4 deletions modules/entity_to_text_tika/src/Storage/LocalFileStorage.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ public function load(File $file, string $langcode = 'eng'): ?string {
* {@inheritdoc}
*/
public function save(File $file, string $content, string $langcode = 'eng'): string {
$this->prepareDestination();

$fullpath = $this->getFullPath($file, $langcode);
file_put_contents($fullpath, $content);
return $fullpath;
Expand Down Expand Up @@ -98,9 +96,9 @@ private function getFullPath(File $file, string $langcode = 'eng'): string {
}

/**
* Ensure the destination directory is ready to use.
* {@inheritdoc}
*/
private function prepareDestination(): void {
public function prepareStorage(): void {
$dest = self::DESTINATION;
$this->fileSystem->prepareDirectory($dest, FileSystemInterface::CREATE_DIRECTORY);
}
Expand Down
5 changes: 5 additions & 0 deletions modules/entity_to_text_tika/src/Storage/StorageInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,9 @@ public function load(File $file, string $langcode = 'eng'): ?string;
*/
public function save(File $file, string $content, string $langcode = 'eng'): string;

/**
* Ensure the storage is ready to store OCR text values.
*
* NOTE(review): presumably this has to be called at least once before the
* first save(), since callers are now responsible for preparing the storage
* (e.g. from an install hook) - confirm against each implementation.
*
* @return void
*/
public function prepareStorage(): void;

}
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,6 @@ public function testSave(): void {
->willReturn('/tmp')
->shouldBeCalled();

$this->fileSystem->prepareDirectory('private://entity-to-text/ocr', FileSystemInterface::CREATE_DIRECTORY)
->shouldBeCalled();

self::assertEquals('/tmp/199-test.pdf.en.ocr.txt', $this->localFileStorage->save($file->reveal(), 'lorem ipsum', 'en'));
self::assertFileExists('/tmp/199-test.pdf.en.ocr.txt');
self::assertEquals('lorem ipsum', file_get_contents('/tmp/199-test.pdf.en.ocr.txt'));
Expand All @@ -141,9 +138,6 @@ public function testSaveInvalidScheme(): void {
->willReturn('test.pdf')
->shouldBeCalled();

$this->fileSystem->prepareDirectory('private://entity-to-text/ocr', FileSystemInterface::CREATE_DIRECTORY)
->shouldBeCalled();

$this->streamWrapperManager->isValidScheme('private')
->willReturn(FALSE)
->shouldBeCalled();
Expand All @@ -170,9 +164,6 @@ public function testSaveInvalidRealpath(): void {
->willReturn('test.pdf')
->shouldBeCalled();

$this->fileSystem->prepareDirectory('private://entity-to-text/ocr', FileSystemInterface::CREATE_DIRECTORY)
->shouldBeCalled();

$this->streamWrapperManager->isValidScheme('private')
->willReturn(TRUE)
->shouldBeCalled();
Expand Down Expand Up @@ -206,7 +197,6 @@ public function testloadInvalidScheme(): void {
->shouldBeCalled();

$this->fileSystem->realpath(Argument::any())->shouldNotBeCalled();
$this->fileSystem->prepareDirectory(Argument::any())->shouldNotBeCalled()->shouldNotBeCalled();

$this->expectException(\RuntimeException::class);
$this->expectExceptionMessage('The destination path is not a valid stream wrapper');
Expand Down Expand Up @@ -237,9 +227,6 @@ public function testloadInvalidRealpath(): void {
->willReturn(FALSE)
->shouldBeCalled();

$this->fileSystem->prepareDirectory('private://entity-to-text/ocr', FileSystemInterface::CREATE_DIRECTORY)
->shouldBeCalled();

$this->expectException(\RuntimeException::class);
$this->expectExceptionMessage('The resolved realpath from uri "private://entity-to-text/ocr" is not a valid directory.');
self::assertEquals('/tmp/199-test.pdf.en.ocr.txt', $this->localFileStorage->save($file->reveal(), 'lorem ipsum', 'en'));
Expand Down

0 comments on commit a6a0410

Please sign in to comment.