From da8f18bd3bf68aa844e28c6e13acb980840bcf84 Mon Sep 17 00:00:00 2001 From: Robin Windey Date: Tue, 3 Oct 2023 17:30:56 +0200 Subject: [PATCH 1/2] Skip notifications when OCR mode is "skip file" * Fixes #232 * Move logic to OcrService --- lib/BackgroundJobs/ProcessFileJob.php | 128 +----- lib/Exception/OcrResultEmptyException.php | 32 ++ .../Version2404Date20220903071748.php | 2 +- lib/OcrProcessors/OcrMyPdfBasedProcessor.php | 3 +- lib/Service/IOcrService.php | 12 +- lib/Service/OcrService.php | 155 ++++++- .../Notification/NotificationTest.php | 79 +--- .../BackgroundJobs/ProcessFileJobTest.php | 370 +-------------- .../OcrProcessors/PdfOcrProcessorTest.php | 3 +- tests/Unit/Service/OcrServiceTest.php | 420 +++++++++++++++++- tests/psalm-baseline.xml | 20 +- 11 files changed, 636 insertions(+), 588 deletions(-) create mode 100644 lib/Exception/OcrResultEmptyException.php diff --git a/lib/BackgroundJobs/ProcessFileJob.php b/lib/BackgroundJobs/ProcessFileJob.php index fb3dab3..6fe4c36 100644 --- a/lib/BackgroundJobs/ProcessFileJob.php +++ b/lib/BackgroundJobs/ProcessFileJob.php @@ -27,22 +27,10 @@ namespace OCA\WorkflowOcr\BackgroundJobs; use \OCP\Files\File; -use OC\User\NoUserException; -use OCA\WorkflowOcr\Helper\IProcessingFileAccessor; use OCA\WorkflowOcr\Model\WorkflowSettings; -use OCA\WorkflowOcr\Service\IEventService; use OCA\WorkflowOcr\Service\INotificationService; use OCA\WorkflowOcr\Service\IOcrService; -use OCA\WorkflowOcr\Wrapper\IFilesystem; -use OCA\WorkflowOcr\Wrapper\IViewFactory; use OCP\AppFramework\Utility\ITimeFactory; -use OCP\Files\FileInfo; -use OCP\Files\IRootFolder; -use OCP\Files\Node; -use OCP\Files\NotFoundException; -use OCP\IUser; -use OCP\IUserManager; -use OCP\IUserSession; use Psr\Log\LoggerInterface; /** @@ -52,47 +40,19 @@ class ProcessFileJob extends \OCP\BackgroundJob\QueuedJob { /** @var LoggerInterface */ protected $logger; - /** @var IRootFolder */ - private $rootFolder; /** @var IOcrService */ private $ocrService; - /** @var IEventService */ - private $eventService; - /** @var IViewFactory */ - private $viewFactory; - /** @var IFilesystem */ - private $filesystem; - /** @var IUserManager */ - private $userManager; - /** @var IUserSession */ - private $userSession; - /** @var IProcessingFileAccessor */ - private $processingFileAccessor; /** @var INotificationService */ private $notificationService; public function __construct( LoggerInterface $logger, - IRootFolder $rootFolder, IOcrService $ocrService, - IEventService $eventService, - IViewFactory $viewFactory, - IFilesystem $filesystem, - IUserManager $userManager, - IUserSession $userSession, - IProcessingFileAccessor $processingFileAccessor, INotificationService $notificationService, ITimeFactory $timeFactory) { parent::__construct($timeFactory); $this->logger = $logger; - $this->rootFolder = $rootFolder; $this->ocrService = $ocrService; - $this->eventService = $eventService; - $this->viewFactory = $viewFactory; - $this->filesystem = $filesystem; - $this->userManager = $userManager; - $this->userSession = $userSession; - $this->processingFileAccessor = $processingFileAccessor; $this->notificationService = $notificationService; } @@ -104,13 +64,10 @@ protected function run($argument) : void { try { [$fileId, $uid, $settings] = $this->parseArguments($argument); - $this->initUserEnvironment($uid); - $this->processFile($fileId, $settings); + $this->ocrService->runOcrProcess($fileId, $uid, $settings); } catch (\Throwable $ex) { $this->logger->error($ex->getMessage(), ['exception' => $ex]); $this->notificationService->createErrorNotification($uid, 'An error occured while executing the OCR process ('.$ex->getMessage().'). Please have a look at your servers logfile for more details.'); - } finally { - $this->shutdownUserEnvironment(); } $this->logger->debug('ENDED -- Run ' . self::class . ' job. Argument: {argument}.', ['argument' => $argument]); @@ -135,87 +92,4 @@ private function parseArguments($argument) : array { $settings ]; } - - /** - * @param int $fileId The id of the file to be processed - * @param WorkflowSettings $settings The settings to be used for processing - */ - private function processFile(int $fileId, WorkflowSettings $settings) : void { - $node = $this->getNode($fileId); - - $ocrFile = $this->ocrService->ocrFile($node, $settings); - - $filePath = $node->getPath(); - $fileContent = $ocrFile->getFileContent(); - $originalFileExtension = $node->getExtension(); - $newFileExtension = $ocrFile->getFileExtension(); - - // Only create a new file version if the file OCR result was not empty #130 - if ($ocrFile->getRecognizedText() !== '') { - $newFilePath = $originalFileExtension === $newFileExtension ? - $filePath : - $filePath . ".pdf"; - - $this->createNewFileVersion($newFilePath, $fileContent, $fileId); - } - - $this->eventService->textRecognized($ocrFile, $node); - } - - private function getNode(int $fileId) : ?Node { - /** @var File[] */ - $nodeArr = $this->rootFolder->getById($fileId); - if (count($nodeArr) === 0) { - throw new NotFoundException('Could not process file with id \'' . $fileId . '\'. File was not found'); - } - - $node = array_shift($nodeArr); - - if (!$node instanceof Node || $node->getType() !== FileInfo::TYPE_FILE) { - throw new \InvalidArgumentException('Skipping process for file with id \'' . $fileId . '\'. It is not a file'); - } - - return $node; - } - - /** - * * @param string $userId The owners userId of the file to be processed - */ - private function initUserEnvironment(string $userId) : void { - /** @var IUser */ - $user = $this->userManager->get($userId); - if (!$user) { - throw new NoUserException("User with uid '$userId' was not found"); - } - - $this->userSession->setUser($user); - $this->filesystem->init($userId, '/' . $userId . '/files'); - } - - private function shutdownUserEnvironment() : void { - $this->userSession->setUser(null); - } - - /** - * @param string $filePath The filepath of the file to write - * @param string $ocrContent The new filecontent (which was OCR processed) - * @param int $fileId The id of the file to write. Used for locking. - */ - private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId) : void { - $dirPath = dirname($filePath); - $filename = basename($filePath); - - $this->processingFileAccessor->setCurrentlyProcessedFileId($fileId); - - try { - $view = $this->viewFactory->create($dirPath); - // Create new file or file-version with OCR-file - // This will trigger 'postWrite' event which would normally - // add the file to the queue again but this is tackled - // by the processingFileAccessor. - $view->file_put_contents($filename, $ocrContent); - } finally { - $this->processingFileAccessor->setCurrentlyProcessedFileId(null); - } - } } diff --git a/lib/Exception/OcrResultEmptyException.php b/lib/Exception/OcrResultEmptyException.php new file mode 100644 index 0000000..73e2c0c --- /dev/null +++ b/lib/Exception/OcrResultEmptyException.php @@ -0,0 +1,32 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +namespace OCA\WorkflowOcr\Exception; + +use Exception; + +class OcrResultEmptyException extends Exception { + public function __construct(string $message) { + $this->message = $message; + } +} diff --git a/lib/Migration/Version2404Date20220903071748.php b/lib/Migration/Version2404Date20220903071748.php index a6c3859..a29f9b1 100644 --- a/lib/Migration/Version2404Date20220903071748.php +++ b/lib/Migration/Version2404Date20220903071748.php @@ -89,7 +89,7 @@ private function getDatasetsToMigrate() : array { $workflowSettings = json_decode($row['operation'], true); $foundMapping = false; $newLangArr = []; - if (!array_key_exists('languages', $workflowSettings)) { + if (!$workflowSettings || !array_key_exists('languages', $workflowSettings)) { continue; } $languagesArr = $workflowSettings['languages']; diff --git a/lib/OcrProcessors/OcrMyPdfBasedProcessor.php b/lib/OcrProcessors/OcrMyPdfBasedProcessor.php index 740f42a..1aa7cff 100644 --- a/lib/OcrProcessors/OcrMyPdfBasedProcessor.php +++ b/lib/OcrProcessors/OcrMyPdfBasedProcessor.php @@ -25,6 +25,7 @@ use Cocur\Chain\Chain; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; +use OCA\WorkflowOcr\Exception\OcrResultEmptyException; use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; @@ -86,7 +87,7 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $ $ocrFileContent = $this->command->getOutput(); if (!$ocrFileContent) { - throw new OcrNotPossibleException('OCRmyPDF did not produce any output for file ' . $file->getPath()); + throw new OcrResultEmptyException('OCRmyPDF did not produce any output for file ' . $file->getPath()); } $recognizedText = $this->sidecarFileAccessor->getSidecarFileContent(); diff --git a/lib/Service/IOcrService.php b/lib/Service/IOcrService.php index 96d9f3f..10ee68c 100644 --- a/lib/Service/IOcrService.php +++ b/lib/Service/IOcrService.php @@ -27,19 +27,19 @@ namespace OCA\WorkflowOcr\Service; use OCA\WorkflowOcr\Model\WorkflowSettings; -use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; -use OCP\Files\File; interface IOcrService { /** - * Processes OCR on the given file + * Processes OCR on the given file. Creates a new file version and emits appropriate events. * - * @param string $mimeType The mimetype of the file to be processed - * @param string $fileContent The file to be processed + * @param int $fileId The id if the file to be processed + * @param string $uid The id of the user who has access to this file * @param WorkflowSettings $settings The settings to be used for processing * * @throws \OCA\WorkflowOcr\Exception\OcrNotPossibleException * @throws \OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException + * @throws \OCA\WorkflowOcr\Exception\OcrResultEmptyException + * @throws \InvalidArgumentException */ - public function ocrFile(File $file, WorkflowSettings $settings) : OcrProcessorResult; + public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settings) : void; } diff --git a/lib/Service/OcrService.php b/lib/Service/OcrService.php index 64790ff..e2ce5db 100644 --- a/lib/Service/OcrService.php +++ b/lib/Service/OcrService.php @@ -26,10 +26,21 @@ namespace OCA\WorkflowOcr\Service; +use OC\User\NoUserException; +use OCA\WorkflowOcr\Exception\OcrResultEmptyException; +use OCA\WorkflowOcr\Helper\IProcessingFileAccessor; use OCA\WorkflowOcr\Model\WorkflowSettings; use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory; -use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; +use OCA\WorkflowOcr\Wrapper\IFilesystem; +use OCA\WorkflowOcr\Wrapper\IViewFactory; use OCP\Files\File; +use OCP\Files\FileInfo; +use OCP\Files\IRootFolder; +use OCP\Files\Node; +use OCP\Files\NotFoundException; +use OCP\IUser; +use OCP\IUserManager; +use OCP\IUserSession; use OCP\SystemTag\ISystemTagObjectMapper; use OCP\SystemTag\TagNotFoundException; use Psr\Log\LoggerInterface; @@ -44,22 +55,129 @@ class OcrService implements IOcrService { /** @var ISystemTagObjectMapper */ private $systemTagObjectMapper; + /** @var IUserManager */ + private $userManager; + + /** @var IFilesystem */ + private $filesystem; + + /** @var IUserSession */ + private $userSession; + + /** @var IRootFolder */ + private $rootFolder; + + /** @var IEventService */ + private $eventService; + + /** @var IViewFactory */ + private $viewFactory; + + /** @var IProcessingFileAccessor */ + private $processingFileAccessor; + /** @var LoggerInterface */ private $logger; - public function __construct(IOcrProcessorFactory $ocrProcessorFactory, IGlobalSettingsService $globalSettingsService, ISystemTagObjectMapper $systemTagObjectMapper, LoggerInterface $logger) { + public function __construct( + IOcrProcessorFactory $ocrProcessorFactory, + IGlobalSettingsService $globalSettingsService, + ISystemTagObjectMapper $systemTagObjectMapper, + IUserManager $userManager, + IFilesystem $filesystem, + IUserSession $userSession, + IRootFolder $rootFolder, + IEventService $eventService, + IViewFactory $viewFactory, + IProcessingFileAccessor $processingFileAccessor, + LoggerInterface $logger) { $this->ocrProcessorFactory = $ocrProcessorFactory; $this->globalSettingsService = $globalSettingsService; $this->systemTagObjectMapper = $systemTagObjectMapper; + $this->userManager = $userManager; + $this->filesystem = $filesystem; + $this->userSession = $userSession; + $this->rootFolder = $rootFolder; + $this->eventService = $eventService; + $this->viewFactory = $viewFactory; + $this->processingFileAccessor = $processingFileAccessor; $this->logger = $logger; } /** @inheritdoc */ - public function ocrFile(File $file, WorkflowSettings $settings) : OcrProcessorResult { - $ocrProcessor = $this->ocrProcessorFactory->create($file->getMimeType()); - $result = $ocrProcessor->ocrFile($file, $settings, $this->globalSettingsService->getGlobalSettings()); - $this->processTagsAfterSuccessfulOcr($file, $settings); - return $result; + public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settings) : void { + try { + $this->initUserEnvironment($uid); + + $file = $this->getNode($fileId); + $ocrProcessor = $this->ocrProcessorFactory->create($file->getMimeType()); + $globalSettings = $this->globalSettingsService->getGlobalSettings(); + + try { + $result = $ocrProcessor->ocrFile($file, $settings, $globalSettings); + } catch(OcrResultEmptyException $ex) { + // #232: it's okay to have an empty result if the file was skipped due to OCR mode + if ($settings->getOcrMode() === WorkflowSettings::OCR_MODE_SKIP_FILE) { + $this->logger->debug('Skipping empty OCR result for file with id {fileId} because OCR mode is set to \'skip file\'', ['fileId' => $fileId]); + return; + } + throw $ex; + } + + $this->processTagsAfterSuccessfulOcr($file, $settings); + + $filePath = $file->getPath(); + $fileContent = $result->getFileContent(); + $originalFileExtension = $file->getExtension(); + $newFileExtension = $result->getFileExtension(); + + // Only create a new file version if the file OCR result was not empty #130 + if ($result->getRecognizedText() !== '') { + $newFilePath = $originalFileExtension === $newFileExtension ? + $filePath : + $filePath . ".pdf"; + + $this->createNewFileVersion($newFilePath, $fileContent, $fileId); + } + + $this->eventService->textRecognized($result, $file); + } finally { + $this->shutdownUserEnvironment(); + } + } + + /** + * * @param string $uid The owners userId of the file to be processed + */ + private function initUserEnvironment(string $uid) : void { + /** @var IUser */ + $user = $this->userManager->get($uid); + if (!$user) { + throw new NoUserException("User with uid '$uid' was not found"); + } + + $this->userSession->setUser($user); + $this->filesystem->init($uid, '/' . $uid . '/files'); + } + + private function shutdownUserEnvironment() : void { + $this->userSession->setUser(null); + } + + private function getNode(int $fileId) : ?Node { + /** @var File[] */ + $nodeArr = $this->rootFolder->getById($fileId); + if (count($nodeArr) === 0) { + throw new NotFoundException('Could not process file with id \'' . $fileId . '\'. File was not found'); + } + + $node = array_shift($nodeArr); + + if (!$node instanceof Node || $node->getType() !== FileInfo::TYPE_FILE) { + throw new \InvalidArgumentException('Skipping process for file with id \'' . $fileId . '\'. It is not a file'); + } + + return $node; } private function processTagsAfterSuccessfulOcr(File $file, WorkflowSettings $settings) : void { @@ -84,4 +202,27 @@ private function processTagsAfterSuccessfulOcr(File $file, WorkflowSettings $set } } } + + /** + * @param string $filePath The filepath of the file to write + * @param string $ocrContent The new filecontent (which was OCR processed) + * @param int $fileId The id of the file to write. Used for locking. + */ + private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId) : void { + $dirPath = dirname($filePath); + $filename = basename($filePath); + + $this->processingFileAccessor->setCurrentlyProcessedFileId($fileId); + + try { + $view = $this->viewFactory->create($dirPath); + // Create new file or file-version with OCR-file + // This will trigger 'postWrite' event which would normally + // add the file to the queue again but this is tackled + // by the processingFileAccessor. + $view->file_put_contents($filename, $ocrContent); + } finally { + $this->processingFileAccessor->setCurrentlyProcessedFileId(null); + } + } } diff --git a/tests/Integration/Notification/NotificationTest.php b/tests/Integration/Notification/NotificationTest.php index 70e277a..b991ec1 100644 --- a/tests/Integration/Notification/NotificationTest.php +++ b/tests/Integration/Notification/NotificationTest.php @@ -23,51 +23,23 @@ use OC\BackgroundJob\JobList; use OCA\WorkflowOcr\BackgroundJobs\ProcessFileJob; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; -use OCA\WorkflowOcr\Helper\IProcessingFileAccessor; -use OCA\WorkflowOcr\Service\IEventService; use OCA\WorkflowOcr\Service\INotificationService; use OCA\WorkflowOcr\Service\IOcrService; use OCA\WorkflowOcr\Service\NotificationService; -use OCA\WorkflowOcr\Wrapper\IFilesystem; -use OCA\WorkflowOcr\Wrapper\IViewFactory; use OCP\AppFramework\Utility\ITimeFactory; use OCP\DB\QueryBuilder\IExpressionBuilder; use OCP\DB\QueryBuilder\IQueryBuilder; -use OCP\Files\File; -use OCP\Files\FileInfo; -use OCP\Files\IRootFolder; use OCP\IConfig; use OCP\IDBConnection; -use OCP\IUser; -use OCP\IUserManager; -use OCP\IUserSession; use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\TestCase; use Psr\Log\LoggerInterface; class NotificationTest extends TestCase { - /** @var AppFake */ - private $appFake; /** @var LoggerInterface|MockObject */ private $logger; - /** @var IRootFolder|MockObject */ - private $rootFolder; /** @var IOcrService|MockObject */ private $ocrService; - /** @var IEventService|MockObject */ - private $eventService; - /** @var IViewFactory|MockObject */ - private $viewFactory; - /** @var IFilesystem|MockObject */ - private $filesystem; - /** @var IUserSession|MockObject */ - private $userSession; - /** @var IUserManager|MockObject */ - private $userManager; - /** @var IUser|MockObject */ - private $user; - /** @var IProcessingFileAccessor|MockObject */ - private $processingFileAccessor; /** @var INotificationService|MockObject */ private $notificationService; /** @var JobList */ @@ -84,34 +56,11 @@ protected function setUp() : void { $this->notificationService = new NotificationService(\OC::$server->get(\OCP\Notification\IManager::class)); $this->logger = $this->createMock(LoggerInterface::class); - $this->rootFolder = $this->createMock(IRootFolder::class); $this->ocrService = $this->createMock(IOcrService::class); - $this->eventService = $this->createMock(IEventService::class); - $this->viewFactory = $this->createMock(IViewFactory::class); - $this->filesystem = $this->createMock(IFilesystem::class); - $this->userSession = $this->createMock(IUserSession::class); - $this->processingFileAccessor = $this->createMock(IProcessingFileAccessor::class); - /** @var MockObject|IUserManager */ - $userManager = $this->createMock(IUserManager::class); - $user = $this->createMock(IUser::class); - $userManager->method('get') - ->withAnyParameters() - ->willReturn($user); - - $this->userManager = $userManager; - $this->user = $user; - $this->processFileJob = new ProcessFileJob( $this->logger, - $this->rootFolder, $this->ocrService, - $this->eventService, - $this->viewFactory, - $this->filesystem, - $this->userManager, - $this->userSession, - $this->processingFileAccessor, $this->notificationService, $this->createMock(ITimeFactory::class) ); @@ -158,13 +107,8 @@ protected function setUp() : void { } public function testBackgroundJobCreatesErrorNotificationIfOcrFailed() { - $fileMock = $this->createValidFileMock(); - $this->rootFolder->method('getById') - ->with(42) - ->willReturn([$fileMock]); - $this->ocrService->expects($this->once()) - ->method('ocrFile') + ->method('runOcrProcess') ->withAnyParameters() ->willThrowException(new OcrNotPossibleException('Some error')); $appFake = \OC::$server->get(AppFake::class); @@ -179,25 +123,4 @@ public function testBackgroundJobCreatesErrorNotificationIfOcrFailed() { $this->assertEquals('ocr_error', $notification->getSubject()); $this->assertEquals('An error occured while executing the OCR process (Some error). Please have a look at your servers logfile for more details.', $notification->getSubjectParameters()['message']); } - - /** - * @return File|MockObject - */ - private function createValidFileMock(string $mimeType = 'application/pdf', string $content = 'someFileContent', string $fileExtension = "pdf", string $path = "/admin/files/somefile.pdf") { - /** @var MockObject|File */ - $fileMock = $this->createMock(File::class); - $fileMock->method('getType') - ->willReturn(FileInfo::TYPE_FILE); - $fileMock->method('getMimeType') - ->willReturn($mimeType); - $fileMock->method('getContent') - ->willReturn($content); - $fileMock->method('getId') - ->willReturn(42); - $fileMock->method('getExtension') - ->willReturn($fileExtension); - $fileMock->method('getPath') - ->willReturn($path); - return $fileMock; - } } diff --git a/tests/Unit/BackgroundJobs/ProcessFileJobTest.php b/tests/Unit/BackgroundJobs/ProcessFileJobTest.php index acf20c0..d021291 100644 --- a/tests/Unit/BackgroundJobs/ProcessFileJobTest.php +++ b/tests/Unit/BackgroundJobs/ProcessFileJobTest.php @@ -25,30 +25,18 @@ use Exception; use OC\BackgroundJob\JobList; -use OC\User\NoUserException; use OCA\WorkflowOcr\BackgroundJobs\ProcessFileJob; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; use OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException; -use OCA\WorkflowOcr\Helper\IProcessingFileAccessor; -use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; -use OCA\WorkflowOcr\Service\IEventService; +use OCA\WorkflowOcr\Exception\OcrResultEmptyException; use OCA\WorkflowOcr\Service\INotificationService; use OCA\WorkflowOcr\Service\IOcrService; -use OCA\WorkflowOcr\Wrapper\IFilesystem; -use OCA\WorkflowOcr\Wrapper\IView; -use OCA\WorkflowOcr\Wrapper\IViewFactory; use OCP\AppFramework\Utility\ITimeFactory; use OCP\DB\QueryBuilder\IExpressionBuilder; use OCP\DB\QueryBuilder\IQueryBuilder; -use OCP\Files\File; -use OCP\Files\FileInfo; -use OCP\Files\IRootFolder; -use OCP\Files\Node; +use OCP\Files\NotFoundException; use OCP\IConfig; use OCP\IDBConnection; -use OCP\IUser; -use OCP\IUserManager; -use OCP\IUserSession; use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\TestCase; use Psr\Log\LoggerInterface; @@ -56,75 +44,28 @@ class ProcessFileJobTest extends TestCase { /** @var LoggerInterface|MockObject */ private $logger; - /** @var IRootFolder|MockObject */ - private $rootFolder; + /** @var IOcrService|MockObject */ private $ocrService; - /** @var IEventService|MockObject */ - private $eventService; - /** @var IViewFactory|MockObject */ - private $viewFactory; - /** @var IFilesystem|MockObject */ - private $filesystem; - /** @var IUserSession|MockObject */ - private $userSession; - /** @var IUserManager|MockObject */ - private $userManager; - /** @var IUser|MockObject */ - private $user; - /** @var IProcessingFileAccessor|MockObject */ - private $processingFileAccessor; + /** @var INotificationService|MockObject */ private $notificationService; /** @var JobList */ private $jobList; /** @var ProcessFileJob */ private $processFileJob; - /** @var File[] */ - private $rootFolderGetById42ReturnValue; + public function setUp() : void { parent::setUp(); $this->logger = $this->createMock(LoggerInterface::class); $this->ocrService = $this->createMock(IOcrService::class); - $this->eventService = $this->createMock(IEventService::class); - $this->viewFactory = $this->createMock(IViewFactory::class); - $this->filesystem = $this->createMock(IFilesystem::class); - $this->userSession = $this->createMock(IUserSession::class); - $this->processingFileAccessor = $this->createMock(IProcessingFileAccessor::class); $this->notificationService = $this->createMock(INotificationService::class); - - /** @var MockObject|IRootFolder */ - $this->rootFolder = $this->createMock(IRootFolder::class); - $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock()]; - $this->rootFolder->expects($this->any()) - ->method('getById') - ->with(42) - ->willReturnCallback(function () { - return $this->rootFolderGetById42ReturnValue; - }); - - /** @var MockObject|IUserManager */ - $userManager = $this->createMock(IUserManager::class); - $user = $this->createMock(IUser::class); - $userManager->method('get') - ->withAnyParameters() - ->willReturn($user); - - $this->userManager = $userManager; - $this->user = $user; $this->processFileJob = new ProcessFileJob( $this->logger, - $this->rootFolder, $this->ocrService, - $this->eventService, - $this->viewFactory, - $this->filesystem, - $this->userManager, - $this->userSession, - $this->processingFileAccessor, $this->notificationService, $this->createMock(ITimeFactory::class) ); @@ -170,22 +111,15 @@ public function setUp() : void { ]); } - public function testCatchesExceptionAndResetsUserEnvironment() { + public function testCatchesException() { $exception = new Exception('someEx'); - $this->filesystem->method('init') + $this->ocrService->method('runOcrProcess') ->willThrowException($exception); $this->logger->expects($this->once()) ->method('error') ->with($exception->getMessage(), ['exception' => $exception]); - // Make sure user-environment is reset after any exception - // so the user should be set on beginning but should also - // be reset to null after any run. - $this->userSession->expects($this->exactly(2)) - ->method('setUser') - ->withConsecutive([$this->user], [null]); - $this->processFileJob->start($this->jobList); } @@ -194,14 +128,8 @@ public function testCatchesExceptionAndResetsUserEnvironment() { */ public function testLogsErrorAndDoesNothingOnInvalidArguments($argument, $errorMessagePart) { $this->processFileJob->setArgument($argument); - $this->filesystem->expects($this->never()) - ->method('init') - ->withAnyParameters(); $this->ocrService->expects($this->never()) - ->method('ocrFile') - ->withAnyParameters(); - $this->viewFactory->expects($this->never()) - ->method('create') + ->method('runOcrProcess') ->withAnyParameters(); $this->logger->expects($this->once()) ->method('error') @@ -212,104 +140,17 @@ public function testLogsErrorAndDoesNothingOnInvalidArguments($argument, $errorM $this->processFileJob->start($this->jobList); } - /** - * @dataProvider dataProvider_ValidArguments - */ - public function testCallsInitFilesystem(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) { - $this->processFileJob->setArgument($arguments); - $this->filesystem->expects($this->once()) - ->method('init') - ->with($user, $rootFolderPath); - - $this->processFileJob->start($this->jobList); - } - - /** - * @dataProvider dataProvider_ValidArguments - */ - public function testCallsGetOnRootFolder(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) { - $this->processFileJob->setArgument($arguments); - $this->rootFolder->expects($this->once()) - ->method('getById') - ->with(42); - - $this->processFileJob->start($this->jobList); - } - - /** - * @dataProvider dataProvider_ValidArguments - */ - public function testCallsOcr_IfIsFile(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) { - $this->processFileJob->setArgument($arguments); - - $mimeType = 'application/pdf'; - $content = 'someFileContent'; - $fileMock = $this->createValidFileMock($mimeType, $content); - $this->rootFolderGetById42ReturnValue = [$fileMock]; - - $this->ocrService->expects($this->once()) - ->method('ocrFile') - ->with($fileMock); - - $this->processFileJob->start($this->jobList); - } - - /** - * @dataProvider dataProvider_ValidArguments - */ - public function testCreatesNewFileVersionAndEmitsTextRecognizedEvent(array $arguments, string $user, string $rootFolderPath, string $originalFileName, string $expectedOcrFilename) { - $this->processFileJob->setArgument($arguments); - $mimeType = 'application/pdf'; - $content = 'someFileContent'; - $ocrContent = 'someOcrProcessedFile'; - $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors - $originalFileMock = $this->createValidFileMock($mimeType, $content, $rootFolderPath, $originalFileName); - - $this->rootFolderGetById42ReturnValue = [$originalFileMock]; - - $this->ocrService->expects($this->once()) - ->method('ocrFile') - ->willReturn($ocrResult); - - /** @var MockObject|IView */ - $viewMock = $this->createMock(IView::class); - $viewMock->expects($this->once()) - ->method('file_put_contents') - ->with($expectedOcrFilename, $ocrContent); - $this->viewFactory->expects($this->once()) - ->method('create') - ->with($rootFolderPath) - ->willReturn($viewMock); - - $this->eventService->expects($this->once()) - ->method('textRecognized') - ->with($ocrResult, $originalFileMock); - - $this->processFileJob->start($this->jobList); - } - - public function testNotFoundLogsErrorAndSendsNotification_AndDoesNothingAfterwards() { - $this->rootFolderGetById42ReturnValue = []; + public function testNotFoundLogsErrorAndSendsNotification() { + $this->ocrService->method('runOcrProcess') + ->willThrowException(new NotFoundException('File was not found')); $this->logger->expects($this->once()) ->method('error') - ->with($this->stringContains('not found')); + ->with($this->stringContains('File was not found'), $this->callback(function ($subject) { + return is_array($subject) && ($subject['exception'] instanceof NotFoundException); + })); $this->notificationService->expects($this->once()) ->method('createErrorNotification') ->with($this->stringContains('An error occured while executing the OCR process (') && $this->stringContains('File was not found')); - $this->ocrService->expects($this->never()) - ->method('ocrFile'); - - $this->processFileJob->start($this->jobList); - } - - /** - * @dataProvider dataProvider_InvalidNodes - */ - public function testDoesNotCallOcr_OnNonFile($invalidNode) { - $this->rootFolderGetById42ReturnValue = [$invalidNode]; - - $this->ocrService->expects($this->never()) - ->method('ocrFile'); $this->processFileJob->start($this->jobList); } @@ -318,156 +159,25 @@ public function testDoesNotCallOcr_OnNonFile($invalidNode) { * @dataProvider dataProvider_OcrExceptions */ public function testLogsError_OnOcrException(Exception $exception) { - $fileMock = $this->createValidFileMock(); - $this->rootFolder->method('get') - ->with('/admin/files/somefile.pdf') - ->willReturn($fileMock); - - $this->ocrService->method('ocrFile') + $this->ocrService->method('runOcrProcess') ->willThrowException($exception); $this->logger->expects($this->once()) ->method('error'); - $this->viewFactory->expects($this->never()) - ->method('create'); - - $this->processFileJob->start($this->jobList); - } - - public function testThrowsNoUserException_OnNonExistingUser() { - // Unfortunately method definitions can't yet be overwritten in - // PHPUnit, see https://github.com/sebastianbergmann/phpunit-documentation-english/issues/169 - /** @var IUserManager|MockObject */ - $userManager = $this->createMock(IUserManager::class); - $userManager->method('get') - ->with('nonexistinguser') - ->willReturn(null); - - $this->logger->expects($this->once()) - ->method('error') - ->with($this->stringContains('nonexistinguser'), $this->callback(function ($subject) { - return is_array($subject) && ($subject['exception'] instanceof NoUserException); - })); - - $processFileJob = new ProcessFileJob( - $this->logger, - $this->rootFolder, - $this->ocrService, - $this->eventService, - $this->viewFactory, - $this->filesystem, - $userManager, - $this->userSession, - $this->processingFileAccessor, - $this->notificationService, - $this->createMock(ITimeFactory::class) - ); - $processFileJob->setId(111); - $arguments = ['fileId' => 42, 'uid' => 'nonexistinguser', 'settings' => '{}']; - $processFileJob->setArgument($arguments); - - $processFileJob->start($this->jobList); - } - - /** - * @dataProvider dataProvider_ValidArguments - */ - public function testCallsProcessingFileAccessor(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) { - $this->processFileJob->setArgument($arguments); - $mimeType = 'application/pdf'; - $content = 'someFileContent'; - $ocrContent = 'someOcrProcessedFile'; - $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors - - $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock($mimeType, $content)]; - - $this->ocrService->expects($this->once()) - ->method('ocrFile') - ->willReturn($ocrResult); - - $viewMock = $this->createMock(IView::class); - $this->viewFactory->expects($this->once()) - ->method('create') - ->willReturn($viewMock); - - $calledWithFileId42 = 0; - $calledWithNull = 0; - $withIdCalledFirst = false; - - $this->processingFileAccessor->expects($this->exactly(2)) - ->method('setCurrentlyProcessedFileId') - ->with($this->callback(function ($id) use (&$calledWithFileId42, &$calledWithNull, &$withIdCalledFirst) { - if ($id === 42) { - $calledWithFileId42++; - $withIdCalledFirst = $calledWithNull === 0; - } elseif ($id === null) { - $calledWithNull++; - } - - return true; - })); - - $this->processFileJob->start($this->jobList); - - $this->assertEquals(1, $calledWithFileId42); - $this->assertEquals(1, $calledWithNull); - $this->assertTrue($withIdCalledFirst); - } - - /** - * @dataProvider dataProvider_ValidArguments - */ - public function testDoesNotCreateNewFileVersionIfOcrContentWasEmpty(array $arguments, string $user, string $rootFolderPath, string $originalFileExtension, string $expectedOcrFilename) { - $this->processFileJob->setArgument($arguments); - $mimeType = 'application/pdf'; - $content = 'someFileContent'; - $ocrContent = ''; - $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); - $fileId = $arguments['fileId']; - - $this->rootFolder->expects($this->once()) - ->method('getById') - ->with($fileId) - ->willReturn([$this->createValidFileMock($mimeType, $content)]); - - $this->ocrService->expects($this->once()) - ->method('ocrFile') - ->willReturn($ocrResult); - - $viewMock = $this->createMock(IView::class); - $this->viewFactory->expects($this->never()) - ->method('create') - ->willReturn($viewMock); - - $this->processingFileAccessor->expects($this->never()) - ->method('setCurrentlyProcessedFileId'); - - $this->eventService->expects($this->once()) - ->method('textRecognized'); - $this->processFileJob->start($this->jobList); } public function testLogsNonOcrExceptionsFromOcrService() { - $mimeType = 'application/pdf'; - $content = 'someFileContent'; $exception = new \Exception('someException'); - $fileMock = $this->createValidFileMock($mimeType, $content); - $this->rootFolder->method('get') - ->willReturn($fileMock); - $this->ocrService->expects($this->once()) - ->method('ocrFile') + ->method('runOcrProcess') ->willThrowException($exception); $this->logger->expects($this->once()) ->method('error'); - $this->viewFactory->expects($this->never()) - ->method('create'); - $this->processFileJob->start($this->jobList); } @@ -479,55 +189,11 @@ public function dataProvider_InvalidArguments() { return $arr; } - public function dataProvider_ValidArguments() { - $arr = [ - [['fileId' => 42, 'uid' => 'admin', 'settings' => '{}'], 'admin', '/admin/files', 'somefile.pdf', 'somefile.pdf'], - [['fileId' => 42, 'uid' => 'myuser', 'settings' => '{}'], 'myuser', '/myuser/files', 'someotherfile.jpg', 'someotherfile.jpg.pdf'] - ]; - return $arr; - } - - public function dataProvider_InvalidNodes() { - /** @var MockObject|Node */ - $folderMock = $this->createMock(Node::class); - $folderMock->method('getType') - ->willReturn(FileInfo::TYPE_FOLDER); - $fileInfoMock = $this->createMock(FileInfo::class); - $arr = [ - [$folderMock], - [$fileInfoMock], - [null], - [(object)['someField' => 'someValue']] - ]; - return $arr; - } - public function dataProvider_OcrExceptions() { return [ [new OcrNotPossibleException('Ocr not possible')], - [new OcrProcessorNotFoundException('audio/mpeg')] + [new OcrProcessorNotFoundException('audio/mpeg')], + [new OcrResultEmptyException('Ocr result was empty')] ]; } - - /** - * @return File|MockObject - */ - private function createValidFileMock(string $mimeType = 'application/pdf', string $content = 'someFileContent', string $rootFolderPath = '/admin/files', string $fileName = 'somefile.pdf') { - /** @var MockObject|File */ - $fileMock = $this->createMock(File::class); - $fileMock->method('getType') - ->willReturn(FileInfo::TYPE_FILE); - $fileMock->method('getMimeType') - ->willReturn($mimeType); - $fileMock->method('getContent') - ->willReturn($content); - $fileMock->method('getId') - ->willReturn(42); - $fileMock->method('getPath') - ->willReturn("$rootFolderPath/$fileName"); - #get extension from filename - $fileMock->method('getExtension') - ->willReturn(pathinfo($fileName, PATHINFO_EXTENSION)); - return $fileMock; - } } diff --git a/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php b/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php index 99d1e18..d372656 100644 --- a/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php +++ b/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php @@ -24,6 +24,7 @@ namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; +use OCA\WorkflowOcr\Exception\OcrResultEmptyException; use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; @@ -176,7 +177,7 @@ public function testThrowsErrorIfOcrFileWasEmpty() { $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); } catch (\Throwable $t) { $thrown = true; - $this->assertInstanceOf(OcrNotPossibleException::class, $t); + $this->assertInstanceOf(OcrResultEmptyException::class, $t); $this->assertEquals('OCRmyPDF did not produce any output for file /admin/files/somefile.pdf', $t->getMessage()); } diff --git a/tests/Unit/Service/OcrServiceTest.php b/tests/Unit/Service/OcrServiceTest.php index 1e6313a..f796aad 100644 --- a/tests/Unit/Service/OcrServiceTest.php +++ b/tests/Unit/Service/OcrServiceTest.php @@ -23,13 +23,30 @@ namespace OCA\WorkflowOcr\Tests\Unit\Service; +use Exception; +use InvalidArgumentException; +use OC\User\NoUserException; +use OCA\WorkflowOcr\Exception\OcrResultEmptyException; +use OCA\WorkflowOcr\Helper\IProcessingFileAccessor; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor; use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory; +use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; +use OCA\WorkflowOcr\Service\IEventService; use OCA\WorkflowOcr\Service\IGlobalSettingsService; use OCA\WorkflowOcr\Service\OcrService; +use OCA\WorkflowOcr\Wrapper\IFilesystem; +use OCA\WorkflowOcr\Wrapper\IView; +use OCA\WorkflowOcr\Wrapper\IViewFactory; use OCP\Files\File; +use OCP\Files\FileInfo; +use OCP\Files\IRootFolder; +use OCP\Files\Node; +use OCP\Files\NotFoundException; +use OCP\IUser; +use OCP\IUserManager; +use OCP\IUserSession; use OCP\SystemTag\ISystemTagObjectMapper; use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\TestCase; @@ -46,6 +63,24 @@ class OcrServiceTest extends TestCase { private $systemTagObjectMapper; /** @var LoggerInterface|MockObject */ private $logger; + /** @var IRootFolder|MockObject */ + private $rootFolder; + /** @var IEventService|MockObject */ + private $eventService; + /** @var IViewFactory|MockObject */ + private $viewFactory; + /** @var IFilesystem|MockObject */ + private $filesystem; + /** @var IUserSession|MockObject */ + private $userSession; + /** @var IUserManager|MockObject */ + private $userManager; + /** @var IUser|MockObject */ + private $user; + /** @var IProcessingFileAccessor|MockObject */ + private $processingFileAccessor; + /** @var File[] */ + private $rootFolderGetById42ReturnValue; /** @var OcrService */ private $ocrService; /** @var File|MockObject */ @@ -55,12 +90,54 @@ public function setUp() : void { parent::setUp(); $this->globalSettingsService = $this->createMock(IGlobalSettingsService::class); - $this->ocrProcessorFactory = $this->createMock(IOcrProcessorFactory::class); $this->ocrProcessor = $this->createMock(IOcrProcessor::class); $this->systemTagObjectMapper = $this->createMock(ISystemTagObjectMapper::class); $this->logger = $this->createMock(LoggerInterface::class); $this->fileInput = $this->createMock(File::class); - $this->ocrService = new OcrService($this->ocrProcessorFactory, $this->globalSettingsService, $this->systemTagObjectMapper, $this->logger); + $this->eventService = $this->createMock(IEventService::class); + $this->viewFactory = $this->createMock(IViewFactory::class); + $this->filesystem = $this->createMock(IFilesystem::class); + $this->userSession = $this->createMock(IUserSession::class); + $this->processingFileAccessor = $this->createMock(IProcessingFileAccessor::class); + + /** @var MockObject|IRootFolder */ + $this->rootFolder = $this->createMock(IRootFolder::class); + $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock()]; + $this->rootFolder->expects($this->any()) + ->method('getById') + ->with(42) + ->willReturnCallback(function () { + return $this->rootFolderGetById42ReturnValue; + }); + + /** @var MockObject|IUserManager */ + $userManager = $this->createMock(IUserManager::class); + $user = $this->createMock(IUser::class); + $userManager->method('get') + ->withAnyParameters() + ->willReturn($user); + + $this->userManager = $userManager; + $this->user = $user; + + /** @var MockObject|IOcrProcessorFactory */ + $this->ocrProcessorFactory = $this->createMock(IOcrProcessorFactory::class); + $this->ocrProcessorFactory->method('create') + ->withAnyParameters() + ->willReturn($this->ocrProcessor); + + $this->ocrService = new OcrService( + $this->ocrProcessorFactory, + $this->globalSettingsService, + $this->systemTagObjectMapper, + $this->userManager, + $this->filesystem, + $this->userSession, + $this->rootFolder, + $this->eventService, + $this->viewFactory, + $this->processingFileAccessor, + $this->logger); } public function testCallsOcrProcessor_WithCorrectArguments() { @@ -87,7 +164,7 @@ public function testCallsOcrProcessor_WithCorrectArguments() { ->method('ocrFile') ->with($this->fileInput, $settings, $globalSettings); - $this->ocrService->ocrFile($this->fileInput, $settings); + $this->ocrService->runOcrProcess(42, 'usr', $settings); } public function testCallsSystemTagObjectManager_WithCorrectArguments() { @@ -124,7 +201,7 @@ public function testCallsSystemTagObjectManager_WithCorrectArguments() { ->method('assignTags') ->withConsecutive(["42", "files", 3], ["42", "files", 4]); - $this->ocrService->ocrFile($this->fileInput, $settings); + $this->ocrService->runOcrProcess(42, 'usr', $settings); } public function testCatchesTagNotFoundException() { @@ -160,6 +237,339 @@ public function testCatchesTagNotFoundException() { $this->logger->expects($this->exactly(2)) ->method('warning'); - $this->ocrService->ocrFile($this->fileInput, $settings); + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + + public function testResetsUserEnvironmentOnException() { + $settings = new WorkflowSettings(); + $exception = new Exception('someEx'); + $this->filesystem->method('init') + ->willThrowException($exception); + + // Make sure user-environment is reset after any exception + // so the user should be set on beginning but should also + // be reset to null after any run. + $this->userSession->expects($this->exactly(2)) + ->method('setUser') + ->withConsecutive([$this->user], [null]); + + $thrown = false; + try { + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } catch (Exception $e) { + $this->assertEquals($exception, $e); + $thrown = true; + } + $this->assertTrue($thrown); + } + + public function testCallsInitFilesystem() { + $settings = new WorkflowSettings(); + $this->filesystem->expects($this->once()) + ->method('init') + ->with('usr', '/usr/files'); + + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + + public function testCallsGetOnRootFolder() { + $settings = new WorkflowSettings(); + $this->rootFolder->expects($this->once()) + ->method('getById') + ->with(42); + + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + + public function testCallsOcrProcessorWithFile() { + $globalSettings = new GlobalSettings(); + $settings = new WorkflowSettings(); + + $mimeType = 'application/pdf'; + $content = 'someFileContent'; + $fileMock = $this->createValidFileMock($mimeType, $content); + $this->rootFolderGetById42ReturnValue = [$fileMock]; + + $this->globalSettingsService->expects($this->once()) + ->method('getGlobalSettings') + ->willReturn($globalSettings); + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->with($fileMock, $settings, $globalSettings); + + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + + /** + * @dataProvider dataProvider_OriginalAndNewFilesnames + */ + public function testCreatesNewFileVersionAndEmitsTextRecognizedEvent(string $originalFilename, string $expectedOcrFilename) { + $settings = new WorkflowSettings(); + + $rootFolderPath = '/usr/files'; + $mimeType = 'application/pdf'; + $content = 'someFileContent'; + $ocrContent = 'someOcrProcessedFile'; + $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors + $originalFileMock = $this->createValidFileMock($mimeType, $content, $rootFolderPath, $originalFilename); + + $this->rootFolderGetById42ReturnValue = [$originalFileMock]; + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->willReturn($ocrResult); + + /** @var MockObject|IView */ + $viewMock = $this->createMock(IView::class); + $viewMock->expects($this->once()) + ->method('file_put_contents') + ->with($expectedOcrFilename, $ocrContent); + $this->viewFactory->expects($this->once()) + ->method('create') + ->with($rootFolderPath) + ->willReturn($viewMock); + + $this->eventService->expects($this->once()) + ->method('textRecognized') + ->with($ocrResult, $originalFileMock); + + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + + public function testThrowsNotFoundExceptionWhenFileNotFound() { + $settings = new WorkflowSettings(); + $this->rootFolderGetById42ReturnValue = []; + + $this->expectException(NotFoundException::class); + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + + /** + * @dataProvider dataProvider_InvalidNodes + */ + public function testDoesNotCallOcr_OnNonFile($invalidNode) { + $settings = new WorkflowSettings(); + $this->rootFolderGetById42ReturnValue = [$invalidNode]; + + $this->ocrProcessor->expects($this->never()) + ->method('ocrFile'); + + $this->expectException(InvalidArgumentException::class); + $this->ocrService->runOcrProcess(42, 'usr', $settings); } + + public function testThrowsNoUserException_OnNonExistingUser() { + $settings = new WorkflowSettings(); + + // Unfortunately method definitions can't yet be overwritten in + // PHPUnit, see https://github.com/sebastianbergmann/phpunit-documentation-english/issues/169 + /** @var IUserManager|MockObject */ + $userManager = $this->createMock(IUserManager::class); + $userManager->method('get') + ->with('nonexistinguser') + ->willReturn(null); + + $this->ocrService = new OcrService( + $this->ocrProcessorFactory, + $this->globalSettingsService, + $this->systemTagObjectMapper, + $userManager, + $this->filesystem, + $this->userSession, + $this->rootFolder, + $this->eventService, + $this->viewFactory, + $this->processingFileAccessor, + $this->logger); + + $thrown = false; + try { + $this->ocrService->runOcrProcess(42, 'nonexistinguser', $settings); + } catch (NoUserException $e) { + $this->assertTrue(str_contains($e->getMessage(), 'nonexistinguser')); + $thrown = true; + } + $this->assertTrue($thrown); + } + + public function testCallsProcessingFileAccessor() { + $settings = new WorkflowSettings(); + $mimeType = 'application/pdf'; + $content = 'someFileContent'; + $ocrContent = 'someOcrProcessedFile'; + $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); // Extend this cases if we add new OCR processors + + $this->rootFolderGetById42ReturnValue = [$this->createValidFileMock($mimeType, $content)]; + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->willReturn($ocrResult); + + $viewMock = $this->createMock(IView::class); + $this->viewFactory->expects($this->once()) + ->method('create') + ->willReturn($viewMock); + + $calledWithFileId42 = 0; + $calledWithNull = 0; + $withIdCalledFirst = false; + + $this->processingFileAccessor->expects($this->exactly(2)) + ->method('setCurrentlyProcessedFileId') + ->with($this->callback(function ($id) use (&$calledWithFileId42, &$calledWithNull, &$withIdCalledFirst) { + if ($id === 42) { + $calledWithFileId42++; + $withIdCalledFirst = $calledWithNull === 0; + } elseif ($id === null) { + $calledWithNull++; + } + + return true; + })); + + $this->ocrService->runOcrProcess(42, 'usr', $settings); + + $this->assertEquals(1, $calledWithFileId42); + $this->assertEquals(1, $calledWithNull); + $this->assertTrue($withIdCalledFirst); + } + + public function testDoesNotCreateNewFileVersionIfOcrContentWasEmpty() { + $settings = new WorkflowSettings(); + $user = 'usr'; + $mimeType = 'application/pdf'; + $content = 'someFileContent'; + $ocrContent = ''; + $ocrResult = new OcrProcessorResult($ocrContent, "pdf", $ocrContent); + $fileId = 42; + + $this->rootFolder->expects($this->once()) + ->method('getById') + ->with($fileId) + ->willReturn([$this->createValidFileMock($mimeType, $content)]); + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->willReturn($ocrResult); + + $viewMock = $this->createMock(IView::class); + $this->viewFactory->expects($this->never()) + ->method('create') + ->willReturn($viewMock); + + $this->processingFileAccessor->expects($this->never()) + ->method('setCurrentlyProcessedFileId'); + + $this->eventService->expects($this->once()) + ->method('textRecognized'); + + $this->ocrService->runOcrProcess($fileId, $user, $settings); + } + + public function testOcrSkippedIfOcrModeIsSkipFileAndResultIsEmpty() { + $fileId = 42; + $settings = new WorkflowSettings('{"ocrMode": ' . WorkflowSettings::OCR_MODE_SKIP_FILE . '}'); + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->willThrowException(new OcrResultEmptyException('oops')); + $this->logger->expects($this->once()) + ->method('debug') + ->with($this->stringStartsWith('Skipping empty OCR result for file with id'), ['fileId' => $fileId]); + $this->viewFactory->expects($this->never()) + ->method('create'); + $this->eventService->expects($this->never()) + ->method('textRecognized'); + + $this->ocrService->runOcrProcess($fileId, 'usr', $settings); + } + + /** + * @dataProvider dataProvider_OcrModesThrowOnEmptyResult + */ + public function testOcrEmptyExceptionIsThrown(int $ocrMode) { + $fileId = 42; + $settings = new WorkflowSettings('{"ocrMode": ' . $ocrMode . '}'); + $ex = new OcrResultEmptyException('oops'); + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->willThrowException($ex); + $this->logger->expects($this->never()) + ->method('debug'); + $this->viewFactory->expects($this->never()) + ->method('create'); + $this->eventService->expects($this->never()) + ->method('textRecognized'); + + $thrown = false; + try { + $this->ocrService->runOcrProcess($fileId, 'usr', $settings); + } catch (OcrResultEmptyException $e) { + $this->assertEquals($ex, $e); + $thrown = true; + } + $this->assertTrue($thrown); + } + + public function dataProvider_InvalidNodes() { + /** @var MockObject|Node */ + $folderMock = $this->createMock(Node::class); + $folderMock->method('getType') + ->willReturn(FileInfo::TYPE_FOLDER); + $fileInfoMock = $this->createMock(FileInfo::class); + $arr = [ + [$folderMock], + [$fileInfoMock], + [null], + [(object)['someField' => 'someValue']] + ]; + return $arr; + } + + public function dataProvider_OcrModesThrowOnEmptyResult() { + return [ + [WorkflowSettings::OCR_MODE_FORCE_OCR], + [WorkflowSettings::OCR_MODE_SKIP_TEXT], + [WorkflowSettings::OCR_MODE_REDO_OCR] + ]; + } + + public function dataProvider_OriginalAndNewFilesnames() { + return [ + ['somefile.pdf', 'somefile.pdf'], + ['somefile.jpg', 'somefile.jpg.pdf'] + ]; + } + + /** + * @return File|MockObject + */ + private function createValidFileMock(string $mimeType = 'application/pdf', string $content = 'someFileContent', string $rootFolderPath = '/admin/files', string $fileName = 'somefile.pdf') { + /** @var MockObject|File */ + $fileMock = $this->createMock(File::class); + $fileMock->method('getType') + ->willReturn(FileInfo::TYPE_FILE); + $fileMock->method('getMimeType') + ->willReturn($mimeType); + $fileMock->method('getContent') + ->willReturn($content); + $fileMock->method('getId') + ->willReturn(42); + $fileMock->method('getPath') + ->willReturn("$rootFolderPath/$fileName"); + #get extension from filename + $fileMock->method('getExtension') + ->willReturn(pathinfo($fileName, PATHINFO_EXTENSION)); + return $fileMock; + } + + // public function dataProvider_ValidArguments() { + // $arr = [ + // [['fileId' => 42, 'uid' => 'admin', 'settings' => '{}'], 'admin', '/admin/files', 'somefile.pdf', 'somefile.pdf'], + // [['fileId' => 42, 'uid' => 'myuser', 'settings' => '{}'], 'myuser', '/myuser/files', 'someotherfile.jpg', 'someotherfile.jpg.pdf'] + // ]; + // return $arr; + // } } diff --git a/tests/psalm-baseline.xml b/tests/psalm-baseline.xml index 8b9d31b..3fa0ede 100644 --- a/tests/psalm-baseline.xml +++ b/tests/psalm-baseline.xml @@ -1,15 +1,5 @@ - - - $this->rootFolder - IRootFolder - IRootFolder - - - NoUserException - - $this->rootFolder @@ -35,6 +25,16 @@ $settings + + + $this->rootFolder + IRootFolder + IRootFolder + + + NoUserException + + \OC\Files\Filesystem From b8e07fa230410a061ffe96536e3d87f138a0c68e Mon Sep 17 00:00:00 2001 From: Robin Windey Date: Thu, 5 Oct 2023 07:24:52 +0000 Subject: [PATCH 2/2] Cleanup code --- lib/Service/IOcrService.php | 2 +- tests/Unit/Service/OcrServiceTest.php | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/Service/IOcrService.php b/lib/Service/IOcrService.php index 10ee68c..a8c536b 100644 --- a/lib/Service/IOcrService.php +++ b/lib/Service/IOcrService.php @@ -32,7 +32,7 @@ interface IOcrService { /** * Processes OCR on the given file. Creates a new file version and emits appropriate events. * - * @param int $fileId The id if the file to be processed + * @param int $fileId The id if the file to be processed * @param string $uid The id of the user who has access to this file * @param WorkflowSettings $settings The settings to be used for processing * diff --git a/tests/Unit/Service/OcrServiceTest.php b/tests/Unit/Service/OcrServiceTest.php index f796aad..3c218ac 100644 --- a/tests/Unit/Service/OcrServiceTest.php +++ b/tests/Unit/Service/OcrServiceTest.php @@ -564,12 +564,4 @@ private function createValidFileMock(string $mimeType = 'application/pdf', strin ->willReturn(pathinfo($fileName, PATHINFO_EXTENSION)); return $fileMock; } - - // public function dataProvider_ValidArguments() { - // $arr = [ - // [['fileId' => 42, 'uid' => 'admin', 'settings' => '{}'], 'admin', '/admin/files', 'somefile.pdf', 'somefile.pdf'], - // [['fileId' => 42, 'uid' => 'myuser', 'settings' => '{}'], 'myuser', '/myuser/files', 'someotherfile.jpg', 'someotherfile.jpg.pdf'] - // ]; - // return $arr; - // } }