Skip to content

Commit

Permalink
Merge pull request #104 from OPUS4/issue51
Browse files Browse the repository at this point in the history
Index-Kommando unterstützt Optionen für Sammlungen
  • Loading branch information
j3nsch authored Apr 18, 2023
2 parents aa21f1e + de86cc8 commit c88ac7a
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 10 deletions.
7 changes: 6 additions & 1 deletion src/Console/Helper/DocumentHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,21 @@ class DocumentHelper
*
* @param int $start Start of ID range
* @param int $end End of ID range
* @param int $colId
* @return array Array of document IDs
*
* TODO move somewhere else, not Index specific functionality
*/
public function getDocumentIds($start, $end)
public function getDocumentIds($start, $end, $colId = 0)
{
$finder = Repository::getInstance()->getDocumentFinder();

$finder->setDocumentIdRange($start, $end);

if ($colId > 0) {
$finder->setCollectionId($colId);
}

return $finder->getIds();
}
}
12 changes: 9 additions & 3 deletions src/Console/Helper/IndexHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

namespace Opus\Search\Console\Helper;

use Opus\Common\Collection;
use Opus\Common\Config;
use Opus\Common\Console\Helper\ProgressBar;
use Opus\Common\Console\Helper\ProgressMatrix;
Expand Down Expand Up @@ -99,14 +100,15 @@ class IndexHelper
/**
* @param int $startId
* @param int $endId
* @param int $colId
* @return float|string
* @throws SearchException
* @throws ModelException
* @throws Zend_Config_Exception
*
* TODO Is the timestamp in the console output useful?
*/
public function index($startId, $endId = -1)
public function index($startId, $endId = -1, $colId = 0)
{
$output = $this->getOutput();
$blockSize = $this->getBlockSize();
Expand All @@ -130,7 +132,7 @@ public function index($startId, $endId = -1)
if ($singleDocument) {
$docIds = [$startId];
} else {
$docIds = $documentHelper->getDocumentIds($startId, $endId);
$docIds = $documentHelper->getDocumentIds($startId, $endId, $colId);
}

$docCount = count($docIds);
Expand Down Expand Up @@ -159,7 +161,7 @@ public function index($startId, $endId = -1)
if ($singleDocument) {
$output->writeln("Removing document <fg=yellow>$startId</> from index ... ");
$indexer->removeDocumentsFromIndexById($docIds);
} elseif ($removeAll) {
} elseif ($removeAll && $colId === 0) {
$output->writeln('Removing <fg=yellow>all</> documents from index ... ');
$indexer->removeAllDocumentsFromIndex();
} else {
Expand All @@ -170,6 +172,10 @@ public function index($startId, $endId = -1)

if ($singleDocument) {
$output->writeln("Indexing document <fg=yellow>$startId</> ...");
} elseif ($colId > 0) {
$col = Collection::get($colId);
$colTitle = $col->getDisplayName();
$output->writeln("Indexing documents in collection: \"${colTitle}\" (ID=$colId)");
} elseif ($endId !== null) {
$output->writeln("Indexing document from <fg=yellow>$startId</> to <fg=yellow>$endId</> ...");
} elseif ($startId !== null) {
Expand Down
68 changes: 66 additions & 2 deletions src/Console/IndexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,20 @@

namespace Opus\Search\Console;

use Opus\Common\Collection;
use Opus\Common\CollectionRole;
use Opus\Common\Console\AbstractDocumentCommand;
use Opus\Common\Model\ModelException;
use Opus\Search\Console\Helper\IndexHelper;
use Opus\Search\SearchException;
use Symfony\Component\Console\Completion\CompletionInput;
use Symfony\Component\Console\Exception\InvalidOptionException;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Zend_Config_Exception;

use function count;
use function ctype_digit;
use function ltrim;
use function sprintf;
Expand All @@ -56,12 +60,21 @@ class IndexCommand extends AbstractDocumentCommand

const OPTION_TIMEOUT = 'timeout';

const OPTION_COLLECTION = 'col';

const OPTION_ROLE_NAME = 'role';

const OPTION_COLLECTION_NUMBER = 'number';

/** @var string */
protected static $defaultName = 'index:index';

/** @var int */
protected $blockSize = 10;

/** @var int */
protected $collectionId = 0;

protected function configure()
{
parent::configure();
Expand All @@ -83,6 +96,12 @@ protected function configure()
<fg=yellow>20 -</> will index all documents starting from 20
<fg=yellow>- 50</> will index all documents up to 50
You can also index just the documents in a collection.
Examples:
<fg=yellow>--col=15</> index all documents in collection ID=15
<fg=yellow>--role=ddc --number=02</> index all documents in matching collection
You can use the <fg=green>blocksize</> option to specify how many documents should be indexed
in a single request to the Solr server. Indexing multiple documents per request
improves performance. However sometimes this can cause problems if the indexing
Expand Down Expand Up @@ -120,6 +139,26 @@ protected function configure()
't',
InputOption::VALUE_REQUIRED,
'Timeout for extraction in seconds'
)->addOption(
self::OPTION_COLLECTION,
null,
InputOption::VALUE_REQUIRED,
'Collection-ID to index contained documents'
)->addOption(
self::OPTION_ROLE_NAME,
null,
InputOption::VALUE_REQUIRED,
'Name of collection role',
null,
// Shell-completion callback for the role option: it ignores the partial
// input and always suggests the same fixed list of collection role names
// (presumably the commonly used OPUS 4 roles - confirm against the database).
function (CompletionInput $input) {
    return ['institutes', 'ddc', 'msc', 'authors'];
}
)->addOption(
self::OPTION_COLLECTION_NUMBER,
null,
InputOption::VALUE_REQUIRED,
'Number of collection in role',
null
)
->setAliases(['index']);
}
Expand All @@ -129,14 +168,38 @@ protected function processArguments(InputInterface $input)
parent::processArguments($input);

$blockSize = $input->getOption(self::OPTION_BLOCKSIZE);

$blockSize = ltrim($blockSize, '=');

if ($blockSize !== null && (! ctype_digit($blockSize) || ! $blockSize > 0)) {
throw new InvalidOptionException('Blocksize must be an integer >= 1');
} else {
$this->blockSize = $blockSize;
}

$colId = $input->getOption(self::OPTION_COLLECTION);

if ($colId !== null && ! ctype_digit($colId)) {
throw new InvalidOptionException('Col must be an integer >= 1');
} else {
$this->collectionId = (int) $colId;
}

if ($this->collectionId === 0) {
$roleName = $input->getOption(self::OPTION_ROLE_NAME);
$colNumber = $input->getOption(self::OPTION_COLLECTION_NUMBER);

if ($roleName !== null && $colNumber !== null) {
$role = CollectionRole::fetchByName($roleName);
if ($role !== null) {
$roleId = $role->getId();
$col = Collection::fetchCollectionsByRoleNumber($roleId, $colNumber);
if (count($col) === 1) {
$this->collectionId = $col[0]->getId();
}
// TODO DOCTRINE handle multiple matching collections found (Is that possible?)
}
}
}
}

/**
Expand All @@ -154,6 +217,7 @@ protected function execute(InputInterface $input, OutputInterface $output)

$startId = $this->startId;
$endId = $this->endId;
$colId = $this->collectionId;

$builder = new IndexHelper();
$builder->setOutput($output);
Expand All @@ -166,7 +230,7 @@ protected function execute(InputInterface $input, OutputInterface $output)
if ($this->isSingleDocument()) {
$runtime = $builder->index($startId);
} else {
$runtime = $builder->index($startId, $endId);
$runtime = $builder->index($startId, $endId, $colId);
}
$message = sprintf('Operation completed successfully in <fg=yellow>%.2f</> seconds.', $runtime);
$output->writeln($message);
Expand Down
17 changes: 13 additions & 4 deletions src/Solr/Solarium/Adapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@

class Adapter extends AbstractAdapter implements IndexingInterface, SearchingInterface, ExtractingInterface
{
/** @var string */
protected $serviceName;

/** @var Zend_Config */
protected $options;

Expand All @@ -109,15 +112,21 @@ class Adapter extends AbstractAdapter implements IndexingInterface, SearchingInt
*/
public function __construct($serviceName, $options)
{
$this->options = $options;
$adapter = new Curl();
$eventDispatcher = new EventDispatcher();
$this->serviceName = $serviceName;
$this->options = $options;
$adapter = new Curl();
$eventDispatcher = new EventDispatcher();

$this->client = new SolariumClient($adapter, $eventDispatcher, $options->toArray());

// ensure service is basically available
$this->ping();
}

public function ping()
{
$ping = $this->client->createPing();
$this->execute($ping, 'failed pinging service ' . $serviceName);
$this->execute($ping, 'failed pinging service ' . $this->serviceName);
}

/**
Expand Down
49 changes: 49 additions & 0 deletions test/Solr/Solarium/AdapterTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

/**
* This file is part of OPUS. The software OPUS has been originally developed
* at the University of Stuttgart with funding from the German Research Net,
* the Federal Department of Higher Education and Research and the Ministry
* of Science, Research and the Arts of the State of Baden-Wuerttemberg.
*
* OPUS 4 is a complete rewrite of the original OPUS software and was developed
* by the Stuttgart University Library, the Library Service Center
* Baden-Wuerttemberg, the North Rhine-Westphalian Library Service Center,
* the Cooperative Library Network Berlin-Brandenburg, the Saarland University
* and State Library, the Saxon State Library - Dresden State and University
* Library, the Bielefeld University Library and the University Library of
* Hamburg University of Technology with funding from the German Research
* Foundation and the European Regional Development Fund.
*
* LICENCE
* OPUS is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the Licence, or any later version.
* OPUS is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details. You should have received a copy of the GNU General Public License
* along with OPUS; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* @copyright Copyright (c) 2023, OPUS 4 development team
* @license http://www.gnu.org/licenses/gpl.html General Public License
*/

namespace OpusTest\Search\Solr\Solarium;

use Opus\Search\Service;
use OpusTest\Search\TestAsset\DocumentBasedTestCase;

class AdapterTest extends DocumentBasedTestCase
{
    /**
     * Smoke test: selects the 'solr' indexing service and pings it.
     *
     * Nothing is asserted explicitly; the test succeeds as long as neither
     * selecting the service nor the ping call raises an exception.
     *
     * @doesNotPerformAssertions
     */
    public function testPing()
    {
        Service::selectIndexingService(null, 'solr')->ping();
    }
}

0 comments on commit c88ac7a

Please sign in to comment.