diff --git a/conf/schema.xml b/conf/schema.xml
index 43eb842..9b4c8a2 100644
--- a/conf/schema.xml
+++ b/conf/schema.xml
@@ -122,59 +122,11 @@
-
-
-
-
-
-
id
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/conf/solrconfig.xml b/conf/solrconfig.xml
index 8a93f5b..4439ef7 100644
--- a/conf/solrconfig.xml
+++ b/conf/solrconfig.xml
@@ -61,7 +61,7 @@
explicit
- text
+ title
AND
diff --git a/src/Query.php b/src/Query.php
index d796122..623b2ca 100644
--- a/src/Query.php
+++ b/src/Query.php
@@ -33,6 +33,8 @@
namespace Opus\Search;
use InvalidArgumentException;
+use Opus\Common\Config;
+use Opus\Search\Config as SearchConfig;
use Opus\Search\Facet\Set;
use Opus\Search\Filter\AbstractFilterBase;
use RuntimeException;
@@ -42,6 +44,7 @@
use function array_merge;
use function array_shift;
use function array_unique;
+use function boolval;
use function count;
use function ctype_digit;
use function intval;
@@ -74,7 +77,7 @@
* @method int getRows( int $default = null )
* @method string[] getFields( array $default = null )
* @method array getSort( array $default = null )
- * @method bool getUnion( bool $default = null )
+ * @method bool getUnion( bool $default = false )
* @method AbstractFilterBase getFilter(AbstractFilterBase $default = null ) retrieves condition to be met by resulting documents
* @method Set getFacet( Set $default = null )
* @method $this setStart( int $offset )
@@ -86,6 +89,8 @@
* @method $this setFacet( Set $facet )
* @method $this addFields( string $fields )
* @method $this addSort( $sorting )
+ * @method $this setWeightedFields( float[] $weightedFields ) assigns boost factors keyed by field name (e.g. [ 'title' => 10, 'abstract' => 0.5 ])
+ * @method $this setWeightMultiplier( int $multiplier ) multiplier to further increase boost factors when matching phrases
*/
class Query
{
@@ -95,14 +100,16 @@ class Query
public function reset()
{
$this->data = [
- 'start' => null,
- 'rows' => null,
- 'fields' => null,
- 'sort' => null,
- 'union' => null,
- 'filter' => null,
- 'facet' => null,
- 'subfilters' => null,
+ 'start' => null,
+ 'rows' => null,
+ 'fields' => null,
+ 'sort' => null,
+ 'union' => false,
+ 'filter' => null,
+ 'facet' => null,
+ 'subfilters' => null,
+ 'weightedfields' => null,
+ 'weightmultiplier' => null,
];
}
@@ -184,6 +191,83 @@ protected function normalizeDirection($ascending)
return $ascending;
}
+ /**
+ * Returns true if a weighted search shall be used, otherwise false; falls back to config option search.weightedSearch.
+ *
+ * @return bool explicitly set value or, if unset, the lazily resolved and cached configuration value (default false)
+ */
+ public function getWeightedSearch()
+ {
+ if (! isset($this->data['weightedsearch'])) {
+ $config = Config::get();
+
+ if (isset($config->search->weightedSearch)) {
+ $this->data['weightedsearch'] = boolval($config->search->weightedSearch);
+ } else {
+ $this->data['weightedsearch'] = false;
+ }
+ }
+
+ return $this->data['weightedsearch'];
+ }
+
+ /**
+ * Set to true if a weighted search shall be used, otherwise set to false; takes precedence over the configuration.
+ *
+ * @param bool $value any value is accepted and coerced to bool via double negation
+ * @return $this fluent interface
+ */
+ public function setWeightedSearch($value)
+ {
+ $this->data['weightedsearch'] = ! ! $value;
+
+ return $this;
+ }
+
+ /**
+ * Returns boost factors keyed by field (e.g. [ 'title' => 10, 'abstract' => 0.5 ]), by default from config.
+ *
+ * @return (int|float)[] assigned boost factors or, if none were assigned, config section search.simple (else [])
+ */
+ public function getWeightedFields()
+ {
+ if ($this->data['weightedfields'] === null) {
+ $config = Config::get();
+
+ if (isset($config->search->simple)) {
+ $this->data['weightedfields'] = $config->search->simple->toArray();
+ } else {
+ $this->data['weightedfields'] = [];
+ }
+ }
+
+ return $this->data['weightedfields'];
+ }
+
+ /**
+ * Returns the multiplier used to further increase field-specific boost factors when
+ * matching phrases (i.e., in cases where all query terms appear in close proximity).
+ *
+ * For example, with a weight multiplier of 5, the weightedfields array [ 'title' => 10, 'abstract' => 0.5 ]
+ * would be translated to [ 'title' => 50, 'abstract' => 2.5 ] when matching phrases.
+ *
+ * @return int set value, else config option search.weightMultiplier (returned uncast - TODO confirm type), else 1
+ */
+ public function getWeightMultiplier()
+ {
+ if ($this->data['weightmultiplier'] === null) {
+ $config = Config::get();
+
+ if (isset($config->search->weightMultiplier)) {
+ $this->data['weightmultiplier'] = $config->search->weightMultiplier;
+ } else {
+ $this->data['weightmultiplier'] = 1;
+ }
+ }
+
+ return $this->data['weightmultiplier'];
+ }
+
/**
* Retrieves value of selected query parameter.
*
@@ -214,6 +298,7 @@ public function set($name, $value, $adding = false)
switch ($name) {
case 'start':
case 'rows':
+ case 'weightmultiplier':
if ($adding) {
throw new InvalidArgumentException('invalid parameter access on ' . $name);
}
@@ -300,6 +385,18 @@ public function set($name, $value, $adding = false)
case 'subfilters':
throw new RuntimeException('invalid access on sub filters');
+
+ case 'weightedfields':
+ if ($adding) {
+ throw new InvalidArgumentException('invalid parameter access on ' . $name);
+ }
+
+ if (! is_array($value)) {
+ throw new InvalidArgumentException('invalid query fields option');
+ }
+
+ $this->data[$name] = $value;
+ break;
}
return $this;
@@ -469,7 +566,7 @@ public function getSubFilters()
*/
public static function getParameterDefault($name, $fallbackIfMissing, $oldName = null)
{
- $config = Config::getDomainConfiguration();
+ $config = SearchConfig::getDomainConfiguration();
$defaults = $config->parameterDefaults;
if ($defaults instanceof Zend_Config) {
diff --git a/src/Result/Base.php b/src/Result/Base.php
index 2d9849f..a71232c 100644
--- a/src/Result/Base.php
+++ b/src/Result/Base.php
@@ -38,8 +38,10 @@
use Opus\Search\Log;
use RuntimeException;
+use function array_filter;
use function array_key_exists;
use function array_map;
+use function array_values;
use function count;
use function ctype_digit;
use function intval;
@@ -192,9 +194,11 @@ public function getFacet($fieldName)
* Retrieves set of matching and locally existing documents returned in
* response to some search query.
*
+ * @param bool $ignoreZeroScoreMatches ignore any matches with score 0.0
+ * (true) or not (false); defaults to true
* @return ResultMatch[]
*/
- public function getReturnedMatches()
+ public function getReturnedMatches($ignoreZeroScoreMatches = true)
{
if ($this->data['matches'] === null) {
return [];
@@ -208,7 +212,10 @@ public function getReturnedMatches()
foreach ($this->data['matches'] as $match) {
try {
$match->getDocument();
- $matches[] = $match;
+ $ignoreMatch = $ignoreZeroScoreMatches === true && $match->getScore() === 0.0;
+ if ($ignoreMatch !== true) {
+ $matches[] = $match;
+ }
} catch (DocumentException $e) {
Log::get()->warn('skipping matching but locally missing document #' . $match->getId());
}
@@ -223,18 +230,22 @@ public function getReturnedMatches()
*
* @note If query was requesting to retrieve non-qualified matches this set
* might include IDs of documents that doesn't exist locally anymore.
+ * @param bool $ignoreZeroScoreMatches ignore any matches with score 0.0
+ * (true) or not (false); defaults to true
* @return int[]
*/
- public function getReturnedMatchingIds()
+ public function getReturnedMatchingIds($ignoreZeroScoreMatches = true)
{
if ($this->data['matches'] === null) {
return [];
}
- return array_map(function ($match) {
- /** @var ResultMatch $match */
- return $match->getId();
+ $matchingIds = array_map(function (ResultMatch $match) use ($ignoreZeroScoreMatches) {
+ $ignoreMatch = $ignoreZeroScoreMatches === true && $match->getScore() === 0.0;
+ return $ignoreMatch !== true ? $match->getId() : null;
}, $this->data['matches']);
+ // drop only the null placeholders of ignored matches; a bare array_filter() would also drop falsy IDs such as 0
+ return array_values(array_filter($matchingIds, static function ($id) { return $id !== null; }));
}
/**
@@ -247,7 +258,7 @@ public function getReturnedMatchingIds()
* has changed in that it's returning set of Opus_Document instances
* rather than set of Opus_Search_Util_Result instances.
* @note The wording is less specific in that all information in response to
- * search query may considered results of search. Thus this new API
+ * search query may be considered results of search. Thus this new API
* prefers "matches" over "results".
*/
public function getResults()
diff --git a/src/Solr/Solarium/Adapter.php b/src/Solr/Solarium/Adapter.php
index bfc2096..96d5a73 100755
--- a/src/Solr/Solarium/Adapter.php
+++ b/src/Solr/Solarium/Adapter.php
@@ -77,6 +77,7 @@
use function file_exists;
use function filesize;
use function filter_var;
+use function implode;
use function in_array;
use function intval;
use function is_array;
@@ -615,6 +616,25 @@ protected function applyParametersOnQuery(
$query->setSorts($sortings);
}
+ $isWeightedSearch = $parameters->getWeightedSearch();
+ if ($isWeightedSearch === true) {
+ // get the edismax component
+ $edismax = $query->getEDisMax();
+
+ // NOTE: query is now an edismax query
+ $weightedFields = $parameters->getWeightedFields();
+ if (! empty($weightedFields)) {
+ $queryFields = $this->getQueryFieldsString($weightedFields);
+ $edismax->setQueryFields($queryFields);
+
+ $weightMultiplier = $parameters->getWeightMultiplier();
+ if ($weightMultiplier !== null) {
+ $phraseFields = $this->getPhraseFieldsString($weightedFields, $weightMultiplier);
+ $edismax->setPhraseFields($phraseFields);
+ }
+ }
+ }
+
$facet = $parameters->getFacet();
if ($facet !== null) {
$facetSet = $query->getFacetSet();
@@ -880,4 +900,38 @@ public function setTimeout($timeout)
$this->client->setOptions($options, true);
}
}
+
+ /**
+ * Converts an array containing boost factors keyed by field into a query fields string that can be used
+ * as input for the Solr `qf` request parameter.
+ *
+ * @param (int|float)[] $weightedFields assigns boost factors to fields, e.g.: [ 'title' => 10, 'abstract' => 0.5 ]
+ * @return string space-separated "field^boost" pairs, e.g.: "title^10 abstract^0.5"; empty string for an empty array
+ */
+ protected function getQueryFieldsString($weightedFields)
+ {
+ $queryFields = [];
+ foreach ($weightedFields as $field => $boostFactor) {
+ $queryFields[] = "$field^$boostFactor";
+ }
+
+ return implode(' ', $queryFields);
+ }
+
+ /**
+ * Generates a phrase fields string that can be used as input for the Solr `pf` request parameter.
+ *
+ * @param (int|float)[] $weightedFields assigns boost factors to fields, e.g.: [ 'title' => 10, 'abstract' => 0.5 ]
+ * @param int $weightMultiplier factor by which each boost factor will be multiplied when matching phrases, e.g.: 5
+ * @return string space-separated "field^(boost * multiplier)" pairs, e.g.: "title^50 abstract^2.5"
+ */
+ protected function getPhraseFieldsString($weightedFields, $weightMultiplier)
+ {
+ $phraseFields = [];
+ foreach ($weightedFields as $field => $boostFactor) {
+ $phraseFields[] = "$field^" . $boostFactor * $weightMultiplier;
+ }
+
+ return implode(' ', $phraseFields);
+ }
}
diff --git a/test/QueryTest.php b/test/QueryTest.php
index a1ec897..fc2fb7c 100644
--- a/test/QueryTest.php
+++ b/test/QueryTest.php
@@ -49,7 +49,7 @@ public function testInitiallyEmpty()
$this->assertFalse(isset($query->rows));
$this->assertFalse(isset($query->fields));
$this->assertFalse(isset($query->sort));
- $this->assertFalse(isset($query->union));
+ $this->assertFalse($query->union);
}
public function testSupportingExplicitGetter()
@@ -60,7 +60,7 @@ public function testSupportingExplicitGetter()
$this->assertNull($query->get('rows'));
$this->assertNull($query->get('fields'));
$this->assertNull($query->get('sort'));
- $this->assertNull($query->get('union'));
+ $this->assertFalse($query->get('union'));
}
public function testSupportingImplicitGetter()
@@ -71,7 +71,7 @@ public function testSupportingImplicitGetter()
$this->assertNull($query->rows);
$this->assertNull($query->fields);
$this->assertNull($query->sort);
- $this->assertNull($query->union);
+ $this->assertFalse($query->union);
}
public function testSupportingGetterMethods()
@@ -82,7 +82,7 @@ public function testSupportingGetterMethods()
$this->assertNull($query->getRows());
$this->assertNull($query->getFields());
$this->assertNull($query->getSort());
- $this->assertNull($query->getUnion());
+ $this->assertFalse($query->getUnion());
}
/**
diff --git a/test/Solr/Solarium/AdapterSearchingTest.php b/test/Solr/Solarium/AdapterSearchingTest.php
index f64d17c..e1c7d3c 100644
--- a/test/Solr/Solarium/AdapterSearchingTest.php
+++ b/test/Solr/Solarium/AdapterSearchingTest.php
@@ -33,19 +33,77 @@
namespace OpusTest\Search\Solr\Solarium;
use Exception;
+use Opus\Common\Document;
use Opus\Common\Person;
use Opus\Search\Query;
use Opus\Search\QueryFactory;
+use Opus\Search\SearchingInterface;
use Opus\Search\Service;
use Opus\Search\Solr\Solarium\Adapter;
use Opus\Search\Util\Query as QueryUtil;
use Opus\Search\Util\Searcher;
use OpusTest\Search\TestAsset\DocumentBasedTestCase;
+use function abs;
use function count;
+use function in_array;
class AdapterSearchingTest extends DocumentBasedTestCase
{
+ /** @var array[] document property sets for the weighted search tests, keyed by document description name */
+ protected static $additionalDocumentPropertySets = [
+ 'weightedTestDocA' => [
+ 'TitleMain' => [
+ 'Value' => 'Some Document',
+ 'Language' => 'eng',
+ ],
+ 'TitleAbstract' => [
+ 'Value' => 'Abstract A, full query string (test document) only occurs in abstract.',
+ 'Language' => 'eng',
+ ],
+ ],
+ 'weightedTestDocB' => [
+ 'TitleMain' => [
+ 'Value' => 'Another Test Document',
+ 'Language' => 'eng',
+ ],
+ 'TitleAbstract' => [
+ 'Value' => 'Abstract of document B, full query string only occurs in title.',
+ 'Language' => 'eng',
+ ],
+ ],
+ 'weightedTestDocC' => [
+ 'TitleMain' => [
+ 'Value' => 'Third One',
+ 'Language' => 'eng',
+ ],
+ 'TitleAbstract' => [
+ 'Value' => "Abstract C, first query term (test) only occurs in abstract.\nSome more text.",
+ 'Language' => 'eng',
+ ],
+ ],
+ 'weightedTestDocD' => [
+ 'TitleMain' => [
+ 'Value' => 'Fourth One',
+ 'Language' => 'eng',
+ ],
+ 'TitleAbstract' => [
+ 'Value' => "Abstract D, second query term (document) only occurs in abstract.\nEven more text.",
+ 'Language' => 'eng',
+ ],
+ ],
+ 'weightedTestDocE' => [
+ 'TitleMain' => [
+ 'Value' => 'Yet Another Test',
+ 'Language' => 'eng',
+ ],
+ 'TitleAbstract' => [
+ 'Value' => 'Abstract of document E, title & abstract contain one query term each.',
+ 'Language' => 'eng',
+ ],
+ ],
+ ];
+
public function testService()
{
$search = Service::selectSearchingService(null, 'solr');
@@ -210,22 +268,28 @@ public function testSearchWithDiacritics()
$docB->addPersonAuthor($author);
$docB->store();
- $index = Service::selectIndexingService(null, 'solr');
- $index->addDocumentsToIndex([$docA, $docB]);
+ $this->indexDocuments([$docA, $docB]);
- $search = new Searcher();
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'muller');
- $query = new QueryUtil(QueryUtil::SIMPLE);
- $query->setCatchAll('muller');
- $result = $search->search($query);
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['author' => 1.0]);
- $this->assertEquals(2, $result->getAllMatchesCount());
+ $result = $search->customSearch($query);
+ $matchingIds = $result->getReturnedMatchingIds();
- $query = new QueryUtil(QueryUtil::SIMPLE);
- $query->setCatchAll('müller');
- $result = $search->search($query);
+ $this->assertEquals(2, count($matchingIds));
- $this->assertEquals(2, $result->getAllMatchesCount());
+ $filter = $search->createFilter();
+ $filter->createSimpleEqualityFilter('*')->addValue('müller');
+ $query->setFilter($filter);
+
+ $result = $search->customSearch($query);
+ $matchingIds = $result->getReturnedMatchingIds();
+
+ // when searching with diacritics, expect the same documents being found
+ $this->assertEquals(2, count($matchingIds));
}
public function testMapYearFacetIndexFieldsToYearAsset()
@@ -248,4 +312,377 @@ public function testMapYearFacetIndexFieldsToYearAsset()
$this->assertEquals(1, $result->getAllMatchesCount());
}
+
+ /**
+ * Test that a standard `AND` search (which uses Solr's standard query parser)
+ * finds all documents that contain both query terms in the default field ('title').
+ */
+ public function testStandardAndSearch()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docC = $this->createDocument('weightedTestDocC'); // has only one query term (in abstract)
+ $docD = $this->createDocument('weightedTestDocD'); // has only one query term (in abstract)
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docA, $docB, $docC, $docD, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ // use Solr's standard query parser (which, as currently configured
+ // in solrconfig.xml, by default only searches the 'title' field)
+ $query->setWeightedSearch(false);
+ $query->setUnion(false); // use AND as default query operator
+
+ $result = $search->customSearch($query);
+ $matchingIds = $result->getReturnedMatchingIds();
+
+ $this->assertEquals(1, count($matchingIds));
+
+ // expect only documents that contain both query terms in the default field ('title')
+ $this->assertTrue(in_array($docB->getId(), $matchingIds));
+ }
+
+ /**
+ * Test that a weighted `AND` search finds all documents that contain both
+ * query terms in the same field.
+ */
+ public function testWeightedAndSearchWithoutBoosts()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docC = $this->createDocument('weightedTestDocC'); // has only one query term (in abstract)
+ $docD = $this->createDocument('weightedTestDocD'); // has only one query term (in abstract)
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docA, $docB, $docC, $docD, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true); // use Solr's eDisMax query parser
+ $query->setWeightedFields(['abstract' => 1.0, 'title' => 1.0]); // assigns boost factors to fields
+ $query->setUnion(false); // use AND as default query operator
+
+ $result = $search->customSearch($query);
+ $matchingIds = $result->getReturnedMatchingIds();
+
+ $this->assertEquals(2, count($matchingIds));
+
+ // expect only documents that contain both query terms in the same field
+ $this->assertTrue(in_array($docA->getId(), $matchingIds));
+ $this->assertTrue(in_array($docB->getId(), $matchingIds));
+ }
+
+ /**
+ * Test that a weighted `OR` search finds all documents that contain at least
+ * one query term in one of their fields.
+ */
+ public function testWeightedOrSearchWithoutBoosts()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docC = $this->createDocument('weightedTestDocC'); // has only one query term (in abstract)
+ $docD = $this->createDocument('weightedTestDocD'); // has only one query term (in abstract)
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docA, $docB, $docC, $docD, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['abstract' => 1.0, 'title' => 1.0]);
+ $query->setUnion(true); // use OR as default query operator
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches();
+
+ // expect all of the above documents to get found
+ $this->assertEquals(5, count($matches));
+ }
+
+ /**
+ * Test that a weighted `OR` search with boosted phrase matching results in increased
+ * importance given to search results containing an exact occurrence of the search string.
+ */
+ public function testWeightedOrSearchWithBoostedPhraseMatching()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docC = $this->createDocument('weightedTestDocC'); // has only one query term (in abstract)
+ $docD = $this->createDocument('weightedTestDocD'); // has only one query term (in abstract)
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docA, $docB, $docC, $docD, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['abstract' => 1.0, 'title' => 1.0]);
+ $query->setWeightMultiplier(5); // multiplier to further increase boost factors when matching phrases
+ $query->setUnion(true); // use OR as default query operator
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches();
+
+ $this->assertEquals(5, count($matches));
+
+ // expect the two documents matching the exact occurrence of the search string to sort first
+ $highestScoringIds = [$matches[0]->getDocument()->getId(), $matches[1]->getDocument()->getId()];
+ $this->assertTrue(in_array($docA->getId(), $highestScoringIds));
+ $this->assertTrue(in_array($docB->getId(), $highestScoringIds));
+
+ // expect much greater scores for the two documents matching the exact occurrence of the search string
+ $this->assertTrue($matches[0]->getScore() > 1.0);
+ $this->assertTrue($matches[1]->getScore() > 1.0);
+
+ $this->assertTrue($matches[2]->getScore() < 1.0);
+ $this->assertTrue($matches[3]->getScore() < 1.0);
+ $this->assertTrue($matches[4]->getScore() < 1.0);
+ }
+
+ /**
+ * Test that a weighted `AND` search with a field's boost factor set to 0 will
+ * cause a document with a match just in that field to get a score of 0.
+ */
+ public function testWeightedAndSearchWithZeroedBoost()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docA, $docB, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['abstract' => 0, 'title' => 1.0]);
+ $query->setUnion(false); // use AND as default query operator
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches(false);
+
+ // expect only docA & docB to get found (which both contain the full query string in one of their fields)
+ $this->assertEquals(2, count($matches));
+
+ // expect docB (contains full query string in title) to sort first and with a score greater than 0
+ $this->assertEquals($docB->getId(), $matches[0]->getDocument()->getId());
+ $this->assertTrue($matches[0]->getScore() > 0.0);
+
+ // expect docA (contains full query string in abstract) to sort last and with a score of 0
+ $this->assertEquals($docA->getId(), $matches[1]->getDocument()->getId());
+ $this->assertTrue($matches[1]->getScore() === 0.0);
+ }
+
+ /**
+ * Test that a weighted `OR` search with a field's boost factor set to 0 will
+ * cause a document with a match just in that field to get a score of 0.
+ */
+ public function testWeightedOrSearchWithZeroedBoost()
+ {
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docD = $this->createDocument('weightedTestDocD'); // has only one query term (in abstract)
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docB, $docD, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['abstract' => 0, 'title' => 1.0]);
+ $query->setUnion(true); // use OR as default query operator
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches(false);
+
+ // expect all documents to get found since all of them contain at least one query term in one of their fields
+ $this->assertEquals(3, count($matches));
+
+ // expect docB (contains full query string in title) to sort first and with a score greater than 0
+ $this->assertEquals($docB->getId(), $matches[0]->getDocument()->getId());
+ $this->assertTrue($matches[0]->getScore() > 0.0);
+
+ // expect docE (contains part of query string in title) to sort in the middle and with a score greater than 0
+ $this->assertEquals($docE->getId(), $matches[1]->getDocument()->getId());
+ $this->assertTrue($matches[1]->getScore() > 0.0);
+
+ // expect docD (contains part of query string in abstract) to sort last and with a score of 0
+ $this->assertEquals($docD->getId(), $matches[2]->getDocument()->getId());
+ $this->assertTrue($matches[2]->getScore() === 0.0);
+ }
+
+ /**
+ * Test that a weighted `AND` search with a field's boost factor set to 0 will
+ * by default ignore any matches with a score of 0.
+ */
+ public function testWeightedAndSearchLeavingOutZeroScoredMatches()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
+ $this->indexDocuments([$docA, $docB, $docE]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['abstract' => 0, 'title' => 1.0]);
+ $query->setUnion(false); // use AND as default query operator
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches(); // by default, ignores any matches with score 0.0
+
+ // expect only docB to get found, since docA only contains the full query string in the (ignored) abstract
+ $this->assertEquals(1, count($matches));
+
+ // expect just docB (contains full query string in title) with a score greater than 0
+ $this->assertEquals($docB->getId(), $matches[0]->getDocument()->getId());
+ $this->assertTrue($matches[0]->getScore() > 0.0);
+ }
+
+ /**
+ * Test that a "weighted" search with undefined weights (i.e. no field-specific boost factors defined at all)
+ * will cause Solr to fall back to its standard query parser (which by default only searches the title field).
+ */
+ public function testWeightedSearchWithUndefinedWeights()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $this->indexDocuments([$docA, $docB]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+
+ // without any boost factors assigned to fields, expect only docB
+ // (which contains the query string in the title) to be found
+ $query->setWeightedFields([]); // defining no weights causes Solr to fall back to its standard query parser
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches();
+
+ $this->assertEquals(1, count($matches));
+
+ $this->assertEquals($docB->getId(), $matches[0]->getDocument()->getId());
+ }
+
+ /**
+ * Test that a weighted search with equal weights (i.e. no fields being boosted) will result in
+ * similar scores for two documents that both contain the full query string in one of their fields.
+ */
+ public function testWeightedSearchWithEqualWeights()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $this->indexDocuments([$docA, $docB]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+
+ // with equal boost factors, expect both documents being returned with roughly equal scores
+ $query->setWeightedFields(['abstract' => 1.0, 'title' => 1.0]);
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches();
+
+ $this->assertEquals(2, count($matches));
+
+ $this->assertTrue(abs($matches[0]->getScore() - $matches[1]->getScore()) < 1.0);
+ }
+
+ /**
+ * Test that a weighted search with different boost factors assigned to fields will influence
+ * result scores accordingly & cause a document with a match in a boosted field to sort first.
+ */
+ public function testWeightedSearchWithBoostedFields()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $this->indexDocuments([$docA, $docB]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $this->adjustConfiguration([
+ 'search' => [
+ 'weightedSearch' => true, // use the Solr eDisMax query parser
+ 'simple' => [
+ 'abstract' => 0.5, // decrease importance of abstract field
+ 'title' => 10, // increase importance of title field
+ ],
+ ],
+ ]);
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches();
+
+ $this->assertEquals(2, count($matches));
+
+ // expect clearly different scores between a document with a match in the boosted title field and one without
+ $this->assertTrue(abs($matches[0]->getScore() - $matches[1]->getScore()) > 1.0);
+
+ // expect the document containing the query string in the boosted title field to sort first
+ $this->assertEquals($docB->getId(), $matches[0]->getDocument()->getId());
+ }
+
+ /**
+ * Test that a weighted search with (compared to the previous test) swapped boost factors
+ * will also cause the sort order of search results to get swapped.
+ */
+ public function testWeightedSearchWithBoostedFieldsSwapped()
+ {
+ $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
+ $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
+ $this->indexDocuments([$docA, $docB]);
+
+ $search = Service::selectSearchingService(null, 'solr');
+ $query = $this->queryWithSearchString($search, 'test document');
+
+ $query->setWeightedSearch(true);
+ $query->setWeightedFields(['abstract' => 10.0, 'title' => 0.5]); // increase importance of abstract field
+
+ $result = $search->customSearch($query);
+ $matches = $result->getReturnedMatches();
+
+ $this->assertEquals(2, count($matches));
+
+ // expect clearly different scores between a document with a match in the boosted abstract field and one without
+ $this->assertTrue(abs($matches[0]->getScore() - $matches[1]->getScore()) > 1.0);
+
+ // expect the document containing the query string in the boosted abstract field to sort first
+ $this->assertEquals($docA->getId(), $matches[0]->getDocument()->getId());
+ }
+
+ /**
+ * Adds the given documents to the Solr index.
+ *
+ * @param Document[] $documents documents to be indexed
+ */
+ protected function indexDocuments($documents)
+ {
+ $index = Service::selectIndexingService(null, 'solr');
+ $index->addDocumentsToIndex($documents);
+ }
+
+ /**
+ * Returns a query object for the given search string sorting results by score in descending order.
+ *
+ * @param SearchingInterface $search searching service to work with
+ * @param string $searchString query string to search for
+ * @return Query
+ */
+ protected function queryWithSearchString($search, $searchString)
+ {
+ $query = new Query();
+ $query->addSorting('score', false);
+
+ // add query terms
+ $filter = $search->createFilter();
+ $filter->createSimpleEqualityFilter('*')->addValue($searchString);
+ $query->setFilter($filter);
+
+ return $query;
+ }
}
diff --git a/test/TestAsset/DocumentBasedTestCase.php b/test/TestAsset/DocumentBasedTestCase.php
index 561a48e..54bfcaf 100644
--- a/test/TestAsset/DocumentBasedTestCase.php
+++ b/test/TestAsset/DocumentBasedTestCase.php
@@ -44,6 +44,7 @@
use ReflectionClass;
use function array_key_exists;
+use function array_merge;
use function array_values;
use function basename;
use function file_get_contents;
@@ -155,12 +156,15 @@ class DocumentBasedTestCase extends TestCase
],
];
+ /** @var array[] */
+ protected static $additionalDocumentPropertySets;
+
/**
* @return array
*/
public static function documentPropertiesProvider()
{
- return self::$documentPropertySets;
+ return array_merge(static::$documentPropertySets, static::$additionalDocumentPropertySets ?? []);
}
/**
@@ -169,11 +173,13 @@ public static function documentPropertiesProvider()
*/
public static function getDocumentDescriptionByName($name)
{
- if (! array_key_exists($name, self::$documentPropertySets)) {
+ $documentPropertySets = self::documentPropertiesProvider();
+
+ if (! array_key_exists($name, $documentPropertySets)) {
throw new InvalidArgumentException("unknown document description");
}
- return self::$documentPropertySets[$name];
+ return $documentPropertySets[$name];
}
/**
@@ -187,9 +193,9 @@ public static function getDocumentDescriptionByName($name)
protected function createDocument($documentProperties = null)
{
if ($documentProperties === null) {
- $documentProperties = self::$documentPropertySets['article'];
+ $documentProperties = self::getDocumentDescriptionByName('article');
} if (is_string($documentProperties)) {
- $documentProperties = self::$documentPropertySets[$documentProperties];
+ $documentProperties = self::getDocumentDescriptionByName($documentProperties);
}
$document = Document::new();
diff --git a/test/Util/SearcherTest.php b/test/Util/SearcherTest.php
index d2e4c1f..57189e5 100644
--- a/test/Util/SearcherTest.php
+++ b/test/Util/SearcherTest.php
@@ -37,6 +37,7 @@
use Opus\Common\Document;
use Opus\Common\DocumentInterface;
use Opus\Common\Model\ModelException;
+use Opus\Common\Person;
use Opus\Model\Xml;
use Opus\Model\Xml\Cache;
use Opus\Model\Xml\Version1;
@@ -497,4 +498,47 @@ public function testFilterFacetQueriesByServerStatePublishedForUsers()
{
$this->markTestIncomplete('test not implemented yet - waiting for refactoring of isAdmin implementation');
}
+
+ public function testAdvancedSearch()
+ {
+ $rows = 5;
+ $ids = [];
+ for ($i = 0; $i < $rows; $i++) {
+ $document = Document::new();
+ $document->setServerState('published');
+ $document->store();
+ $ids[] = $document->getId();
+ }
+ // give two of the published documents an author whose last name differs only in letter case
+ $doc = Document::get($ids[0]);
+ $author = Person::new();
+ $author->setLastName('Doe');
+ $doc->addPersonAuthor($author);
+ $doc->store();
+
+ $doc = Document::get($ids[3]);
+ $author = Person::new();
+ $author->setLastName('doe');
+ $doc->addPersonAuthor($author);
+ $doc->store();
+
+ $query = new Query(Query::ADVANCED);
+ $query->setStart(0);
+ $query->setRows(10);
+ $query->setSortField('score');
+ $query->setSortOrder('desc');
+ $query->setFilterQueries([]);
+ $query->setCatchAll(null);
+ $query->setFacetField(null);
+ $query->setReturnIdsOnly(false);
+ $query->setField('author', 'doe');
+ $query->getQ();
+
+ $searcher = new Searcher();
+ $results = $searcher->search($query);
+ // expect exactly the two documents carrying an author to match the advanced author query
+ $this->assertEquals(2, $results->getAllMatchesCount());
+ $this->assertContains($ids[0], $results->getReturnedMatchingIds());
+ $this->assertContains($ids[3], $results->getReturnedMatchingIds());
+ }
}