Skip to content

Commit

Permalink
#37 When searching Solr, matches with a score of 0 are now ignored by…
Browse files Browse the repository at this point in the history
… default

Setting a field's boost factor to 0 (via the "search.simple" config option) will cause documents with matches just in that field to get a score of 0
  • Loading branch information
extracts committed Dec 4, 2023
1 parent 0d51ed8 commit 4577fcf
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 9 deletions.
25 changes: 18 additions & 7 deletions src/Result/Base.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@
use Opus\Search\Log;
use RuntimeException;

use function array_filter;
use function array_key_exists;
use function array_map;
use function array_values;
use function count;
use function ctype_digit;
use function intval;
Expand Down Expand Up @@ -192,9 +194,11 @@ public function getFacet($fieldName)
* Retrieves set of matching and locally existing documents returned in
* response to some search query.
*
* @param bool $ignoreZeroScoreMatches ignore any matches with score 0.0
* (true) or not (false); defaults to true
* @return ResultMatch[]
*/
public function getReturnedMatches()
public function getReturnedMatches($ignoreZeroScoreMatches = true)
{
if ($this->data['matches'] === null) {
return [];
Expand All @@ -208,7 +212,10 @@ public function getReturnedMatches()
foreach ($this->data['matches'] as $match) {
try {
$match->getDocument();
$matches[] = $match;
$ignoreMatch = $ignoreZeroScoreMatches === true && $match->getScore() === 0.0;
if ($ignoreMatch !== true) {
$matches[] = $match;
}
} catch (DocumentException $e) {
Log::get()->warn('skipping matching but locally missing document #' . $match->getId());
}
Expand All @@ -223,18 +230,22 @@ public function getReturnedMatches()
*
* @note If query was requesting to retrieve non-qualified matches this set
* might include IDs of documents that don't exist locally anymore.
* @param bool $ignoreZeroScoreMatches ignore any matches with score 0.0
* (true) or not (false); defaults to true
* @return int[]
*/
public function getReturnedMatchingIds($ignoreZeroScoreMatches = true)
{
    // No matches recorded for this result: nothing to report.
    if ($this->data['matches'] === null) {
        return [];
    }

    // Decide per match (by its score) whether to keep it. Filtering the
    // matches themselves, rather than filtering the mapped IDs for
    // truthiness, makes sure an ID is never dropped just because its value
    // happens to be falsy (e.g. 0 or '0').
    $matches = array_filter(
        $this->data['matches'],
        static function (ResultMatch $match) use ($ignoreZeroScoreMatches) {
            return $ignoreZeroScoreMatches !== true || $match->getScore() !== 0.0;
        }
    );

    $matchingIds = array_map(static function (ResultMatch $match) {
        return $match->getId();
    }, $matches);

    // array_filter() preserves the original keys; re-index so callers get a
    // plain, gap-free list of IDs.
    return array_values($matchingIds);
}

/**
Expand All @@ -247,7 +258,7 @@ public function getReturnedMatchingIds()
* has changed in that it's returning set of Opus_Document instances
* rather than set of Opus_Search_Util_Result instances.
* @note The wording is less specific in that all information in response to
* search query may considered results of search. Thus this new API
* search query may be considered results of search. Thus this new API
* prefers "matches" over "results".
*/
public function getResults()
Expand Down
33 changes: 31 additions & 2 deletions test/Solr/Solarium/AdapterSearchingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ public function testWeightedAndSearchWithZeroedBoost()
$query->setUnion(false); // use AND as default query operator

$result = $search->customSearch($query);
$matches = $result->getReturnedMatches();
$matches = $result->getReturnedMatches(false);

// expect only docA & docB to get found (which both contain the full query string in one of their fields)
$this->assertEquals(2, count($matches));
Expand Down Expand Up @@ -486,7 +486,7 @@ public function testWeightedOrSearchWithZeroedBoost()
$query->setUnion(true); // use OR as default query operator

$result = $search->customSearch($query);
$matches = $result->getReturnedMatches();
$matches = $result->getReturnedMatches(false);

// expect all documents to get found since all of them contain at least one query term in one of their fields
$this->assertEquals(3, count($matches));
Expand All @@ -504,6 +504,35 @@ public function testWeightedOrSearchWithZeroedBoost()
$this->assertTrue($matches[2]->getScore() === 0.0);
}

/**
 * Test that a weighted `AND` search with a field's boost factor set to 0 will
 * by default ignore any matches with a score of 0.
 *
 * The abstract field gets a boost factor of 0, so a document matching only in
 * its abstract is expected to be left out of the returned matches.
 */
public function testWeightedAndSearchLeavingOutZeroScoredMatches()
{
    $docA = $this->createDocument('weightedTestDocA'); // full query string only occurs in abstract
    $docB = $this->createDocument('weightedTestDocB'); // full query string only occurs in title
    $docE = $this->createDocument('weightedTestDocE'); // title & abstract contain one query term each
    $this->indexDocuments([$docA, $docB, $docE]);

    $search = Service::selectSearchingService(null, 'solr');
    $query  = $this->queryWithSearchString($search, 'test document');

    $query->setWeightedSearch(true);
    $query->setWeightedFields(['abstract' => 0, 'title' => 1.0]);
    $query->setUnion(false); // use AND as default query operator

    $result  = $search->customSearch($query);
    $matches = $result->getReturnedMatches(); // by default, ignores any matches with score 0.0

    // expect only docB to get found, since docA only contains the full query string in the (ignored) abstract
    $this->assertCount(1, $matches);

    // expect just docB (contains full query string in title) with a score greater than 0
    $this->assertEquals($docB->getId(), $matches[0]->getDocument()->getId());
    $this->assertGreaterThan(0.0, $matches[0]->getScore());
}

/**
* Test that a "weighted" search with undefined weights (i.e. no field-specific boost factors defined at all)
* will cause Solr to fall back to its standard query parser (which by default only searches the title field).
Expand Down

0 comments on commit 4577fcf

Please sign in to comment.