Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a weighted search #122

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
be3ee4f
#37 Adds a test that searches Solr using the eDisMax query parser (WIP)
extracts Nov 16, 2023
214c501
#37 Fix "call to undefined method" error
extracts Nov 16, 2023
0f387b9
#37 Keep Solr-specific terminology & syntax out of Opus\Search\Query
extracts Nov 17, 2023
105ca56
#37 Completes search test which verifies that a weighted Solr search …
extracts Nov 17, 2023
b0b22a4
#37 Default values for getters that return a boolean value must be tr…
extracts Nov 22, 2023
b290e62
#37 The weighted search test now shows that searching with boosted fi…
extracts Nov 22, 2023
abe0045
#37 Adopt tests to Opus\Search\Query->getUnion() returning either tru…
extracts Nov 22, 2023
527e42c
#37 The weighted search test now also verifies that swapping field we…
extracts Nov 23, 2023
541bc43
#37 Default to the "search.weightedSearch" & "search.simple" configur…
extracts Nov 23, 2023
c78afc7
#37 Now checks the sort order of weighted search results; moves testi…
extracts Nov 23, 2023
66c06a1
#37 Reuse test documents between weighted search tests
extracts Nov 23, 2023
c622821
#37 Fixes a namespace conflict
extracts Nov 23, 2023
42b9eb0
#37 Verify the sort order of weighted search results via the document…
extracts Nov 24, 2023
f6d7469
#37 Implements explicit getters getWeightedSearch() & getWeightedFiel…
extracts Nov 24, 2023
ad067cb
#37 Removes the weightedsearch key from the initial data array so tha…
extracts Nov 29, 2023
e0593c0
#37 Fix missing return statement in setWeightedSearch() which uses a …
extracts Nov 29, 2023
357b053
#37 Adds a weight multiplier to generate a value for the Solr "pf" re…
extracts Dec 1, 2023
54d1760
#37 Adds a test that compares the search behaviour of the standard & …
extracts Dec 1, 2023
a47e997
#37 Fix coding style
extracts Dec 1, 2023
efbba3b
#37 Replaces redundant boiler plate code with separate helper methods
extracts Dec 1, 2023
e635d6f
#37 More (and more granular) tests that test weighted search behavior
extracts Dec 1, 2023
0d51ed8
#37 Removes the catchall fields "text" & "simple" from the Solr schem…
extracts Dec 3, 2023
4577fcf
#37 When searching Solr, matches with a score of 0 are now ignored by…
extracts Dec 4, 2023
9e165b1
#37 Adopts a test that searches the author field so that it uses a we…
extracts Dec 4, 2023
8a08dff
Merge branch '4.8.1' into weightedSearch37
j3nsch May 17, 2024
6066e6a
Merge pull request #130 from OPUS4/weightedSearch37tmp
j3nsch May 17, 2024
cf8cb36
#131 Added test for advanced search
j3nsch May 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions src/Query.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
* @method string[] getFields( array $default = null )
* @method array getSort( array $default = null )
* @method bool getUnion( bool $default = null )
* @method bool getWeightedSearch( bool $default = null )
extracts marked this conversation as resolved.
Show resolved Hide resolved
* @method array getWeightedFields( int[] $default = null )
* @method AbstractFilterBase getFilter(AbstractFilterBase $default = null ) retrieves condition to be met by resulting documents
* @method Set getFacet( Set $default = null )
* @method $this setStart( int $offset )
Expand All @@ -86,6 +88,8 @@
* @method $this setFacet( Set $facet )
* @method $this addFields( string $fields )
* @method $this addSort( $sorting )
* @method $this setWeightedSearch( bool $isWeightedSearch )
* @method $this setWeightedFields( int[] $weightedFields ) assigns boost factors to fields (e.g. [ 'title' => 10, 'abstract' => 0.5 ])
*/
class Query
{
Expand All @@ -95,14 +99,16 @@ class Query
public function reset()
{
    // Re-initialize every known query parameter as "not set" (null). Each key is
    // listed exactly once; the weighted search keys are part of the initial state.
    $this->data = [
        'start'          => null,
        'rows'           => null,
        'fields'         => null,
        'sort'           => null,
        'union'          => null,
        'filter'         => null,
        'facet'          => null,
        'subfilters'     => null,
        'weightedsearch' => null,
        'weightedfields' => null,
    ];
}

Expand Down Expand Up @@ -267,6 +273,7 @@ public function set($name, $value, $adding = false)
break;

case 'union':
case 'weightedsearch':
if ($adding) {
throw new InvalidArgumentException('invalid parameter access on ' . $name);
}
Expand Down Expand Up @@ -300,6 +307,18 @@ public function set($name, $value, $adding = false)

case 'subfilters':
throw new RuntimeException('invalid access on sub filters');

case 'weightedfields':
if ($adding) {
throw new InvalidArgumentException('invalid parameter access on ' . $name);
}

if (! is_array($value)) {
throw new InvalidArgumentException('invalid query fields option');
}

$this->data[$name] = $value;
break;
}

return $this;
Expand Down
31 changes: 31 additions & 0 deletions src/Solr/Solarium/Adapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
use function file_exists;
use function filesize;
use function filter_var;
use function implode;
use function in_array;
use function intval;
use function is_array;
Expand Down Expand Up @@ -615,6 +616,19 @@ protected function applyParametersOnQuery(
$query->setSorts($sortings);
}

$isWeightedSearch = $parameters->getWeightedSearch();
if ($isWeightedSearch === true) {
// get the edismax component
$edismax = $query->getEDisMax();

// NOTE: query is now an edismax query
$weightedFields = $parameters->getWeightedFields();
if (! empty($weightedFields)) {
$queryFields = $this->getQueryFieldsString($weightedFields);
$edismax->setQueryFields($queryFields);
extracts marked this conversation as resolved.
Show resolved Hide resolved
}
}

$facet = $parameters->getFacet();
if ($facet !== null) {
$facetSet = $query->getFacetSet();
Expand Down Expand Up @@ -880,4 +894,21 @@ public function setTimeout($timeout)
$this->client->setOptions($options, true);
}
}

/**
 * Builds the value for the Solr `qf` request parameter from a map of boost factors.
 *
 * Each entry is rendered as `<field>^<boost>`; the entries are joined by a single space
 * in the iteration order of the given array. An empty array yields an empty string.
 *
 * @param array<string, int|float> $weightedFields boost factors keyed by field name,
 *                                                 e.g.: [ 'title' => 10, 'abstract' => 0.5 ]
 * @return string query fields string, e.g.: "title^10 abstract^0.5"
 */
protected function getQueryFieldsString($weightedFields)
{
    $boostedFields = [];

    foreach ($weightedFields as $fieldName => $weight) {
        // Solr boost syntax: field name and boost factor separated by a caret
        $boostedFields[] = $fieldName . '^' . $weight;
    }

    return implode(' ', $boostedFields);
}
}
48 changes: 48 additions & 0 deletions test/Solr/Solarium/AdapterSearchingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
namespace OpusTest\Search\Solr\Solarium;

use Exception;
use Opus\Common\Config;
use Opus\Common\Person;
use Opus\Search\Query;
use Opus\Search\QueryFactory;
Expand All @@ -43,6 +44,7 @@
use OpusTest\Search\TestAsset\DocumentBasedTestCase;

use function count;
use function round;

class AdapterSearchingTest extends DocumentBasedTestCase
{
Expand Down Expand Up @@ -248,4 +250,50 @@ public function testMapYearFacetIndexFieldsToYearAsset()

$this->assertEquals(1, $result->getAllMatchesCount());
}

/**
 * Runs a weighted search (Solr eDisMax query parser) for the phrase 'test document' using
 * boost factors taken from the `search.simple` configuration, and checks that both test
 * documents are returned and that the first match has a higher (rounded) score than the second.
 */
public function testSearchWithQueryParserEDisMax()
{
    // assign boost factors to fields
    $this->adjustConfiguration([
        'search' => [
            'simple' => [
                'title'    => 10,
                'abstract' => 0.5,
            ],
        ],
    ]);

    $config         = Config::get();
    $weightedFields = $config->search->simple->toArray();

    $docA = $this->createDocument('testdocA'); // phrase 'test document' only occurs in abstract
    $docB = $this->createDocument('testdocB'); // phrase 'test document' occurs in title & abstract

    $index = Service::selectIndexingService(null, 'solr');
    $index->addDocumentsToIndex([$docA, $docB]);

    $search = Service::selectSearchingService(null, 'solr');

    $query = new Query();

    // use the Solr eDisMax query parser (i.e., add Solr request param `defType=edismax`)
    $query->setWeightedSearch(true);

    // add Solr request param `qf=...` which assigns boost factors to fields
    $query->setWeightedFields($weightedFields);

    $filter = $search->createFilter();
    $filter->createSimpleEqualityFilter('*')->addValue('test document');
    $query->setFilter($filter);

    $result  = $search->customSearch($query);
    $matches = $result->getReturnedMatches();

    // dedicated assertions report the actual count/scores on failure
    $this->assertCount(2, $matches);

    $firstResultScore  = $matches[0]->getScore();
    $secondResultScore = $matches[1]->getScore();

    // scores are compared after rounding to one decimal place, so the first match
    // must lead by a visible margin rather than by a tiny fraction
    $this->assertGreaterThan(round($secondResultScore, 1), round($firstResultScore, 1));
}
}
20 changes: 20 additions & 0 deletions test/TestAsset/DocumentBasedTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,26 @@ class DocumentBasedTestCase extends TestCase
'BelongsToBibliography' => 1,
'EmbargoDate' => '2010-01-04',
],
'testdocA' => [
extracts marked this conversation as resolved.
Show resolved Hide resolved
'TitleMain' => [
'Value' => 'Some Document',
'Language' => 'eng',
],
'TitleAbstract' => [
'Value' => 'Abstract of test document A.\nSome more text.',
'Language' => 'eng',
],
],
'testdocB' => [
'TitleMain' => [
'Value' => 'Another Test Document',
'Language' => 'eng',
],
'TitleAbstract' => [
'Value' => 'Abstract of test document B.\nSome blah blah text.',
'Language' => 'eng',
],
],
];

/**
Expand Down