Skip to content

Commit

Permalink
Fix pagination bug (#207)
Browse files Browse the repository at this point in the history
* Fix pagination bug

Because of the way the backend processes the limit and offset values, we
ended up only ever getting ten results (i.e. the value of limit). This
turns out to be because we limit in the Vespa query and then limit
again in the result. Switching instead to a fixed Vespa limit resolves
this issue until we can move limits to the Vespa query.

* Fix failing tests
  • Loading branch information
olaughter authored Jan 11, 2024
1 parent 8dfc8e1 commit 9de373f
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 20 deletions.
2 changes: 1 addition & 1 deletion app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
OPENSEARCH_JIT_MAX_DOC_COUNT: int = int(os.getenv("OPENSEARCH_JIT_MAX_DOC_COUNT", "20"))

# Vespa Config
VESPA_SEARCH_LIMIT: int = int(os.getenv("VESPA_SEARCH_LIMIT", "500"))
VESPA_SEARCH_LIMIT: int = int(os.getenv("VESPA_SEARCH_LIMIT", "100"))
VESPA_SEARCH_MATCHES_PER_DOC: int = int(
os.getenv("VESPA_SEARCH_MAX_MATCHES_PER_DOC", "100")
)
Expand Down
3 changes: 1 addition & 2 deletions app/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,15 +1251,14 @@ def process_vespa_search_response(

def create_vespa_search_params(db: Session, search_body: SearchRequestBody):
"""Create Vespa search parameters from a F/E search request body"""
search_body.limit = min(search_body.limit, VESPA_SEARCH_LIMIT)
search_body.max_passages_per_doc = min(
search_body.max_passages_per_doc, VESPA_SEARCH_MATCHES_PER_DOC
)

return DataAccessSearchParams(
query_string=search_body.query_string,
exact_match=search_body.exact_match,
limit=search_body.limit,
limit=VESPA_SEARCH_LIMIT,
max_hits_per_family=search_body.max_passages_per_doc,
keyword_filters=_convert_filters(db, search_body.keyword_filters),
year_range=search_body.year_range,
Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_create_vespa_search_params(
)

# Test constant values
assert produced_search_parameters.limit == min(limit, VESPA_SEARCH_LIMIT)
assert produced_search_parameters.limit == VESPA_SEARCH_LIMIT
assert produced_search_parameters.max_hits_per_family == min(
max_passages, VESPA_SEARCH_MATCHES_PER_DOC
)
Expand Down
54 changes: 38 additions & 16 deletions tests/routes/test_vespasearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,41 @@ def test_simple_pagination_families(test_vespa, client, test_db, monkeypatch):
monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
_populate_db_families(test_db)

doc_slugs = []
for offset in range(3):
params = {
"query_string": "and",
"limit": 1,
"offset": offset,
}
body = _make_search_request(client, params)
FIXTURE_COUNT = 4
LIMIT = 2

for f in body["families"]:
for d in f["family_documents"]:
doc_slugs.append(d["document_slug"])
# Query one
params = {
"query_string": "and",
"limit": LIMIT,
"offset": 0,
}
body_one = _make_search_request(client, params)
assert body_one["hits"] == FIXTURE_COUNT
assert len(body_one["families"]) == LIMIT
assert (
body_one["families"][0]["family_slug"]
== "agriculture-sector-plan-2015-2019_7999"
)
assert (
body_one["families"][1]["family_slug"]
== "national-environment-policy-of-guinea_f0df"
)

assert len(set(doc_slugs)) == len(doc_slugs)
# Query two
params = {
"query_string": "and",
"limit": LIMIT,
"offset": 2,
}
body_two = _make_search_request(client, params)
assert body_two["hits"] == FIXTURE_COUNT
assert len(body_two["families"]) == LIMIT
assert (
body_two["families"][0]["family_slug"]
== "submission-to-the-unfccc-ahead-of-the-first-technical-dialogue-of-the-global-stocktake-formally-submitted-by-observer-organization-climateworks-foundation-on-behalf-of-the-igst-consortium_e760"
)
assert body_two["families"][1]["family_slug"] == "national-energy-strategy_980b"


@pytest.mark.search
Expand Down Expand Up @@ -117,15 +138,14 @@ def test_benchmark_families_search(


@pytest.mark.search
@pytest.mark.parametrize("exact_match", [True, False])
def test_specific_doc_returned(exact_match, test_vespa, monkeypatch, client, test_db):
def test_specific_doc_returned(test_vespa, monkeypatch, client, test_db):
monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
_populate_db_families(test_db)

family_name_query = "Agriculture Sector Plan 2015-2019"
params = {
"query_string": family_name_query,
"exact_match": exact_match,
"exact_match": True,
"limit": 1,
}
body = _make_search_request(client, params)
Expand Down Expand Up @@ -167,7 +187,9 @@ def test_search_params_contract(
},
)

query_spy.assert_called_once_with(parameters=params)
expected_params = params
expected_params.limit = 150
query_spy.assert_called_once_with(parameters=expected_params)


@pytest.mark.search
Expand Down

0 comments on commit 9de373f

Please sign in to comment.