Skip to content

Commit

Permalink
Allow setting max_hits and limit for vespa (#200)
Browse files Browse the repository at this point in the history
These values were previously hardcoded, so although the data access
library's interface with vespa supports them and the backend api
accepts parameters for them, they would nevertheless always get set
to the same values.

This passes those api parameters through to the data access library, and
uses the previously set values as actual upper limits — which we still
need in order to limit excessive compute and avoid timeouts.
  • Loading branch information
olaughter authored Dec 13, 2023
1 parent 4681a75 commit a6a3374
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 13 deletions.
4 changes: 2 additions & 2 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
OPENSEARCH_JIT_MAX_DOC_COUNT: int = int(os.getenv("OPENSEARCH_JIT_MAX_DOC_COUNT", "20"))

# Vespa Config
VESPA_SEARCH_LIMIT: int = int(os.getenv("VESPA_SEARCH_LIMIT", "150"))
VESPA_SEARCH_LIMIT: int = int(os.getenv("VESPA_SEARCH_LIMIT", "500"))
VESPA_SEARCH_MATCHES_PER_DOC: int = int(
os.getenv("VESPA_SEARCH_MAX_MATCHES_PER_DOC", "20")
os.getenv("VESPA_SEARCH_MAX_MATCHES_PER_DOC", "100")
)
VESPA_SECRETS_LOCATION: str = os.getenv("VESPA_SECRETS_LOCATION", "/secrets")
VESPA_URL: str = os.getenv("VESPA_URL", "")
Expand Down
9 changes: 7 additions & 2 deletions app/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,11 +1251,16 @@ def process_vespa_search_response(

def create_vespa_search_params(db: Session, search_body: SearchRequestBody):
"""Create Vespa search parameters from a F/E search request body"""
search_body.limit = min(search_body.limit, VESPA_SEARCH_LIMIT)
search_body.max_passages_per_doc = min(
search_body.max_passages_per_doc, VESPA_SEARCH_MATCHES_PER_DOC
)

return DataAccessSearchParams(
query_string=search_body.query_string,
exact_match=search_body.exact_match,
limit=VESPA_SEARCH_LIMIT,
max_hits_per_family=VESPA_SEARCH_MATCHES_PER_DOC,
limit=search_body.limit,
max_hits_per_family=search_body.max_passages_per_doc,
keyword_filters=_convert_filters(db, search_body.keyword_filters),
year_range=search_body.year_range,
sort_by=_convert_sort_field(search_body.sort_field),
Expand Down
58 changes: 49 additions & 9 deletions tests/core/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ def db_setup(test_db):
@pytest.mark.parametrize(
(
"query_string,exact_match,year_range,sort_field,sort_order,"
"keyword_filters,limit,offset,continuation_token"
"keyword_filters,max_passages,limit,offset,continuation_token"
),
[
("hello", True, None, None, SortOrder.ASCENDING, None, 10, 10, None),
("hello", True, None, None, SortOrder.ASCENDING, None, 10, 10, 10, None),
(
"world",
True,
Expand All @@ -53,6 +53,7 @@ def db_setup(test_db):
SortOrder.DESCENDING,
{FilterField.CATEGORY: ["Legislative"], FilterField.REGION: ["europe"]},
10,
10,
0,
"ABC",
),
Expand All @@ -63,6 +64,7 @@ def db_setup(test_db):
SortField.DATE,
SortOrder.ASCENDING,
{FilterField.SOURCE: ["UNFCCC"]},
20,
10,
0,
None,
Expand All @@ -77,18 +79,31 @@ def db_setup(test_db):
FilterField.COUNTRY: ["germany", "France"],
FilterField.REGION: ["europe"],
},
20,
10,
0,
"ABC",
),
("hello", True, None, SortField.TITLE, SortOrder.ASCENDING, None, 10, 0, None),
(
"hello",
True,
None,
SortField.TITLE,
SortOrder.ASCENDING,
None,
10,
10,
0,
None,
),
(
"world",
True,
(1940, 1960),
SortField.DATE,
SortOrder.DESCENDING,
None,
50,
100,
10,
"ABC",
Expand All @@ -100,6 +115,7 @@ def db_setup(test_db):
None,
SortOrder.ASCENDING,
{FilterField.LANGUAGE: ["english"]},
1000,
10,
0,
None,
Expand All @@ -111,12 +127,35 @@ def db_setup(test_db):
SortField.TITLE,
SortOrder.DESCENDING,
None,
100,
10,
0,
"ABC",
),
(
"hello",
True,
None,
SortField.DATE,
SortOrder.ASCENDING,
None,
10,
15,
5,
None,
),
(
"world",
True,
(1940, 1960),
None,
SortOrder.DESCENDING,
None,
10,
10,
0,
"ABC",
),
("hello", True, None, SortField.DATE, SortOrder.ASCENDING, None, 15, 5, None),
("world", True, (1940, 1960), None, SortOrder.DESCENDING, None, 10, 0, "ABC"),
],
)
def test_create_vespa_search_params(
Expand All @@ -127,6 +166,7 @@ def test_create_vespa_search_params(
sort_field,
sort_order,
keyword_filters,
max_passages,
limit,
offset,
continuation_token,
Expand All @@ -136,7 +176,7 @@ def test_create_vespa_search_params(
search_request_body = SearchRequestBody(
query_string=query_string,
exact_match=exact_match,
max_passages_per_doc=10,
max_passages_per_doc=max_passages,
keyword_filters=keyword_filters,
year_range=year_range,
sort_field=sort_field,
Expand All @@ -153,9 +193,9 @@ def test_create_vespa_search_params(
)

# Test constant values
assert produced_search_parameters.limit == VESPA_SEARCH_LIMIT
assert (
produced_search_parameters.max_hits_per_family == VESPA_SEARCH_MATCHES_PER_DOC
assert produced_search_parameters.limit == min(limit, VESPA_SEARCH_LIMIT)
assert produced_search_parameters.max_hits_per_family == min(
max_passages, VESPA_SEARCH_MATCHES_PER_DOC
)

# Test simple passthrough data first
Expand Down

0 comments on commit a6a3374

Please sign in to comment.