Skip to content

Commit

Permalink
Fix keyword filter handling (#213)
Browse files Browse the repository at this point in the history
* Setup geographies properly so we can test filters

* Fix false positive test following geography setup

* fix linting errors

* Fix keyword filter handling

Previously a bug fix attempted to resolve the fact that postgres and
vespa use different titles for fields. However it has led to keyword_filters
being preprocessed more than once for vespa and not at all for browse.

* Fix failing test that now needs geography

* Increase expected latency

Following additional browse preprocessing that brings this up a bit
  • Loading branch information
olaughter authored Jan 23, 2024
1 parent db7ac85 commit aa38cac
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 18 deletions.
15 changes: 6 additions & 9 deletions app/api/api_v1/routers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"""
import json
import logging
import os
from datetime import datetime
from io import BytesIO
from typing import Mapping, Sequence
Expand All @@ -30,9 +29,7 @@
VESPA_URL,
)
from app.core.download import (
convert_dump_to_csv,
generate_data_dump_as_csv,
get_whole_database_dump,
)
from app.core.lookups import get_countries_for_region, get_country_by_slug
from app.core.search import (
Expand Down Expand Up @@ -68,14 +65,15 @@
def _search_request(
db: Session, search_body: SearchRequestBody, use_vespa: bool = True
) -> SearchResponse:
if search_body.keyword_filters is not None and use_vespa is False:
search_body.keyword_filters = process_search_keyword_filters(
db,
search_body.keyword_filters,
)
is_browse_request = not search_body.query_string
if is_browse_request:
# Service browse requests from RDS
if search_body.keyword_filters is not None:
search_body.keyword_filters = process_search_keyword_filters(
db,
search_body.keyword_filters,
)

return browse_rds_families(
db=db,
req=_get_browse_args_from_search_request_body(search_body),
Expand Down Expand Up @@ -259,7 +257,6 @@ def process_search_keyword_filters(
request_filters: Mapping[FilterField, Sequence[str]],
) -> Mapping[FilterField, Sequence[str]]:
filter_map = {}

for field, values in request_filters.items():
if field == FilterField.REGION:
field = FilterField.COUNTRY
Expand Down
6 changes: 5 additions & 1 deletion tests/routes/setup_search_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from app.db.models.law_policy.metadata import (
FamilyMetadata,
)
from app.db.models.law_policy import Geography
from app.db.models.document.physical_document import (
LanguageSource,
PhysicalDocument,
Expand Down Expand Up @@ -106,11 +107,14 @@ def _create_family(db: Session, family: VespaFixture):
family_id = _parse_id(family)
family_import_id = family["fields"]["family_import_id"]

geo = family["fields"]["family_geography"]
geography = db.query(Geography).filter(Geography.value == geo).one()

family_object = Family(
title=family["fields"]["family_name"],
import_id=family_import_id,
description=family["fields"]["family_description"],
geography_id=1,
geography_id=geography.id,
family_category=FamilyCategory(family["fields"]["family_category"]),
)
db.add(family_object)
Expand Down
14 changes: 6 additions & 8 deletions tests/routes/test_vespasearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def test_no_doc_if_in_postgres_but_not_vespa(test_vespa, client, test_db, monkey
"document_languages": ["French"],
"document_import_id": "CCLW.executive.111.222",
"family_description": "",
"family_geography": "CAN",
"family_publication_ts": "2011-08-01T00:00:00+00:00",
"family_import_id": "CCLW.family.111.0",
},
Expand Down Expand Up @@ -171,7 +172,7 @@ def test_benchmark_families_search(
_populate_db_families(test_db)

# This is high as it's meant as a last resort for catching new perfomance problems
REASONABLE_LATENCY_MS = 25
REASONABLE_LATENCY_MS = 50

times = []
for _ in range(1, 10):
Expand Down Expand Up @@ -279,27 +280,24 @@ def test_keyword_country_filters(
_populate_db_families(test_db)
base_params = {"query_string": query}

# Get all documents and iterate over there country codes
# to confirm they are returned when filtered on
# Get all documents and iterate over their country codes to confirm that each are
# the specific one that is returned in the query (as they each have a unique
# country code)
all_body = _make_search_request(client, params=base_params)
families = [f for f in all_body["families"]]
assert len(families) >= 4

for family in families:
country_code = family["family_geography"]

# Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA)
if country_code == "Other":
return

country_slug = get_country_slug_from_country_code(test_db, country_code)

params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}}
body_with_filters = _make_search_request(client, params=params)
filtered_family_slugs = [
f["family_slug"] for f in body_with_filters["families"]
]

assert len(filtered_family_slugs) == 1
assert family["family_slug"] in filtered_family_slugs


Expand Down

0 comments on commit aa38cac

Please sign in to comment.