Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Surfacing mcf required filters in the backend #327

Merged
merged 25 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions app/api/api_v1/routers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ def _search_request(db: Session, search_body: SearchRequestBody) -> SearchRespon
search_body.documents_only = True
search_body.exact_match = False
try:
data_access_search_params = create_vespa_search_params(db, search_body)
data_access_search_response = _VESPA_CONNECTION.search(
parameters=data_access_search_params
cpr_sdk_search_params = create_vespa_search_params(db, search_body)
cpr_sdk_search_response = _VESPA_CONNECTION.search(
parameters=cpr_sdk_search_params
)
except QueryError as e:
raise HTTPException(
Expand All @@ -67,7 +67,7 @@ def _search_request(db: Session, search_body: SearchRequestBody) -> SearchRespon
)
return process_vespa_search_response(
db,
data_access_search_response,
cpr_sdk_search_response,
limit=search_body.page_size,
offset=search_body.offset,
).increment_pages()
Expand Down
5 changes: 5 additions & 0 deletions app/api/api_v1/schemas/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ class SearchResponseFamily(BaseModel):
The geographical location of the family in ISO 3166-1 alpha-3
"""

family_geographies: List[str]
"""
The geographical locations of the family in ISO 3166-1 alpha-3
jesse-c marked this conversation as resolved.
Show resolved Hide resolved
"""

family_metadata: dict
"""
An object if metadata for the family, the schema will change given the family_source
Expand Down
1 change: 1 addition & 0 deletions app/core/browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def to_search_response_family(
family_last_updated_date=family_last_updated_date,
family_source=cast(str, organisation.name),
family_geography=geography_value,
family_geographies=[row.value for row in family.geographies],
family_title_match=False,
family_description_match=False,
# ↓ Stuff we don't currently use for browse ↓
Expand Down
11 changes: 7 additions & 4 deletions app/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,9 @@ def _convert_filter_field(filter_field: str) -> Optional[str]:
if filter_field == FilterField.CATEGORY:
return filter_fields["category"]
if filter_field == FilterField.COUNTRY:
return filter_fields["geography"]
return filter_fields["geographies"]
if filter_field == FilterField.REGION:
return filter_fields["geography"]
return filter_fields["geographies"]
if filter_field == FilterField.LANGUAGE:
return filter_fields["language"]
if filter_field == FilterField.SOURCE:
Expand Down Expand Up @@ -310,7 +310,7 @@ def _convert_filters(
new_keyword_filters[new_field] = new_values

# Regions and countries filters should only include the overlap
geo_field = filter_fields["geography"]
geo_field = filter_fields["geographies"]
if regions and countries:
values = list(set(countries).intersection(regions))
if values:
Expand All @@ -326,6 +326,7 @@ def _convert_filters(
return None


# TODO: Add a test for this function
def _process_vespa_search_response_families(
db: Session,
vespa_families: Sequence[CprSdkResponseFamily],
Expand All @@ -341,6 +342,7 @@ def _process_vespa_search_response_families(
vespa_families_to_process = vespa_families[offset : limit + offset]
all_response_family_ids = [vf.id for vf in vespa_families_to_process]

# TODO: Potential disparity between what's in postgres and vespa
family_and_family_metadata: Sequence[tuple[Family, FamilyMetadata]] = (
db.query(Family, FamilyMetadata)
.filter(Family.import_id.in_(all_response_family_ids))
Expand Down Expand Up @@ -391,6 +393,7 @@ def _process_vespa_search_response_families(
or hit.family_category is None
or hit.family_source is None
or hit.family_geography is None
THOR300 marked this conversation as resolved.
Show resolved Hide resolved
or hit.family_geographies is None
):
_LOGGER.error(
"Skipping hit with empty required family info for import "
Expand Down Expand Up @@ -424,6 +427,7 @@ def _process_vespa_search_response_families(
prev_continuation_token=vespa_family.prev_continuation_token,
family_documents=[],
family_geography=hit.family_geography,
family_geographies=hit.family_geographies,
family_metadata=cast(dict, db_family_metadata.value),
)
response_family_lookup[family_import_id] = response_family
Expand Down Expand Up @@ -478,7 +482,6 @@ def _process_vespa_search_response_families(

response_families.append(response_family)
response_family = None

return response_families


Expand Down
24 changes: 12 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.14.19"
version = "1.15.0"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand All @@ -10,7 +10,7 @@ python = "^3.10"
Authlib = "^0.15.5"
bcrypt = "^3.2.0"
boto3 = "^1.26"
cpr_sdk = { version = "1.3.11", extras = ["vespa"] }
cpr_sdk = { version = "1.4.2", extras = ["vespa"] }
fastapi = "^0.104.1"
fastapi-health = "^0.4.0"
fastapi-pagination = { extras = ["sqlalchemy"], version = "^0.12.19" }
Expand Down
28 changes: 26 additions & 2 deletions tests/search/setup_search_tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import random
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Iterable, Mapping, Optional, Sequence
Expand Down Expand Up @@ -63,7 +64,9 @@ def _fixture_docs() -> Iterable[tuple[VespaFixture, VespaFixture]]:
yield doc, family


def _populate_db_families(db: Session, max_docs: int = VESPA_FIXTURE_COUNT) -> None:
def _populate_db_families(
db: Session, max_docs: int = VESPA_FIXTURE_COUNT, deterministic_metadata=False
) -> None:
"""
Sets up the database using fixtures

Expand All @@ -75,7 +78,10 @@ def _populate_db_families(db: Session, max_docs: int = VESPA_FIXTURE_COUNT) -> N
if doc["fields"]["family_document_ref"] not in seen_family_ids:
_create_family(db, family)
_create_family_event(db, family)
_create_family_metadata(db, family)
if not deterministic_metadata:
THOR300 marked this conversation as resolved.
Show resolved Hide resolved
_create_family_metadata(db, family)
else:
_create_family_metadata_deterministic(db, family)
seen_family_ids.append(doc["fields"]["family_document_ref"])
_create_document(db, doc, family)
if count == max_docs:
Expand Down Expand Up @@ -182,6 +188,24 @@ def _create_family_metadata(db: Session, family: VespaFixture):
db.commit()


def _create_family_metadata_deterministic(db: Session, family: VespaFixture):
metadata_values = defaultdict(list)
family_metadata = family["fields"]["metadata"]
if not family_metadata:
return
for metadata in family_metadata:
name = metadata["name"] # type: ignore
value = metadata["value"] # type: ignore
THOR300 marked this conversation as resolved.
Show resolved Hide resolved
metadata_values[name].append(value)

family_metadata = FamilyMetadata(
family_import_id=family["fields"]["family_import_id"],
value=metadata_values,
)
db.add(family_metadata)
db.commit()


def _create_document(
db: Session,
doc: VespaFixture,
Expand Down
Loading
Loading