Skip to content

Commit

Permalink
Feature/pdct 1512 remove family geography from the searchresponse (#378)
Browse files Browse the repository at this point in the history
* Update test to use family_geographies

* Remove family_geography & sort imports

* Update family_geography to family_geographies

* Remove family_geography

* Make download this search support multiple geographies

* Create vespa test families with multiple geos

* Use multiple geographies instead of singular

* Use multiple geographies in download

* Remove family_geography

* Bump to 1.17.9

* Rename CSV column to Geographies

* Remove geography filter test as famil_geo not in search response

* Improve testing for download spreadsheet cols
  • Loading branch information
katybaulch authored Oct 14, 2024
1 parent 055317a commit f9b9cee
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 81 deletions.
5 changes: 0 additions & 5 deletions app/api/api_v1/schemas/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,6 @@ class SearchResponseFamily(BaseModel):
The name given to the type of corpus the family belongs to. E.G. 'Laws and Policies'
"""

family_geography: str
"""
The geographical location of the family in ISO 3166-1 alpha-3
"""

family_geographies: List[str]
"""
The geographical locations of the family in ISO 3166-1 alpha-3
Expand Down
1 change: 0 additions & 1 deletion app/core/browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def to_search_response_family(
family_source=cast(str, organisation.name),
corpus_import_id=cast(str, corpus.import_id),
corpus_type_name=cast(str, corpus.corpus_type_name),
family_geography=geography_value,
family_geographies=[row.value for row in family.geographies],
family_title_match=False,
family_description_match=False,
Expand Down
15 changes: 10 additions & 5 deletions app/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"Family Summary",
"Family URL",
"Family Publication Date",
"Geography",
"Geographies",
"Document Title",
"Document URL",
"Document Content URL",
Expand Down Expand Up @@ -157,6 +157,12 @@ def process_result_into_csv(
metadata_keys[family_source] = list(
[key.title() for key in family_metadata.keys()]
)

family_geos = ";".join(
[cast(str, geo) for geo in family.family_geographies]
if family is not None
else []
)
metadata: dict[str, str] = defaultdict(str)
for k in family_metadata:
metadata[k.title()] = ";".join(family_metadata.get(k, []))
Expand Down Expand Up @@ -210,6 +216,7 @@ def process_result_into_csv(
if physical_document is not None
else []
)

row = {
"Collection Name": collection_name,
"Collection Summary": collection_summary,
Expand All @@ -222,7 +229,7 @@ def process_result_into_csv(
"Document Content URL": document_content,
"Document Type": doc_type_from_family_document_metadata(document),
"Document Content Matches Search Phrase": document_match,
"Geography": family.family_geography,
"Geographies": family_geos,
"Category": family.family_category,
"Languages": document_languages,
"Source": family_source,
Expand All @@ -243,7 +250,7 @@ def process_result_into_csv(
"Document Content URL": "",
"Document Type": "",
"Document Content Matches Search Phrase": "n/a",
"Geography": family.family_geography,
"Geographies": family_geos,
"Category": family.family_category,
"Languages": "",
"Source": family_source,
Expand Down Expand Up @@ -389,7 +396,6 @@ def _process_vespa_search_response_families(
or hit.family_name is None
or hit.family_category is None
or hit.family_source is None
or hit.family_geography is None
or hit.family_geographies is None
):
_LOGGER.error(
Expand Down Expand Up @@ -425,7 +431,6 @@ def _process_vespa_search_response_families(
continuation_token=vespa_family.continuation_token,
prev_continuation_token=vespa_family.prev_continuation_token,
family_documents=[],
family_geography=hit.family_geography,
family_geographies=hit.family_geographies,
family_metadata=cast(dict, db_family_metadata.value),
)
Expand Down
6 changes: 3 additions & 3 deletions docs/api/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ A list of family objects, each following the scheme below:
  "family_date": "string",
  "family_last_updated_date": "string",
  "family_source": "string",
  "family_geography": "string",
  "family_geographies": "string[]",
  "family_metadata": {},
  "family_title_match": true,
  "family_description_match": true,
Expand Down Expand Up @@ -197,9 +197,9 @@ event of this family of documents.

The source, currently organisation name. Either “CCLW” or “UNFCCC”

##### family_geography
##### family_geographies

The geographical location of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)
The geographical location(s) of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)

##### family_metadata

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.17.8"
version = "1.17.9"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
1 change: 0 additions & 1 deletion tests/non_search/core/test_browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def test_browse_rds_families(data_db):
assert family.family_source == "CCLW"
assert family.corpus_import_id == "CCLW.corpus.i00000001.n0000"
assert family.corpus_type_name == "Laws and Policies"
assert family.family_geography == "South Asia"
assert family.family_geographies == ["South Asia"]
assert family.family_metadata == {}
assert family.total_passage_hits == 0
Expand Down
7 changes: 0 additions & 7 deletions tests/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,6 @@ def test_create_vespa_search_params(
assert converted_keyword_filters is not None
assert produced_search_parameters.filters is not None
assert produced_search_parameters.filters == CprSdkFilters(
family_geography=(
converted_keyword_filters["family_geography"]
if "family_geography" in converted_keyword_filters.keys()
else []
),
family_category=(
converted_keyword_filters["family_category"]
if "family_category" in converted_keyword_filters.keys()
Expand Down Expand Up @@ -624,7 +619,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]:
family_slug=slugify(spec.family_name),
family_category=spec.family_category,
family_publication_ts=datetime.fromisoformat(spec.family_ts),
family_geography=spec.family_geo,
family_geographies=spec.family_geos,
corpus_import_id=spec.corpus_import_id,
corpus_type_name=spec.corpus_type_name,
Expand Down Expand Up @@ -658,7 +652,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]:
family_slug=slugify(spec.family_name),
family_category=spec.family_category,
family_publication_ts=datetime.fromisoformat(spec.family_ts),
family_geography=spec.family_geo,
family_geographies=spec.family_geos,
corpus_import_id=spec.corpus_import_id,
corpus_type_name=spec.corpus_type_name,
Expand Down
14 changes: 7 additions & 7 deletions tests/search/vespa/setup_search_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,6 @@ def _populate_db_families(
def _create_family(db: Session, family: VespaFixture):
family_import_id = family["fields"]["family_import_id"]

geo = family["fields"]["family_geography"]
geography = db.query(Geography).filter(Geography.value == geo).one()

family_object = Family(
title=family["fields"]["family_name"],
import_id=family_import_id,
Expand All @@ -129,11 +126,14 @@ def _create_family(db: Session, family: VespaFixture):
)
db.add(family_object)
db.commit()
db.add(
FamilyGeography(
family_import_id=family_object.import_id, geography_id=geography.id

for geo in family["fields"]["family_geographies"]:
geography = db.query(Geography).filter(Geography.value == geo).one()
db.add(
FamilyGeography(
family_import_id=family_object.import_id, geography_id=geography.id
)
)
)

family_slug = Slug(
name=family["fields"]["family_slug"],
Expand Down
53 changes: 4 additions & 49 deletions tests/search/vespa/test_range_and_keyword_filters_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,52 +13,6 @@
)


@pytest.mark.search
@patch(
"app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db",
return_value=True,
)
@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
def test_keyword_country_filters__geography(
mock_corpora_exist_in_db,
label,
query,
test_vespa,
data_client,
data_db,
monkeypatch,
valid_token,
):
monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
_populate_db_families(data_db)
base_params = {"query_string": query}

# Get all documents and iterate over their country codes to confirm that each are
# the specific one that is returned in the query (as they each have a unique
# country code)
all_body = _make_search_request(data_client, valid_token, params=base_params)
families = [f for f in all_body["families"]]
assert len(families) == VESPA_FIXTURE_COUNT

for family in families:
assert family["family_geography"] in family["family_geographies"]
country_code = family["family_geography"]

country_slug = get_country_slug_from_country_code(data_db, country_code)

params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}}
body_with_filters = _make_search_request(
data_client, valid_token, params=params
)
filtered_family_slugs = [
f["family_slug"] for f in body_with_filters["families"]
]
assert len(filtered_family_slugs) == 1
assert family["family_slug"] in filtered_family_slugs

assert mock_corpora_exist_in_db.assert_called


@pytest.mark.search
@patch(
"app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db",
Expand Down Expand Up @@ -87,7 +41,6 @@ def test_keyword_country_filters__geographies(
assert len(families) == VESPA_FIXTURE_COUNT

for family in families:
assert family["family_geography"] in family["family_geographies"]
for country_code in family["family_geographies"]:
country_slug = get_country_slug_from_country_code(data_db, country_code)

Expand Down Expand Up @@ -134,12 +87,14 @@ def test_keyword_region_filters(
assert len(families) == VESPA_FIXTURE_COUNT

for family in families:
country_code = family["family_geography"]
country_codes = family["family_geographies"]

# Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA)
if country_code == "Other":
if country_codes == ["Other"]:
return

country_code = country_codes[0]

parent_id = (
data_db.query(Geography)
.filter(Geography.value == country_code)
Expand Down
27 changes: 26 additions & 1 deletion tests/search/vespa/test_this_vespa_search_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@
SEARCH_ENDPOINT = "/api/v1/searches"
CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv"

_CSV_SEARCH_RESPONSE_COLUMNS = [
"Collection Name",
"Collection Summary",
"Family Name",
"Family Summary",
"Family URL",
"Family Publication Date",
"Geographies",
"Document Title",
"Document URL",
"Document Content URL",
"Document Type",
"Document Content Matches Search Phrase",
"Category",
"Languages",
"Source",
]


def _make_download_request(
client,
Expand Down Expand Up @@ -74,11 +92,18 @@ def test_csv_content(

csv_content = csv.DictReader(StringIO(csv_response.text))
for row, family in zip(csv_content, families):
assert all(col in row.keys() for col in _CSV_SEARCH_RESPONSE_COLUMNS)

assert row["Family Name"] == family["family_name"]
assert row["Family Summary"] == family["family_description"]
assert row["Family Publication Date"] == family["family_date"]
assert row["Category"] == family["family_category"]
assert row["Geography"] == family["family_geography"]

assert isinstance(row["Geographies"], str)
if len(family["family_geographies"]) > 1:
assert (
row["Geographies"].count(";") == len(family["family_geographies"]) - 1
)

# TODO: Add collections to test db setup to provide document level coverage

Expand Down
1 change: 0 additions & 1 deletion tests/unit/app/schemas/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def test_search_response() -> None:
datetime.now()
), # You can replace this with an actual date string
family_source="Example Source",
family_geography="Example Geography",
family_geographies=["Example Geography"],
family_metadata={"key1": "value1", "key2": "value2"},
corpus_import_id="test.corpus.0.1",
Expand Down

0 comments on commit f9b9cee

Please sign in to comment.