Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pdct 1512 remove family geography from the searchresponse #378

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions app/api/api_v1/schemas/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,6 @@ class SearchResponseFamily(BaseModel):
The name given to the type of corpus the family belongs to. E.G. 'Laws and Policies'
"""

family_geography: str
"""
The geographical location of the family in ISO 3166-1 alpha-3
"""

family_geographies: List[str]
"""
The geographical locations of the family in ISO 3166-1 alpha-3
Expand Down
1 change: 0 additions & 1 deletion app/core/browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def to_search_response_family(
family_source=cast(str, organisation.name),
corpus_import_id=cast(str, corpus.import_id),
corpus_type_name=cast(str, corpus.corpus_type_name),
family_geography=geography_value,
family_geographies=[row.value for row in family.geographies],
family_title_match=False,
family_description_match=False,
Expand Down
15 changes: 10 additions & 5 deletions app/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"Family Summary",
"Family URL",
"Family Publication Date",
"Geography",
"Geographies",
"Document Title",
"Document URL",
"Document Content URL",
Expand Down Expand Up @@ -157,6 +157,12 @@ def process_result_into_csv(
metadata_keys[family_source] = list(
[key.title() for key in family_metadata.keys()]
)

family_geos = ";".join(
[cast(str, geo) for geo in family.family_geographies]
if family is not None
else []
)
metadata: dict[str, str] = defaultdict(str)
for k in family_metadata:
metadata[k.title()] = ";".join(family_metadata.get(k, []))
Expand Down Expand Up @@ -210,6 +216,7 @@ def process_result_into_csv(
if physical_document is not None
else []
)

row = {
"Collection Name": collection_name,
"Collection Summary": collection_summary,
Expand All @@ -222,7 +229,7 @@ def process_result_into_csv(
"Document Content URL": document_content,
"Document Type": doc_type_from_family_document_metadata(document),
"Document Content Matches Search Phrase": document_match,
"Geography": family.family_geography,
"Geographies": family_geos,
"Category": family.family_category,
"Languages": document_languages,
"Source": family_source,
Expand All @@ -243,7 +250,7 @@ def process_result_into_csv(
"Document Content URL": "",
"Document Type": "",
"Document Content Matches Search Phrase": "n/a",
"Geography": family.family_geography,
"Geographies": family_geos,
"Category": family.family_category,
"Languages": "",
"Source": family_source,
Expand Down Expand Up @@ -389,7 +396,6 @@ def _process_vespa_search_response_families(
or hit.family_name is None
or hit.family_category is None
or hit.family_source is None
or hit.family_geography is None
or hit.family_geographies is None
):
_LOGGER.error(
Expand Down Expand Up @@ -425,7 +431,6 @@ def _process_vespa_search_response_families(
continuation_token=vespa_family.continuation_token,
prev_continuation_token=vespa_family.prev_continuation_token,
family_documents=[],
family_geography=hit.family_geography,
family_geographies=hit.family_geographies,
family_metadata=cast(dict, db_family_metadata.value),
)
Expand Down
6 changes: 3 additions & 3 deletions docs/api/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ A list of family objects, each following the scheme below:
  "family_date": "string",
  "family_last_updated_date": "string",
  "family_source": "string",
  "family_geography": "string",
  "family_geographies": "string[]",
  "family_metadata": {},
  "family_title_match": true,
  "family_description_match": true,
Expand Down Expand Up @@ -197,9 +197,9 @@ event of this family of documents.

The source, currently organisation name. Either “CCLW” or “UNFCCC”

##### family_geography
##### family_geographies

The geographical location of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)
The geographical location(s) of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3)

##### family_metadata

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.17.8"
version = "1.17.9"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
1 change: 0 additions & 1 deletion tests/non_search/core/test_browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def test_browse_rds_families(data_db):
assert family.family_source == "CCLW"
assert family.corpus_import_id == "CCLW.corpus.i00000001.n0000"
assert family.corpus_type_name == "Laws and Policies"
assert family.family_geography == "South Asia"
assert family.family_geographies == ["South Asia"]
assert family.family_metadata == {}
assert family.total_passage_hits == 0
Expand Down
7 changes: 0 additions & 7 deletions tests/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,6 @@ def test_create_vespa_search_params(
assert converted_keyword_filters is not None
assert produced_search_parameters.filters is not None
assert produced_search_parameters.filters == CprSdkFilters(
family_geography=(
converted_keyword_filters["family_geography"]
if "family_geography" in converted_keyword_filters.keys()
else []
),
family_category=(
converted_keyword_filters["family_category"]
if "family_category" in converted_keyword_filters.keys()
Expand Down Expand Up @@ -624,7 +619,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]:
family_slug=slugify(spec.family_name),
family_category=spec.family_category,
family_publication_ts=datetime.fromisoformat(spec.family_ts),
family_geography=spec.family_geo,
family_geographies=spec.family_geos,
corpus_import_id=spec.corpus_import_id,
corpus_type_name=spec.corpus_type_name,
Expand Down Expand Up @@ -658,7 +652,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]:
family_slug=slugify(spec.family_name),
family_category=spec.family_category,
family_publication_ts=datetime.fromisoformat(spec.family_ts),
family_geography=spec.family_geo,
family_geographies=spec.family_geos,
corpus_import_id=spec.corpus_import_id,
corpus_type_name=spec.corpus_type_name,
Expand Down
14 changes: 7 additions & 7 deletions tests/search/vespa/setup_search_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,6 @@ def _populate_db_families(
def _create_family(db: Session, family: VespaFixture):
family_import_id = family["fields"]["family_import_id"]

geo = family["fields"]["family_geography"]
geography = db.query(Geography).filter(Geography.value == geo).one()

family_object = Family(
title=family["fields"]["family_name"],
import_id=family_import_id,
Expand All @@ -129,11 +126,14 @@ def _create_family(db: Session, family: VespaFixture):
)
db.add(family_object)
db.commit()
db.add(
FamilyGeography(
family_import_id=family_object.import_id, geography_id=geography.id

for geo in family["fields"]["family_geographies"]:
geography = db.query(Geography).filter(Geography.value == geo).one()
db.add(
FamilyGeography(
family_import_id=family_object.import_id, geography_id=geography.id
)
)
)

family_slug = Slug(
name=family["fields"]["family_slug"],
Expand Down
53 changes: 4 additions & 49 deletions tests/search/vespa/test_range_and_keyword_filters_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,52 +13,6 @@
)


@pytest.mark.search
@patch(
"app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db",
return_value=True,
)
@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
def test_keyword_country_filters__geography(
mock_corpora_exist_in_db,
label,
query,
test_vespa,
data_client,
data_db,
monkeypatch,
valid_token,
):
monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
_populate_db_families(data_db)
base_params = {"query_string": query}

# Get all documents and iterate over their country codes to confirm that each are
# the specific one that is returned in the query (as they each have a unique
# country code)
all_body = _make_search_request(data_client, valid_token, params=base_params)
families = [f for f in all_body["families"]]
assert len(families) == VESPA_FIXTURE_COUNT

for family in families:
assert family["family_geography"] in family["family_geographies"]
country_code = family["family_geography"]

country_slug = get_country_slug_from_country_code(data_db, country_code)

params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}}
body_with_filters = _make_search_request(
data_client, valid_token, params=params
)
filtered_family_slugs = [
f["family_slug"] for f in body_with_filters["families"]
]
assert len(filtered_family_slugs) == 1
assert family["family_slug"] in filtered_family_slugs

assert mock_corpora_exist_in_db.assert_called


@pytest.mark.search
@patch(
"app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db",
Expand Down Expand Up @@ -87,7 +41,6 @@ def test_keyword_country_filters__geographies(
assert len(families) == VESPA_FIXTURE_COUNT

for family in families:
assert family["family_geography"] in family["family_geographies"]
for country_code in family["family_geographies"]:
country_slug = get_country_slug_from_country_code(data_db, country_code)

Expand Down Expand Up @@ -134,12 +87,14 @@ def test_keyword_region_filters(
assert len(families) == VESPA_FIXTURE_COUNT

for family in families:
country_code = family["family_geography"]
country_codes = family["family_geographies"]

# Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA)
if country_code == "Other":
if country_codes == ["Other"]:
return

country_code = country_codes[0]

parent_id = (
data_db.query(Geography)
.filter(Geography.value == country_code)
Expand Down
27 changes: 26 additions & 1 deletion tests/search/vespa/test_this_vespa_search_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@
SEARCH_ENDPOINT = "/api/v1/searches"
CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv"

_CSV_SEARCH_RESPONSE_COLUMNS = [
"Collection Name",
"Collection Summary",
"Family Name",
"Family Summary",
"Family URL",
"Family Publication Date",
"Geographies",
"Document Title",
"Document URL",
"Document Content URL",
"Document Type",
"Document Content Matches Search Phrase",
"Category",
"Languages",
"Source",
]


def _make_download_request(
client,
Expand Down Expand Up @@ -74,11 +92,18 @@ def test_csv_content(

csv_content = csv.DictReader(StringIO(csv_response.text))
for row, family in zip(csv_content, families):
assert all(col in row.keys() for col in _CSV_SEARCH_RESPONSE_COLUMNS)

assert row["Family Name"] == family["family_name"]
assert row["Family Summary"] == family["family_description"]
assert row["Family Publication Date"] == family["family_date"]
assert row["Category"] == family["family_category"]
assert row["Geography"] == family["family_geography"]

assert isinstance(row["Geographies"], str)
if len(family["family_geographies"]) > 1:
assert (
row["Geographies"].count(";") == len(family["family_geographies"]) - 1
)

# TODO: Add collections to test db setup to provide document level coverage

Expand Down
1 change: 0 additions & 1 deletion tests/unit/app/schemas/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def test_search_response() -> None:
datetime.now()
), # You can replace this with an actual date string
family_source="Example Source",
family_geography="Example Geography",
family_geographies=["Example Geography"],
family_metadata={"key1": "value1", "key2": "value2"},
corpus_import_id="test.corpus.0.1",
Expand Down
Loading