diff --git a/app/api/api_v1/schemas/search.py b/app/api/api_v1/schemas/search.py index d295f84b..d71d8cf7 100644 --- a/app/api/api_v1/schemas/search.py +++ b/app/api/api_v1/schemas/search.py @@ -256,11 +256,6 @@ class SearchResponseFamily(BaseModel): The name given to the type of corpus the family belongs to. E.G. 'Laws and Policies' """ - family_geography: str - """ - The geographical location of the family in ISO 3166-1 alpha-3 - """ - family_geographies: List[str] """ The geographical locations of the family in ISO 3166-1 alpha-3 diff --git a/app/core/browse.py b/app/core/browse.py index 6496978f..a6652a9d 100644 --- a/app/core/browse.py +++ b/app/core/browse.py @@ -59,7 +59,6 @@ def to_search_response_family( family_source=cast(str, organisation.name), corpus_import_id=cast(str, corpus.import_id), corpus_type_name=cast(str, corpus.corpus_type_name), - family_geography=geography_value, family_geographies=[row.value for row in family.geographies], family_title_match=False, family_description_match=False, diff --git a/app/core/search.py b/app/core/search.py index 1c0c7687..afd343f3 100644 --- a/app/core/search.py +++ b/app/core/search.py @@ -54,7 +54,7 @@ "Family Summary", "Family URL", "Family Publication Date", - "Geography", + "Geographies", "Document Title", "Document URL", "Document Content URL", @@ -157,6 +157,12 @@ def process_result_into_csv( metadata_keys[family_source] = list( [key.title() for key in family_metadata.keys()] ) + + family_geos = ";".join( + [cast(str, geo) for geo in family.family_geographies] + if family is not None + else [] + ) metadata: dict[str, str] = defaultdict(str) for k in family_metadata: metadata[k.title()] = ";".join(family_metadata.get(k, [])) @@ -210,6 +216,7 @@ def process_result_into_csv( if physical_document is not None else [] ) + row = { "Collection Name": collection_name, "Collection Summary": collection_summary, @@ -222,7 +229,7 @@ def process_result_into_csv( "Document Content URL": document_content, "Document Type": doc_type_from_family_document_metadata(document), "Document Content Matches Search Phrase": document_match, - "Geography": family.family_geography, + "Geographies": family_geos, "Category": family.family_category, "Languages": document_languages, "Source": family_source, @@ -243,7 +250,7 @@ def process_result_into_csv( "Document Content URL": "", "Document Type": "", "Document Content Matches Search Phrase": "n/a", - "Geography": family.family_geography, + "Geographies": family_geos, "Category": family.family_category, "Languages": "", "Source": family_source, @@ -389,7 +396,6 @@ def _process_vespa_search_response_families( or hit.family_name is None or hit.family_category is None or hit.family_source is None - or hit.family_geography is None or hit.family_geographies is None ): _LOGGER.error( @@ -425,7 +431,6 @@ def _process_vespa_search_response_families( continuation_token=vespa_family.continuation_token, prev_continuation_token=vespa_family.prev_continuation_token, family_documents=[], - family_geography=hit.family_geography, family_geographies=hit.family_geographies, family_metadata=cast(dict, db_family_metadata.value), ) diff --git a/docs/api/search.md b/docs/api/search.md index c0a2a5fb..d41433b5 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -155,7 +155,7 @@ A list of family objects, each following the scheme below:   "family_date": "string",   "family_last_updated_date": "string",   "family_source": "string", -  "family_geography": "string", +  "family_geographies": "string[]",   "family_metadata": {},   "family_title_match": true,   "family_description_match": true, @@ -197,9 +197,9 @@ event of this family of documents. The source, currently organisation name. Either “CCLW” or “UNFCCC” -##### family_geography +##### family_geographies -The geographical location of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) +The geographical location(s) of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) ##### family_metadata diff --git a/pyproject.toml b/pyproject.toml index 6f7ffe7a..e56a8156 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.17.8" +version = "1.17.9" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/non_search/core/test_browse.py b/tests/non_search/core/test_browse.py index d0adcfe7..c15f7ab0 100644 --- a/tests/non_search/core/test_browse.py +++ b/tests/non_search/core/test_browse.py @@ -29,7 +29,6 @@ def test_browse_rds_families(data_db): assert family.family_source == "CCLW" assert family.corpus_import_id == "CCLW.corpus.i00000001.n0000" assert family.corpus_type_name == "Laws and Policies" - assert family.family_geography == "South Asia" assert family.family_geographies == ["South Asia"] assert family.family_metadata == {} assert family.total_passage_hits == 0 diff --git a/tests/search/test_search.py b/tests/search/test_search.py index ae782c38..10ab5da9 100644 --- a/tests/search/test_search.py +++ b/tests/search/test_search.py @@ -331,11 +331,6 @@ def test_create_vespa_search_params( assert converted_keyword_filters is not None assert produced_search_parameters.filters is not None assert produced_search_parameters.filters == CprSdkFilters( - family_geography=( - converted_keyword_filters["family_geography"] - if "family_geography" in converted_keyword_filters.keys() - else [] - ), family_category=( converted_keyword_filters["family_category"] if "family_category" in converted_keyword_filters.keys() @@ -624,7 +619,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]: family_slug=slugify(spec.family_name), family_category=spec.family_category, family_publication_ts=datetime.fromisoformat(spec.family_ts), - family_geography=spec.family_geo, family_geographies=spec.family_geos, corpus_import_id=spec.corpus_import_id, corpus_type_name=spec.corpus_type_name, @@ -658,7 +652,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]: family_slug=slugify(spec.family_name), family_category=spec.family_category, family_publication_ts=datetime.fromisoformat(spec.family_ts), - family_geography=spec.family_geo, family_geographies=spec.family_geos, corpus_import_id=spec.corpus_import_id, corpus_type_name=spec.corpus_type_name, diff --git a/tests/search/vespa/setup_search_tests.py b/tests/search/vespa/setup_search_tests.py index 2056e5aa..47113cec 100644 --- a/tests/search/vespa/setup_search_tests.py +++ b/tests/search/vespa/setup_search_tests.py @@ -118,9 +118,6 @@ def _populate_db_families( def _create_family(db: Session, family: VespaFixture): family_import_id = family["fields"]["family_import_id"] - geo = family["fields"]["family_geography"] - geography = db.query(Geography).filter(Geography.value == geo).one() - family_object = Family( title=family["fields"]["family_name"], import_id=family_import_id, @@ -129,11 +126,14 @@ def _create_family(db: Session, family: VespaFixture): ) db.add(family_object) db.commit() - db.add( - FamilyGeography( - family_import_id=family_object.import_id, geography_id=geography.id + + for geo in family["fields"]["family_geographies"]: + geography = db.query(Geography).filter(Geography.value == geo).one() + db.add( + FamilyGeography( + family_import_id=family_object.import_id, geography_id=geography.id + ) ) - ) family_slug = Slug( name=family["fields"]["family_slug"], diff --git a/tests/search/vespa/test_range_and_keyword_filters_search.py b/tests/search/vespa/test_range_and_keyword_filters_search.py index a50945b6..2e892a5b 100644 --- a/tests/search/vespa/test_range_and_keyword_filters_search.py +++ b/tests/search/vespa/test_range_and_keyword_filters_search.py @@ -13,52 +13,6 @@ ) -@pytest.mark.search -@patch( - "app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db", - return_value=True, -) -@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) -def test_keyword_country_filters__geography( - mock_corpora_exist_in_db, - label, - query, - test_vespa, - data_client, - data_db, - monkeypatch, - valid_token, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - base_params = {"query_string": query} - - # Get all documents and iterate over their country codes to confirm that each are - # the specific one that is returned in the query (as they each have a unique - # country code) - all_body = _make_search_request(data_client, valid_token, params=base_params) - families = [f for f in all_body["families"]] - assert len(families) == VESPA_FIXTURE_COUNT - - for family in families: - assert family["family_geography"] in family["family_geographies"] - country_code = family["family_geography"] - - country_slug = get_country_slug_from_country_code(data_db, country_code) - - params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}} - body_with_filters = _make_search_request( - data_client, valid_token, params=params - ) - filtered_family_slugs = [ - f["family_slug"] for f in body_with_filters["families"] - ] - assert len(filtered_family_slugs) == 1 - assert family["family_slug"] in filtered_family_slugs - - assert mock_corpora_exist_in_db.assert_called - - @pytest.mark.search @patch( "app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db", @@ -87,7 +41,6 @@ def test_keyword_country_filters__geographies( assert len(families) == VESPA_FIXTURE_COUNT for family in families: - assert family["family_geography"] in family["family_geographies"] for country_code in family["family_geographies"]: country_slug = get_country_slug_from_country_code(data_db, country_code) @@ -134,12 +87,14 @@ def test_keyword_region_filters( assert len(families) == VESPA_FIXTURE_COUNT for family in families: - country_code = family["family_geography"] + country_codes = family["family_geographies"] # Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA) - if country_code == "Other": + if country_codes == ["Other"]: return + country_code = country_codes[0] + parent_id = ( data_db.query(Geography) .filter(Geography.value == country_code) diff --git a/tests/search/vespa/test_this_vespa_search_download.py b/tests/search/vespa/test_this_vespa_search_download.py index 3c0d0c0d..e5dd0757 100644 --- a/tests/search/vespa/test_this_vespa_search_download.py +++ b/tests/search/vespa/test_this_vespa_search_download.py @@ -16,6 +16,24 @@ SEARCH_ENDPOINT = "/api/v1/searches" CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv" +_CSV_SEARCH_RESPONSE_COLUMNS = [ + "Collection Name", + "Collection Summary", + "Family Name", + "Family Summary", + "Family URL", + "Family Publication Date", + "Geographies", + "Document Title", + "Document URL", + "Document Content URL", + "Document Type", + "Document Content Matches Search Phrase", + "Category", + "Languages", + "Source", +] + def _make_download_request( client, @@ -74,11 +92,18 @@ def test_csv_content( csv_content = csv.DictReader(StringIO(csv_response.text)) for row, family in zip(csv_content, families): + assert all(col in row.keys() for col in _CSV_SEARCH_RESPONSE_COLUMNS) + assert row["Family Name"] == family["family_name"] assert row["Family Summary"] == family["family_description"] assert row["Family Publication Date"] == family["family_date"] assert row["Category"] == family["family_category"] - assert row["Geography"] == family["family_geography"] + + assert isinstance(row["Geographies"], str) + if len(family["family_geographies"]) > 1: + assert ( + row["Geographies"].count(";") == len(family["family_geographies"]) - 1 + ) # TODO: Add collections to test db setup to provide document level coverage diff --git a/tests/unit/app/schemas/test_schemas.py b/tests/unit/app/schemas/test_schemas.py index 4721c86a..40fc7d1b 100644 --- a/tests/unit/app/schemas/test_schemas.py +++ b/tests/unit/app/schemas/test_schemas.py @@ -131,7 +131,6 @@ def test_search_response() -> None: datetime.now() ), # You can replace this with an actual date string family_source="Example Source", - family_geography="Example Geography", family_geographies=["Example Geography"], family_metadata={"key1": "value1", "key2": "value2"}, corpus_import_id="test.corpus.0.1",