From 393669ebcc7e4a7747d5eeb8e53ffbc4a424e053 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:10:33 +0100 Subject: [PATCH 01/13] Update test to use family_geographies --- tests/search/vespa/test_range_and_keyword_filters_search.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/search/vespa/test_range_and_keyword_filters_search.py b/tests/search/vespa/test_range_and_keyword_filters_search.py index a50945b6..24d182c1 100644 --- a/tests/search/vespa/test_range_and_keyword_filters_search.py +++ b/tests/search/vespa/test_range_and_keyword_filters_search.py @@ -134,12 +134,14 @@ def test_keyword_region_filters( assert len(families) == VESPA_FIXTURE_COUNT for family in families: - country_code = family["family_geography"] + country_codes = family["family_geographies"] # Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA) - if country_code == "Other": + if country_codes == ["Other"]: return + country_code = country_codes[0] + parent_id = ( data_db.query(Geography) .filter(Geography.value == country_code) From 275b09bd8a11aab7e2932d1a81de0b50d1b32c3c Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:58:05 +0100 Subject: [PATCH 02/13] Remove family_geography & sort imports --- tests/search/test_search.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/search/test_search.py b/tests/search/test_search.py index ae782c38..10ab5da9 100644 --- a/tests/search/test_search.py +++ b/tests/search/test_search.py @@ -331,11 +331,6 @@ def test_create_vespa_search_params( assert converted_keyword_filters is not None assert produced_search_parameters.filters is not None assert produced_search_parameters.filters == CprSdkFilters( - family_geography=( - converted_keyword_filters["family_geography"] - if "family_geography" in converted_keyword_filters.keys() - else [] - ), family_category=( converted_keyword_filters["family_category"] if "family_category" in converted_keyword_filters.keys() @@ -624,7 +619,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]: family_slug=slugify(spec.family_name), family_category=spec.family_category, family_publication_ts=datetime.fromisoformat(spec.family_ts), - family_geography=spec.family_geo, family_geographies=spec.family_geos, corpus_import_id=spec.corpus_import_id, corpus_type_name=spec.corpus_type_name, @@ -658,7 +652,6 @@ def _generate_search_response_hits(spec: FamSpec) -> Sequence[CprSdkHit]: family_slug=slugify(spec.family_name), family_category=spec.family_category, family_publication_ts=datetime.fromisoformat(spec.family_ts), - family_geography=spec.family_geo, family_geographies=spec.family_geos, corpus_import_id=spec.corpus_import_id, corpus_type_name=spec.corpus_type_name, From eecb052a3c8d758c3983d2832ae5267df0fd9654 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:58:37 +0100 Subject: [PATCH 03/13] Update family_geography to family_geographies --- docs/api/search.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/search.md b/docs/api/search.md index c0a2a5fb..d41433b5 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -155,7 +155,7 @@ A list of family objects, each following the scheme below:   "family_date": "string",   "family_last_updated_date": "string",   "family_source": "string", -  "family_geography": "string", +  "family_geographies": "string[]",   "family_metadata": {},   "family_title_match": true,   "family_description_match": true, @@ -197,9 +197,9 @@ event of this family of documents. The source, currently organisation name. Either “CCLW” or “UNFCCC” -##### family_geography +##### family_geographies -The geographical location of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) +The geographical location(s) of the family in [ISO 3166-1 alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) ##### family_metadata From e8d43f30549e1c164fb4aa47ae16ef24619fb434 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:58:51 +0100 Subject: [PATCH 04/13] Remove family_geography --- app/api/api_v1/schemas/search.py | 5 ----- app/core/browse.py | 1 - tests/non_search/core/test_browse.py | 1 - 3 files changed, 7 deletions(-) diff --git a/app/api/api_v1/schemas/search.py b/app/api/api_v1/schemas/search.py index d295f84b..d71d8cf7 100644 --- a/app/api/api_v1/schemas/search.py +++ b/app/api/api_v1/schemas/search.py @@ -256,11 +256,6 @@ class SearchResponseFamily(BaseModel): The name given to the type of corpus the family belongs to. E.G. 'Laws and Policies' """ - family_geography: str - """ - The geographical location of the family in ISO 3166-1 alpha-3 - """ - family_geographies: List[str] """ The geographical locations of the family in ISO 3166-1 alpha-3 diff --git a/app/core/browse.py b/app/core/browse.py index 6496978f..a6652a9d 100644 --- a/app/core/browse.py +++ b/app/core/browse.py @@ -59,7 +59,6 @@ def to_search_response_family( family_source=cast(str, organisation.name), corpus_import_id=cast(str, corpus.import_id), corpus_type_name=cast(str, corpus.corpus_type_name), - family_geography=geography_value, family_geographies=[row.value for row in family.geographies], family_title_match=False, family_description_match=False, diff --git a/tests/non_search/core/test_browse.py b/tests/non_search/core/test_browse.py index d0adcfe7..c15f7ab0 100644 --- a/tests/non_search/core/test_browse.py +++ b/tests/non_search/core/test_browse.py @@ -29,7 +29,6 @@ def test_browse_rds_families(data_db): assert family.family_source == "CCLW" assert family.corpus_import_id == "CCLW.corpus.i00000001.n0000" assert family.corpus_type_name == "Laws and Policies" - assert family.family_geography == "South Asia" assert family.family_geographies == ["South Asia"] assert family.family_metadata == {} assert family.total_passage_hits == 0 From 69f8fd79461d31ef9f83ce26fd08ab9ef79bb7d6 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:00:49 +0100 Subject: [PATCH 05/13] Make download this search support multiple geographies --- app/core/search.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/app/core/search.py b/app/core/search.py index 1c0c7687..41042008 100644 --- a/app/core/search.py +++ b/app/core/search.py @@ -168,6 +168,12 @@ def process_result_into_csv( collection_name = collection.title collection_summary = collection.description + family_geos = ";".join( + [cast(str, geo) for geo in family.family_geographies] + if family is not None + else [] + ) + family_documents: Sequence[FamilyDocument] = extra_required_info["documents"][ family.family_slug ] @@ -210,6 +216,7 @@ def process_result_into_csv( if physical_document is not None else [] ) + row = { "Collection Name": collection_name, "Collection Summary": collection_summary, @@ -222,7 +229,7 @@ def process_result_into_csv( "Document Content URL": document_content, "Document Type": doc_type_from_family_document_metadata(document), "Document Content Matches Search Phrase": document_match, - "Geography": family.family_geography, + "Geographies": family_geos, "Category": family.family_category, "Languages": document_languages, "Source": family_source, @@ -243,7 +250,7 @@ def process_result_into_csv( "Document Content URL": "", "Document Type": "", "Document Content Matches Search Phrase": "n/a", - "Geography": family.family_geography, + "Geographies": family_geos, "Category": family.family_category, "Languages": "", "Source": family_source, @@ -389,7 +396,6 @@ def _process_vespa_search_response_families( or hit.family_name is None or hit.family_category is None or hit.family_source is None - or hit.family_geography is None or hit.family_geographies is None ): _LOGGER.error( @@ -425,7 +431,6 @@ def _process_vespa_search_response_families( continuation_token=vespa_family.continuation_token, prev_continuation_token=vespa_family.prev_continuation_token, family_documents=[], - family_geography=hit.family_geography, family_geographies=hit.family_geographies, family_metadata=cast(dict, db_family_metadata.value), ) From 05613509e01d00f4aca3e20941a84389e41f8df0 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:11:38 +0100 Subject: [PATCH 06/13] Create vespa test families with multiple geos --- tests/search/vespa/setup_search_tests.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/search/vespa/setup_search_tests.py b/tests/search/vespa/setup_search_tests.py index 2056e5aa..47113cec 100644 --- a/tests/search/vespa/setup_search_tests.py +++ b/tests/search/vespa/setup_search_tests.py @@ -118,9 +118,6 @@ def _populate_db_families( def _create_family(db: Session, family: VespaFixture): family_import_id = family["fields"]["family_import_id"] - geo = family["fields"]["family_geography"] - geography = db.query(Geography).filter(Geography.value == geo).one() - family_object = Family( title=family["fields"]["family_name"], import_id=family_import_id, @@ -129,11 +126,14 @@ def _create_family(db: Session, family: VespaFixture): ) db.add(family_object) db.commit() - db.add( - FamilyGeography( - family_import_id=family_object.import_id, geography_id=geography.id + + for geo in family["fields"]["family_geographies"]: + geography = db.query(Geography).filter(Geography.value == geo).one() + db.add( + FamilyGeography( + family_import_id=family_object.import_id, geography_id=geography.id + ) ) - ) family_slug = Slug( name=family["fields"]["family_slug"], From 86049a68988008826364f77780d8b7aea0c5e645 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:20:37 +0100 Subject: [PATCH 07/13] Use multiple geographies instead of singular --- .../vespa/test_range_and_keyword_filters_search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/search/vespa/test_range_and_keyword_filters_search.py b/tests/search/vespa/test_range_and_keyword_filters_search.py index 24d182c1..69548830 100644 --- a/tests/search/vespa/test_range_and_keyword_filters_search.py +++ b/tests/search/vespa/test_range_and_keyword_filters_search.py @@ -41,12 +41,14 @@ def test_keyword_country_filters__geography( assert len(families) == VESPA_FIXTURE_COUNT for family in families: - assert family["family_geography"] in family["family_geographies"] - country_code = family["family_geography"] + country_codes = family["family_geographies"] - country_slug = get_country_slug_from_country_code(data_db, country_code) + country_slugs = [ + get_country_slug_from_country_code(data_db, country_code) + for country_code in country_codes + ] - params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}} + params = {**base_params, **{"keyword_filters": {"countries": [country_slugs]}}} body_with_filters = _make_search_request( data_client, valid_token, params=params ) From 2353af591798dd393e07b1720e27ce2d354bafb8 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:20:57 +0100 Subject: [PATCH 08/13] Use multiple geographies in download --- tests/search/vespa/test_this_vespa_search_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/search/vespa/test_this_vespa_search_download.py b/tests/search/vespa/test_this_vespa_search_download.py index 3c0d0c0d..709695c0 100644 --- a/tests/search/vespa/test_this_vespa_search_download.py +++ b/tests/search/vespa/test_this_vespa_search_download.py @@ -78,7 +78,7 @@ def test_csv_content( assert row["Family Summary"] == family["family_description"] assert row["Family Publication Date"] == family["family_date"] assert row["Category"] == family["family_category"] - assert row["Geography"] == family["family_geography"] + assert row["Geographies"] == family["family_geographies"] # TODO: Add collections to test db setup to provide document level coverage From 58cbb69f79556b7566f1c6639f610aea8b3011e4 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:21:47 +0100 Subject: [PATCH 09/13] Remove family_geography --- tests/unit/app/schemas/test_schemas.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/app/schemas/test_schemas.py b/tests/unit/app/schemas/test_schemas.py index 4721c86a..40fc7d1b 100644 --- a/tests/unit/app/schemas/test_schemas.py +++ b/tests/unit/app/schemas/test_schemas.py @@ -131,7 +131,6 @@ def test_search_response() -> None: datetime.now() ), # You can replace this with an actual date string family_source="Example Source", - family_geography="Example Geography", family_geographies=["Example Geography"], family_metadata={"key1": "value1", "key2": "value2"}, corpus_import_id="test.corpus.0.1", From 66a68558f11410a0d1a164b964603dc705a76e15 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:39:36 +0100 Subject: [PATCH 10/13] Bump to 1.17.9 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6f7ffe7a..e56a8156 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.17.8" +version = "1.17.9" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] From 106ee6c798892ae8244a69daba7898f9c29b4bc1 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:50:28 +0100 Subject: [PATCH 11/13] Rename CSV column to Geographies --- app/core/search.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/app/core/search.py b/app/core/search.py index 41042008..afd343f3 100644 --- a/app/core/search.py +++ b/app/core/search.py @@ -54,7 +54,7 @@ "Family Summary", "Family URL", "Family Publication Date", - "Geography", + "Geographies", "Document Title", "Document URL", "Document Content URL", @@ -157,6 +157,12 @@ def process_result_into_csv( metadata_keys[family_source] = list( [key.title() for key in family_metadata.keys()] ) + + family_geos = ";".join( + [cast(str, geo) for geo in family.family_geographies] + if family is not None + else [] + ) metadata: dict[str, str] = defaultdict(str) for k in family_metadata: metadata[k.title()] = ";".join(family_metadata.get(k, [])) @@ -168,12 +174,6 @@ def process_result_into_csv( collection_name = collection.title collection_summary = collection.description - family_geos = ";".join( - [cast(str, geo) for geo in family.family_geographies] - if family is not None - else [] - ) - family_documents: Sequence[FamilyDocument] = extra_required_info["documents"][ family.family_slug ] From 3f2d2fd8d0d142f53c8a61ba70979b53a52e2e95 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:52:03 +0100 Subject: [PATCH 12/13] Remove geography filter test as famil_geo not in search response --- .../test_range_and_keyword_filters_search.py | 49 ------------------- 1 file changed, 49 deletions(-) diff --git a/tests/search/vespa/test_range_and_keyword_filters_search.py b/tests/search/vespa/test_range_and_keyword_filters_search.py index 69548830..2e892a5b 100644 --- a/tests/search/vespa/test_range_and_keyword_filters_search.py +++ b/tests/search/vespa/test_range_and_keyword_filters_search.py @@ -13,54 +13,6 @@ ) -@pytest.mark.search -@patch( - "app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db", - return_value=True, -) -@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) -def test_keyword_country_filters__geography( - mock_corpora_exist_in_db, - label, - query, - test_vespa, - data_client, - data_db, - monkeypatch, - valid_token, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - base_params = {"query_string": query} - - # Get all documents and iterate over their country codes to confirm that each are - # the specific one that is returned in the query (as they each have a unique - # country code) - all_body = _make_search_request(data_client, valid_token, params=base_params) - families = [f for f in all_body["families"]] - assert len(families) == VESPA_FIXTURE_COUNT - - for family in families: - country_codes = family["family_geographies"] - - country_slugs = [ - get_country_slug_from_country_code(data_db, country_code) - for country_code in country_codes - ] - - params = {**base_params, **{"keyword_filters": {"countries": [country_slugs]}}} - body_with_filters = _make_search_request( - data_client, valid_token, params=params - ) - filtered_family_slugs = [ - f["family_slug"] for f in body_with_filters["families"] - ] - assert len(filtered_family_slugs) == 1 - assert family["family_slug"] in filtered_family_slugs - - assert mock_corpora_exist_in_db.assert_called - - @pytest.mark.search @patch( "app.api.api_v1.routers.search.AppTokenFactory.verify_corpora_in_db", @@ -89,7 +41,6 @@ def test_keyword_country_filters__geographies( assert len(families) == VESPA_FIXTURE_COUNT for family in families: - assert family["family_geography"] in family["family_geographies"] for country_code in family["family_geographies"]: country_slug = get_country_slug_from_country_code(data_db, country_code) From 902c70cf086608e21b4fa7506d9f7cb8b508774d Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 10 Oct 2024 16:53:23 +0100 Subject: [PATCH 13/13] Improve testing for download spreadsheet cols --- .../vespa/test_this_vespa_search_download.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/search/vespa/test_this_vespa_search_download.py b/tests/search/vespa/test_this_vespa_search_download.py index 709695c0..e5dd0757 100644 --- a/tests/search/vespa/test_this_vespa_search_download.py +++ b/tests/search/vespa/test_this_vespa_search_download.py @@ -16,6 +16,24 @@ SEARCH_ENDPOINT = "/api/v1/searches" CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv" +_CSV_SEARCH_RESPONSE_COLUMNS = [ + "Collection Name", + "Collection Summary", + "Family Name", + "Family Summary", + "Family URL", + "Family Publication Date", + "Geographies", + "Document Title", + "Document URL", + "Document Content URL", + "Document Type", + "Document Content Matches Search Phrase", + "Category", + "Languages", + "Source", +] + def _make_download_request( client, @@ -74,11 +92,18 @@ def test_csv_content( csv_content = csv.DictReader(StringIO(csv_response.text)) for row, family in zip(csv_content, families): + assert all(col in row.keys() for col in _CSV_SEARCH_RESPONSE_COLUMNS) + assert row["Family Name"] == family["family_name"] assert row["Family Summary"] == family["family_description"] assert row["Family Publication Date"] == family["family_date"] assert row["Category"] == family["family_category"] - assert row["Geographies"] == family["family_geographies"] + + assert isinstance(row["Geographies"], str) + if len(family["family_geographies"]) > 1: + assert ( + row["Geographies"].count(";") == len(family["family_geographies"]) - 1 + ) # TODO: Add collections to test db setup to provide document level coverage