From 993a4044d08e29608bc963f3fcd6854292fad77c Mon Sep 17 00:00:00 2001 From: annaCPR Date: Tue, 24 Sep 2024 11:18:44 +0100 Subject: [PATCH] Feature/pdct 1402 validate collection exists before creating family (#214) * Validate that a collection exists before saving a family * Refactor to remove duplication * Bump patch version --- app/service/ingest.py | 103 ++++++++++++++---- pyproject.toml | 2 +- .../service/ingest/test_ingest_service.py | 17 ++- 3 files changed, 98 insertions(+), 24 deletions(-) diff --git a/app/service/ingest.py b/app/service/ingest.py index 7415d4b9..7cffc1ca 100644 --- a/app/service/ingest.py +++ b/app/service/ingest.py @@ -5,6 +5,7 @@ import of data and other services for validation etc. """ +from enum import Enum from typing import Any, Optional from fastapi import HTTPException, status @@ -28,6 +29,15 @@ ) +class IngestEntityList(str, Enum): + """Name of the list of entities that can be ingested.""" + + Collections = "collections" + Families = "families" + Documents = "documents" + Events = "events" + + @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def save_collections( collection_data: list[dict[str, Any]], @@ -154,37 +164,90 @@ def save_events( return event_import_ids -def validate_entity_relationships(data: dict[str, Any]) -> None: +def _collect_import_ids( + entity_list_name: IngestEntityList, + data: dict[str, Any], + import_id_type_name: Optional[str] = None, +) -> list[str]: """ - Validates relationships between entities contained in data based on import_ids. - For documents, it validates that the family the document is linked to exists. + Extracts a list of import_ids (or family_import_ids if specified) for the specified entity list in data. + + :param IngestEntityList entity_list_name: The name of the entity list from which the import_ids are to be extracted. + :param dict[str, Any] data: The data structure containing the entity lists used for extraction. + :param Optional[str] import_id_type_name: the name of the type of import_id to be extracted or None. + :return list[str]: A list of extracted import_ids for the specified entity list. + """ + import_id_key = import_id_type_name or "import_id" + import_ids = [] + if entity_list_name.value in data: + for entity in data[entity_list_name.value]: + import_ids.append(entity[import_id_key]) + return import_ids + + +def _match_import_ids( + parent_references: list[str], parent_import_ids: set[str] +) -> None: + """ + Validates that all the references to parent entities exist in the set of parent import_ids passed in + + :param list[str] parent_references: List of import_ids referencing parent entities to be validated. + :param set[str] parent_import_ids: Set of parent import_ids to validate against. + :raises ValidationError: raised if a parent reference is not found in the parent_import_ids. + """ + for id in parent_references: + if id not in parent_import_ids: + raise ValidationError(f"No entity with id {id} found") + + +def _validate_collections_exist_for_families(data: dict[str, Any]) -> None: + """ + Validates that collections the families are linked to exist based on import_id links in data. :param dict[str, Any] data: The data object containing entities to be validated. - :raises ValidationError: raised should there be any unmatched relationships. """ - families = [] + collections = _collect_import_ids(IngestEntityList.Collections, data) + collections_set = set(collections) + + family_collection_import_ids = [] if "families" in data: for fam in data["families"]: - families.append(fam["import_id"]) + family_collection_import_ids.extend(fam["collections"]) + + _match_import_ids(family_collection_import_ids, collections_set) - document_family_import_ids = [] - if "documents" in data: - for entity in data["documents"]: - document_family_import_ids.append(entity["family_import_id"]) - event_family_import_ids = [] - if "events" in data: - for event in data["events"]: - event_family_import_ids.append(event["family_import_id"]) +def _validate_families_exist_for_events_and_documents(data: dict[str, Any]) -> None: + """ + Validates that families the documents and events are linked to exist + based on import_id links in data. + :param dict[str, Any] data: The data object containing entities to be validated. + """ + families = _collect_import_ids(IngestEntityList.Families, data) families_set = set(families) - for doc_fam in document_family_import_ids: - if doc_fam not in families_set: - raise ValidationError(f"No family with id {doc_fam} found for document") - for event_fam in event_family_import_ids: - if event_fam not in families_set: - raise ValidationError(f"No family with id {event_fam} found for event") + document_family_import_ids = _collect_import_ids( + IngestEntityList.Documents, data, "family_import_id" + ) + event_family_import_ids = _collect_import_ids( + IngestEntityList.Events, data, "family_import_id" + ) + + _match_import_ids(document_family_import_ids, families_set) + _match_import_ids(event_family_import_ids, families_set) + + +def validate_entity_relationships(data: dict[str, Any]) -> None: + """ + Validates relationships between entities contained in data. + For documents, it validates that the family the document is linked to exists. + + :param dict[str, Any] data: The data object containing entities to be validated. + """ + + _validate_collections_exist_for_families(data) + _validate_families_exist_for_events_and_documents(data) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) diff --git a/pyproject.toml b/pyproject.toml index 5ecf8b32..77aefac8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "admin_backend" -version = "2.15.2" +version = "2.15.3" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/unit_tests/service/ingest/test_ingest_service.py b/tests/unit_tests/service/ingest/test_ingest_service.py index 47217a88..e1bdebe9 100644 --- a/tests/unit_tests/service/ingest/test_ingest_service.py +++ b/tests/unit_tests/service/ingest/test_ingest_service.py @@ -146,7 +146,7 @@ def test_validate_entity_relationships_when_no_family_matching_document(): with pytest.raises(ValidationError) as e: ingest_service.validate_entity_relationships(test_data) - assert f"No family with id {fam_import_id} found for document" == e.value.message + assert f"No entity with id {fam_import_id} found" == e.value.message def test_validate_entity_relationships_when_no_family_matching_event(): @@ -157,7 +157,18 @@ def test_validate_entity_relationships_when_no_family_matching_event(): with pytest.raises(ValidationError) as e: ingest_service.validate_entity_relationships(test_data) - assert f"No family with id {fam_import_id} found for event" == e.value.message + assert f"No entity with id {fam_import_id} found" == e.value.message + + +def test_validate_entity_relationships_when_no_collection_matching_family(): + coll_import_id = "test.new.collection.0" + test_data = { + "families": [{"import_id": "test.new.event.0", "collections": [coll_import_id]}] + } + + with pytest.raises(ValidationError) as e: + ingest_service.validate_entity_relationships(test_data) + assert f"No entity with id {coll_import_id} found" == e.value.message def test_save_documents_when_no_family(): @@ -170,7 +181,7 @@ def test_save_documents_when_no_family(): with pytest.raises(ValidationError) as e: ingest_service.import_data(test_data, "test") - assert f"No family with id {fam_import_id} found for document" == e.value.message + assert f"No entity with id {fam_import_id} found" == e.value.message def test_save_events_when_data_invalid(validation_service_mock):