Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pdct 1402 validate collection exists before creating family #214

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 83 additions & 20 deletions app/service/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import of data and other services for validation etc.
"""

from enum import Enum
from typing import Any, Optional

from fastapi import HTTPException, status
Expand All @@ -28,6 +29,15 @@
)


class IngestEntityList(str, Enum):
"""Name of the list of entities that can be ingested."""

Collections = "collections"
Families = "families"
Documents = "documents"
Events = "events"


@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def save_collections(
collection_data: list[dict[str, Any]],
Expand Down Expand Up @@ -154,37 +164,90 @@ def save_events(
return event_import_ids


def validate_entity_relationships(data: dict[str, Any]) -> None:
def _collect_import_ids(
entity_list_name: IngestEntityList,
data: dict[str, Any],
import_id_type_name: Optional[str] = None,
) -> list[str]:
"""
Validates relationships between entities contained in data based on import_ids.
For documents, it validates that the family the document is linked to exists.
Extracts a list of import_ids (or family_import_ids if specified) for the specified entity list in data.

:param IngestEntityList entity_list_name: The name of the entity list from which the import_ids are to be extracted.
:param dict[str, Any] data: The data structure containing the entity lists used for extraction.
:param Optional[str] import_id_type_name: the name of the type of import_id to be extracted or None.
:return list[str]: A list of extracted import_ids for the specified entity list.
"""
import_id_key = import_id_type_name or "import_id"
import_ids = []
if entity_list_name.value in data:
for entity in data[entity_list_name.value]:
import_ids.append(entity[import_id_key])
return import_ids


def _match_import_ids(
parent_references: list[str], parent_import_ids: set[str]
) -> None:
"""
Validates that all the references to parent entities exist in the set of parent import_ids passed in

:param list[str] parent_references: List of import_ids referencing parent entities to be validated.
:param set[str] parent_import_ids: Set of parent import_ids to validate against.
:raises ValidationError: raised if a parent reference is not found in the parent_import_ids.
"""
for id in parent_references:
if id not in parent_import_ids:
raise ValidationError(f"No entity with id {id} found")


def _validate_collections_exist_for_families(data: dict[str, Any]) -> None:
"""
Validates that collections the families are linked to exist based on import_id links in data.

:param dict[str, Any] data: The data object containing entities to be validated.
:raises ValidationError: raised should there be any unmatched relationships.
"""
families = []
collections = _collect_import_ids(IngestEntityList.Collections, data)
collections_set = set(collections)

family_collection_import_ids = []
if "families" in data:
for fam in data["families"]:
families.append(fam["import_id"])
family_collection_import_ids.extend(fam["collections"])

_match_import_ids(family_collection_import_ids, collections_set)

document_family_import_ids = []
if "documents" in data:
for entity in data["documents"]:
document_family_import_ids.append(entity["family_import_id"])

event_family_import_ids = []
if "events" in data:
for event in data["events"]:
event_family_import_ids.append(event["family_import_id"])
def _validate_families_exist_for_events_and_documents(data: dict[str, Any]) -> None:
"""
Validates that families the documents and events are linked to exist
based on import_id links in data.

:param dict[str, Any] data: The data object containing entities to be validated.
"""
families = _collect_import_ids(IngestEntityList.Families, data)
families_set = set(families)
for doc_fam in document_family_import_ids:
if doc_fam not in families_set:
raise ValidationError(f"No family with id {doc_fam} found for document")

for event_fam in event_family_import_ids:
if event_fam not in families_set:
raise ValidationError(f"No family with id {event_fam} found for event")
document_family_import_ids = _collect_import_ids(
IngestEntityList.Documents, data, "family_import_id"
)
event_family_import_ids = _collect_import_ids(
IngestEntityList.Events, data, "family_import_id"
)

_match_import_ids(document_family_import_ids, families_set)
_match_import_ids(event_family_import_ids, families_set)


def validate_entity_relationships(data: dict[str, Any]) -> None:
"""
Validates relationships between entities contained in data.
For documents, it validates that the family the document is linked to exists.

:param dict[str, Any] data: The data object containing entities to be validated.
"""

_validate_collections_exist_for_families(data)
_validate_families_exist_for_events_and_documents(data)


@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "admin_backend"
version = "2.15.2"
version = "2.15.3"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
17 changes: 14 additions & 3 deletions tests/unit_tests/service/ingest/test_ingest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_validate_entity_relationships_when_no_family_matching_document():

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No family with id {fam_import_id} found for document" == e.value.message
assert f"No entity with id {fam_import_id} found" == e.value.message


def test_validate_entity_relationships_when_no_family_matching_event():
Expand All @@ -157,7 +157,18 @@ def test_validate_entity_relationships_when_no_family_matching_event():

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No family with id {fam_import_id} found for event" == e.value.message
assert f"No entity with id {fam_import_id} found" == e.value.message


def test_validate_entity_relationships_when_no_collection_matching_family():
coll_import_id = "test.new.collection.0"
test_data = {
"families": [{"import_id": "test.new.event.0", "collections": [coll_import_id]}]
}

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No entity with id {coll_import_id} found" == e.value.message


def test_save_documents_when_no_family():
Expand All @@ -170,7 +181,7 @@ def test_save_documents_when_no_family():

with pytest.raises(ValidationError) as e:
ingest_service.import_data(test_data, "test")
assert f"No family with id {fam_import_id} found for document" == e.value.message
assert f"No entity with id {fam_import_id} found" == e.value.message


def test_save_events_when_data_invalid(validation_service_mock):
Expand Down
Loading