From 4eef7ca115367dc8d2e4e6156f716c067998edf1 Mon Sep 17 00:00:00 2001 From: annaCPR Date: Thu, 12 Sep 2024 10:23:55 +0100 Subject: [PATCH] Bulk import fixes (#213) * Give user_language_name a default value on IngestDocumentDto * Do not look up entity counter if import_id already provided when saving documents * Do not look up entity counter if import_id already provided when saving events * Validate family exists before saving events * Bump patch version * Move comment --- app/model/ingest.py | 2 +- app/repository/document.py | 17 +++++++------ app/repository/event.py | 17 ++++++------- app/service/ingest.py | 25 +++++++++++++------ pyproject.toml | 2 +- .../ingest/test_ingest_template.py | 1 + .../routers/ingest/test_ingest_template.py | 1 + .../service/ingest/test_ingest_service.py | 15 +++++++++-- 8 files changed, 51 insertions(+), 29 deletions(-) diff --git a/app/model/ingest.py b/app/model/ingest.py index be2751dc..830871ae 100644 --- a/app/model/ingest.py +++ b/app/model/ingest.py @@ -94,7 +94,7 @@ class IngestDocumentDTO(BaseModel): metadata: Json title: str source_url: Optional[AnyHttpUrl] = None - user_language_name: Optional[str] + user_language_name: Optional[str] = None def to_document_create_dto(self) -> DocumentCreateDTO: """ diff --git a/app/repository/document.py b/app/repository/document.py index ce7485a2..1cf384cb 100644 --- a/app/repository/document.py +++ b/app/repository/document.py @@ -404,16 +404,17 @@ def create(db: Session, document: DocumentCreateDTO) -> str: # Update the FamilyDocument with the new PhysicalDocument id family_doc.physical_document_id = phys_doc.id - # Generate the import_id for the new document - org = family_repo.get_organisation(db, cast(str, family_doc.family_import_id)) - if org is None: - raise ValidationError( - f"Cannot find counter to generate id for {family_doc.family_import_id}" + if not family_doc.import_id: + org = family_repo.get_organisation( + db, cast(str, family_doc.family_import_id) ) + if org is None: + raise ValidationError( + f"Cannot find counter to generate id for {family_doc.family_import_id}" + ) + org_name = cast(str, org.name) - org_name = cast(str, org.name) - - if not family_doc.import_id: + # Generate the import_id for the new document family_doc.import_id = cast( Column, generate_import_id(db, CountedEntity.Document, org_name) ) diff --git a/app/repository/event.py b/app/repository/event.py index 5cb60b11..305a0d7d 100644 --- a/app/repository/event.py +++ b/app/repository/event.py @@ -172,16 +172,15 @@ def create(db: Session, event: EventCreateDTO) -> str: family_import_id = new_family_event.family_import_id - # Generate the import_id for the new event - org = family_repo.get_organisation(db, cast(str, family_import_id)) - if org is None: - raise ValidationError( - f"Cannot find counter to generate id for {family_import_id}" - ) - - org_name = cast(str, org.name) - if not new_family_event.import_id: + org = family_repo.get_organisation(db, cast(str, family_import_id)) + if org is None: + raise ValidationError( + f"Cannot find counter to generate id for {family_import_id}" + ) + org_name = cast(str, org.name) + + # Generate the import_id for the new event new_family_event.import_id = cast( Column, generate_import_id(db, CountedEntity.Event, org_name) ) diff --git a/app/service/ingest.py b/app/service/ingest.py index 522b7e90..7415d4b9 100644 --- a/app/service/ingest.py +++ b/app/service/ingest.py @@ -167,15 +167,24 @@ def validate_entity_relationships(data: dict[str, Any]) -> None: for fam in data["families"]: families.append(fam["import_id"]) - documents = [] + document_family_import_ids = [] if "documents" in data: - for doc in data["documents"]: - documents.append(doc["family_import_id"]) - - family_document_set = set(families) - unmatched = [x for x in documents if x not in family_document_set] - if unmatched: - raise ValidationError(f"No family with id {unmatched} found") + for entity in data["documents"]: + document_family_import_ids.append(entity["family_import_id"]) + + event_family_import_ids = [] + if "events" in data: + for event in data["events"]: + event_family_import_ids.append(event["family_import_id"]) + + families_set = set(families) + for doc_fam in document_family_import_ids: + if doc_fam not in families_set: + raise ValidationError(f"No family with id {doc_fam} found for document") + + for event_fam in event_family_import_ids: + if event_fam not in families_set: + raise ValidationError(f"No family with id {event_fam} found for event") @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) diff --git a/pyproject.toml b/pyproject.toml index 91919f28..5ecf8b32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "admin_backend" -version = "2.15.1" +version = "2.15.2" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/integration_tests/ingest/test_ingest_template.py b/tests/integration_tests/ingest/test_ingest_template.py index 24db08a6..3fafcf66 100644 --- a/tests/integration_tests/ingest/test_ingest_template.py +++ b/tests/integration_tests/ingest/test_ingest_template.py @@ -213,6 +213,7 @@ def test_get_template_unfcc( "user_language_name": { "anyOf": [{"type": "string"}, {"type": "null"}], "title": "User Language Name", + "default": None, }, } ], diff --git a/tests/unit_tests/routers/ingest/test_ingest_template.py b/tests/unit_tests/routers/ingest/test_ingest_template.py index c00c0f45..521ba27c 100644 --- a/tests/unit_tests/routers/ingest/test_ingest_template.py +++ b/tests/unit_tests/routers/ingest/test_ingest_template.py @@ -113,6 +113,7 @@ def test_ingest_template_when_ok( "user_language_name": { "anyOf": [{"type": "string"}, {"type": "null"}], "title": "User Language Name", + "default": None, }, } ], diff --git a/tests/unit_tests/service/ingest/test_ingest_service.py b/tests/unit_tests/service/ingest/test_ingest_service.py index d7f400b0..47217a88 100644 --- a/tests/unit_tests/service/ingest/test_ingest_service.py +++ b/tests/unit_tests/service/ingest/test_ingest_service.py @@ -146,7 +146,18 @@ def test_validate_entity_relationships_when_no_family_matching_document(): with pytest.raises(ValidationError) as e: ingest_service.validate_entity_relationships(test_data) - assert f"No family with id ['{fam_import_id}'] found" == e.value.message + assert f"No family with id {fam_import_id} found for document" == e.value.message + + +def test_validate_entity_relationships_when_no_family_matching_event(): + fam_import_id = "test.new.family.0" + test_data = { + "events": [{"import_id": "test.new.event.0", "family_import_id": fam_import_id}] + } + + with pytest.raises(ValidationError) as e: + ingest_service.validate_entity_relationships(test_data) + assert f"No family with id {fam_import_id} found for event" == e.value.message def test_save_documents_when_no_family(): @@ -159,7 +170,7 @@ def test_save_documents_when_no_family(): with pytest.raises(ValidationError) as e: ingest_service.import_data(test_data, "test") - assert f"No family with id ['{fam_import_id}'] found" == e.value.message + assert f"No family with id {fam_import_id} found for document" == e.value.message def test_save_events_when_data_invalid(validation_service_mock):