diff --git a/app/api/api_v1/schemas/document.py b/app/api/api_v1/schemas/document.py index f532e3b6..13918b60 100644 --- a/app/api/api_v1/schemas/document.py +++ b/app/api/api_v1/schemas/document.py @@ -121,12 +121,13 @@ class DocumentParserInput(BaseModel): source_url: Optional[str] download_url: Optional[str] + import_id: str slug: str + family_import_id: str + family_slug: str type: str source: str - import_id: str - family_import_id: str category: str geography: str languages: Sequence[str] diff --git a/app/core/ingestion/pipeline.py b/app/core/ingestion/pipeline.py index de6163a9..932c9267 100644 --- a/app/core/ingestion/pipeline.py +++ b/app/core/ingestion/pipeline.py @@ -41,7 +41,9 @@ def generate_pipeline_ingest_input(db: Session) -> Sequence[DocumentParserInput] category=str(family.family_category), publication_ts=family.published_date or fallback_date, import_id=cast(str, family_document.import_id), + slug=cast(str, family_document.slugs[-1].name), family_import_id=cast(str, family.import_id), + family_slug=cast(str, family.slugs[-1].name), source_url=( cast(str, family_document.physical_document.source_url) if family_document.physical_document is not None @@ -50,7 +52,6 @@ def generate_pipeline_ingest_input(db: Session) -> Sequence[DocumentParserInput] download_url=None, type=cast(str, family_document.document_type or ""), source=cast(str, organisation.name), - slug=cast(str, family_document.slugs[-1].name), geography=cast(str, geography.value), languages=[ cast(str, lang.name) diff --git a/tests/core/validation/test_util.py b/tests/core/validation/test_util.py index 03994549..df729680 100644 --- a/tests/core/validation/test_util.py +++ b/tests/core/validation/test_util.py @@ -87,8 +87,9 @@ def test_write_documents_to_s3(test_s3_client, mocker): type="executive", source="CCLW", import_id="1234-5678", - family_import_id="family_1234-5678", slug="geo_2008_name_1234_5678", + family_import_id="family_1234-5678", + family_slug="geo_2008_family_1234_5679", category="category", geography="GEO", languages=[], diff --git a/tests/routes/test_admin_unfccc.py b/tests/routes/test_admin_unfccc.py index 23e87611..1b1ecdd4 100644 --- a/tests/routes/test_admin_unfccc.py +++ b/tests/routes/test_admin_unfccc.py @@ -23,11 +23,12 @@ "description": "Nationally determined contributions under the Paris Agreement. Revised note by the secretariat, Synthesis Report from UNFCCC Secretariat in 2021", "source_url": "https://unfccc.int/sites/default/files/resource/cma2021_08r01_S.pdf", "download_url": "url of downloaded document", + "import_id": "UNFCCC.Document.1.0", "slug": "Doc-slug", + "family_import_id": "UNFCCC.family.1.0", + "family_slug": "Family-slug", "type": "Synthesis Report", "source": "UNFCCC", - "import_id": "UNFCCC.Document.1.0", - "family_import_id": "UNFCCC.family.1.0", "category": "UNFCCC", "geography": "GBR", "languages": [],