Skip to content

Commit

Permalink
For Vespa indexing, add family slug to db-state so it does not need to be calculated later by the backend.
Browse files Browse the repository at this point in the history
  • Loading branch information
Joel Wright committed Oct 9, 2023
1 parent 1529e0f commit bcfe461
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 6 deletions.
5 changes: 3 additions & 2 deletions app/api/api_v1/schemas/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,13 @@ class DocumentParserInput(BaseModel):
  source_url: Optional[str]
  download_url: Optional[str]

+ import_id: str
  slug: str
+ family_import_id: str
+ family_slug: str

  type: str
  source: str
- import_id: str
- family_import_id: str
  category: str
  geography: str
  languages: Sequence[str]
Expand Down
3 changes: 2 additions & 1 deletion app/core/ingestion/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ def generate_pipeline_ingest_input(db: Session) -> Sequence[DocumentParserInput]
  category=str(family.family_category),
  publication_ts=family.published_date or fallback_date,
  import_id=cast(str, family_document.import_id),
+ slug=cast(str, family_document.slugs[-1].name),
  family_import_id=cast(str, family.import_id),
+ family_slug=cast(str, family.slugs[-1].name),
  source_url=(
  cast(str, family_document.physical_document.source_url)
  if family_document.physical_document is not None
Expand All @@ -50,7 +52,6 @@ def generate_pipeline_ingest_input(db: Session) -> Sequence[DocumentParserInput]
  download_url=None,
  type=cast(str, family_document.document_type or ""),
  source=cast(str, organisation.name),
- slug=cast(str, family_document.slugs[-1].name),
  geography=cast(str, geography.value),
  languages=[
  cast(str, lang.name)
Expand Down
3 changes: 2 additions & 1 deletion tests/core/validation/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,9 @@ def test_write_documents_to_s3(test_s3_client, mocker):
  type="executive",
  source="CCLW",
  import_id="1234-5678",
- family_import_id="family_1234-5678",
  slug="geo_2008_name_1234_5678",
+ family_import_id="family_1234-5678",
+ family_slug="geo_2008_family_1234_5679",
  category="category",
  geography="GEO",
  languages=[],
Expand Down
5 changes: 3 additions & 2 deletions tests/routes/test_admin_unfccc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@
  "description": "Nationally determined contributions under the Paris Agreement. Revised note by the secretariat, Synthesis Report from UNFCCC Secretariat in 2021",
  "source_url": "https://unfccc.int/sites/default/files/resource/cma2021_08r01_S.pdf",
  "download_url": "url of downloaded document",
+ "import_id": "UNFCCC.Document.1.0",
  "slug": "Doc-slug",
+ "family_import_id": "UNFCCC.family.1.0",
+ "family_slug": "Family-slug",
  "type": "Synthesis Report",
  "source": "UNFCCC",
- "import_id": "UNFCCC.Document.1.0",
- "family_import_id": "UNFCCC.family.1.0",
  "category": "UNFCCC",
  "geography": "GBR",
  "languages": [],
Expand Down

0 comments on commit bcfe461

Please sign in to comment.