Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix multi-collection unfccc ingest rows #118

Merged
merged 4 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 60 additions & 36 deletions app/core/ingestion/collection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Optional
from typing import Any, Optional, cast

from sqlalchemy.orm import Session
from app.core.ingestion.params import IngestParameters
Expand All @@ -13,6 +13,29 @@
from app.db.models.law_policy.collection import CollectionFamily, CollectionOrganisation


def handle_cclw_collection_and_link(
db: Session,
params: IngestParameters,
org_id: int,
family_import_id: str,
result: dict[str, Any],
) -> Optional[Collection]:
collection = handle_create_collection(
db,
params.cpr_collection_ids[0], # Only every one for CCLW
params.collection_name,
params.collection_summary,
org_id,
result,
)

if collection is not None:
handle_link_collection_to_family(
db, params.cpr_collection_ids, cast(str, family_import_id), result
)
return collection


def create_collection(
db: Session,
row: CollectionIngestRow,
Expand Down Expand Up @@ -65,11 +88,12 @@ def create_collection(
)


def handle_collection_and_link(
def handle_create_collection(
db: Session,
params: IngestParameters,
collection_id: str,
collection_name: str,
collection_summary: str,
org_id: int,
family_import_id: str,
result: dict[str, Any],
) -> Optional[Collection]:
"""
Expand All @@ -85,34 +109,27 @@ def handle_collection_and_link(
:param [dict[str, Any]]: a result dict in which to record what was created.
:return [Collection | None]: A collection if one was created, otherwise None.
"""
if not params.cpr_collection_id or params.cpr_collection_id == "n/a":
if not collection_id or collection_id == "n/a":
return None

# First check for the actual collection
existing_collection = (
db.query(Collection)
.filter(Collection.import_id == params.cpr_collection_id)
.one_or_none()
db.query(Collection).filter(Collection.import_id == collection_id).one_or_none()
)

if existing_collection is None:
if params.create_collections is False:
id = params.cpr_collection_id
msg = f"Collection {id} is not pre-exsiting so not linking"
raise ValueError(msg)

collection = create(
db,
Collection,
import_id=params.cpr_collection_id,
title=params.collection_name,
extra={"description": params.collection_summary},
import_id=collection_id,
title=collection_name,
extra={"description": collection_summary},
)

collection_organisation = create(
db,
CollectionOrganisation,
collection_import_id=collection.import_id,
collection_import_id=collection_id,
organisation_id=org_id,
)

Expand All @@ -121,30 +138,37 @@ def handle_collection_and_link(
else:
collection = existing_collection
updated = {}
update_if_changed(updated, "title", params.collection_name, collection)
update_if_changed(updated, "description", params.collection_summary, collection)
update_if_changed(updated, "title", collection_name, collection)
update_if_changed(updated, "description", collection_summary, collection)
if len(updated) > 0:
result["collection"] = updated
db.add(collection)
db.flush()

# Second check for the family - collection link
existing_link = (
db.query(CollectionFamily)
.filter_by(
collection_import_id=params.cpr_collection_id,
family_import_id=params.cpr_family_id,
)
.one_or_none()
)
return collection

if existing_link is None:
collection_family = create(
db,
CollectionFamily,
collection_import_id=collection.import_id,
family_import_id=family_import_id,

def handle_link_collection_to_family(
db: Session,
collection_ids: list[str],
family_import_id: str,
result: dict[str, Any],
) -> None:
for collection_id in collection_ids:
existing_link = (
db.query(CollectionFamily)
.filter_by(
collection_import_id=collection_id,
family_import_id=family_import_id,
)
.one_or_none()
)
result["collection_family"] = to_dict(collection_family)

return collection
if existing_link is None:
collection_family = create(
db,
CollectionFamily,
collection_import_id=collection_id,
family_import_id=family_import_id,
)
result["collection_family"] = to_dict(collection_family)
2 changes: 1 addition & 1 deletion app/core/ingestion/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ class IngestParameters:
geography: str
cpr_document_id: str
cpr_family_id: str
cpr_collection_id: str
cpr_collection_ids: list[str]
cpr_family_slug: str
cpr_document_slug: str
23 changes: 8 additions & 15 deletions app/core/ingestion/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from sqlalchemy.orm import Session
from app.core.ingestion.collection import (
create_collection,
handle_collection_and_link,
handle_cclw_collection_and_link,
handle_link_collection_to_family,
)
from app.core.ingestion.cclw.event import family_event_from_row
from app.core.ingestion.family import handle_family_from_params
Expand Down Expand Up @@ -78,7 +79,7 @@ def add_metadata(db: Session, import_id: str, taxonomy: Taxonomy, taxonomy_id: i
geography=row.geography,
cpr_document_id=row.cpr_document_id,
cpr_family_id=row.cpr_family_id,
cpr_collection_id=row.cpr_collection_id,
cpr_collection_ids=[row.cpr_collection_id],
cpr_family_slug=row.cpr_family_slug,
cpr_document_slug=row.cpr_document_slug,
)
Expand Down Expand Up @@ -108,7 +109,7 @@ def add_metadata(db: Session, import_id: str, taxonomy: Taxonomy, taxonomy_id: i
geography=row.geography,
cpr_document_id=row.cpr_document_id,
cpr_family_id=row.cpr_family_id,
cpr_collection_id=row.cpr_collection_id,
cpr_collection_ids=row.cpr_collection_id,
cpr_family_slug=row.cpr_family_slug,
cpr_document_slug=row.cpr_document_slug,
)
Expand Down Expand Up @@ -138,12 +139,8 @@ def ingest_cclw_document_row(
)
params = build_params_from_cclw(row)
family = handle_family_from_params(db, params, context.org_id, result)
handle_collection_and_link(
db,
params,
context.org_id,
cast(str, family.import_id),
result,
handle_cclw_collection_and_link(
db, params, context.org_id, cast(str, family.import_id), result
)

_LOGGER.info(
Expand Down Expand Up @@ -181,12 +178,8 @@ def ingest_unfccc_document_row(

params = build_params_from_unfccc(row)
family = handle_family_from_params(db, params, context.org_id, result)
handle_collection_and_link(
db,
params,
context.org_id,
cast(str, family.import_id),
result,
handle_link_collection_to_family(
db, params.cpr_collection_ids, cast(str, family.import_id), result
)

ctx = cast(UNFCCCIngestContext, context)
Expand Down
3 changes: 1 addition & 2 deletions app/core/ingestion/unfccc/ingest_row_unfccc.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,14 @@ class UNFCCCDocumentIngestRow(BaseIngestRow):
document_variant: str
language: list[str]

cpr_collection_id: str
cpr_collection_id: list[str]
cpr_document_id: str
cpr_family_id: str
cpr_family_slug: str
cpr_document_slug: str
cpr_document_status: str
download_url: str

# FIXME: Where is the summary from?
family_summary: str = "summary"

VALID_COLUMNS: ClassVar[set[str]] = VALID_DOCUMENT_COLUMN_NAMES
Expand Down
4 changes: 2 additions & 2 deletions app/core/ingestion/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ def validate_unfccc_document_row(
errors,
)

# Add to the collections that are referenced so we can valiate later
context.collection_ids_referenced.append(row.cpr_collection_id)
# Add to the collections that are referenced so we can validate later
context.collection_ids_referenced.extend(row.cpr_collection_id)

if len(errors) > 0:
context.results += errors
Expand Down
14 changes: 8 additions & 6 deletions tests/core/ingestion/test_collection.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import cast
from sqlalchemy.orm import Session
from app.core.ingestion.collection import handle_collection_and_link
from app.core.ingestion.cclw.ingest_row_cclw import CCLWDocumentIngestRow
from app.core.ingestion.processor import build_params_from_cclw
from app.core.ingestion.processor import (
build_params_from_cclw,
handle_cclw_collection_and_link,
)
from app.core.ingestion.utils import get_or_create
from app.db.models.law_policy.collection import (
Collection,
Expand Down Expand Up @@ -43,7 +45,7 @@ def test_handle_collection_from_row__creates(test_db: Session):
result = {}
row, family = db_setup(test_db)

collection = handle_collection_and_link(
collection = handle_cclw_collection_and_link(
test_db, build_params_from_cclw(row), 1, cast(str, family.import_id), result
)
assert collection
Expand Down Expand Up @@ -76,7 +78,7 @@ def test_handle_collection_from_row__updates(test_db: Session):
first_result = {}
row, family = db_setup(test_db)

handle_collection_and_link(
handle_cclw_collection_and_link(
test_db,
build_params_from_cclw(row),
1,
Expand All @@ -87,7 +89,7 @@ def test_handle_collection_from_row__updates(test_db: Session):
result = {}
row.collection_name = "new name"
row.collection_summary = "new summary"
collection = handle_collection_and_link(
collection = handle_cclw_collection_and_link(
test_db, build_params_from_cclw(row), 1, cast(str, family.import_id), result
)
assert collection
Expand All @@ -112,7 +114,7 @@ def test_handle_collection_from_row__ignores(test_db: Session):
row, family = db_setup(test_db)
row.cpr_collection_id = "n/a"

collection = handle_collection_and_link(
collection = handle_cclw_collection_and_link(
test_db, build_params_from_cclw(row), 1, cast(str, family.import_id), result
)

Expand Down
Loading