Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add author to UNFCCC metadata #129

Merged
merged 1 commit into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions app/core/ingestion/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class TaxonomyEntry:
"""Details a single taxonomy field"""

allow_blanks: bool
allow_any: bool
allowed_values: Sequence[str]


Expand All @@ -36,13 +37,17 @@ def build_metadata_field(
row_set = set(ingest_values)
allowed_set: set[str] = set(taxonomy[tax_key].allowed_values)
allow_blanks = taxonomy[tax_key].allow_blanks
allow_any = taxonomy[tax_key].allow_any

if len(row_set) == 0:
if not allow_blanks:
details = f"Row {row_number} is blank for {tax_key} - which is not allowed."
return Result(type=ResultType.ERROR, details=details), []
return Result(), [] # field is blank and allowed

if allow_any:
return Result(), ingest_values

unknown_set = row_set.difference(allowed_set)
if not unknown_set:
return Result(), ingest_values # all is well - everything found
Expand Down
2 changes: 1 addition & 1 deletion app/core/ingestion/unfccc/ingest_row_unfccc.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class UNFCCCDocumentIngestRow(BaseIngestRow):
family_name: str
document_title: str
documents: str
author: str
author: str # METADATA
author_type: str # METADATA
geography: str
geography_iso: str
Expand Down
1 change: 1 addition & 0 deletions app/core/ingestion/unfccc/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


MAP_OF_LIST_VALUES = {
"author": "author",
"author_type": "author_type",
}

Expand Down
4 changes: 3 additions & 1 deletion app/core/organisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def get_organisation_taxonomy_by_name(db: Session, org_name: str) -> TaxonomyDat
# TODO: in the future move these into the MetadataTaxonomy
event_types = db.query(FamilyEventType).all()
entry = TaxonomyEntry(
allow_blanks=False, allowed_values=[r.name for r in event_types]
allow_blanks=False,
allowed_values=[r.name for r in event_types],
allow_any=False,
)

# The above line will throw if there is no taxonomy for the organisation
Expand Down
6 changes: 6 additions & 0 deletions app/data_migrations/taxonomy_unf3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
"allow_blanks": False,
"allowed_values": ["Party", "Non-Party"],
},
{
"key": "author",
"allow_blanks": False,
"allow_any": True,
"allowed_values": [],
},
]


Expand Down
9 changes: 6 additions & 3 deletions app/data_migrations/taxonomy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
}

These functions allow you to reference the values within the json.
See sector_data.json for example each element in the array contains an object where
See sector_data.json for an example: each element in the array contains an object where
we use the "node.name" as the taxonomy values:

{
Expand All @@ -29,7 +29,7 @@
"children": []
},

This is referenced in the "file_key_path" as the values to be used when a file is
This is referenced in the "file_key_path" as the values to be used when a file is
loaded:

{
Expand Down Expand Up @@ -60,10 +60,13 @@ def _maybe_read(data: dict[str, Any]) -> TaxonomyEntry:
return TaxonomyEntry(
allowed_values=_load_metadata_type(data["filename"], data["file_key_path"]),
allow_blanks=data["allow_blanks"],
allow_any=False,
)
else:
return TaxonomyEntry(
allowed_values=data["allowed_values"], allow_blanks=data["allow_blanks"]
allowed_values=data["allowed_values"],
allow_blanks=data["allow_blanks"],
allow_any=data.get("allow_any", False),
)


Expand Down
14 changes: 14 additions & 0 deletions tests/core/ingestion/test_unfccc_ingest_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from app.db.models.law_policy.collection import CollectionFamily, CollectionOrganisation
from app.db.models.law_policy.family import Family, FamilyEvent
from app.db.models.law_policy.geography import GEO_INTERNATIONAL, GEO_NONE, Geography
from app.db.models.law_policy.metadata import FamilyMetadata

from tests.core.ingestion.helpers import (
populate_for_ingest,
Expand Down Expand Up @@ -77,6 +78,19 @@ def test_ingest_single_collection_and_document(test_db: Session):
result = ingest_unfccc_document_row(test_db, context, document_row)
assert len(result) == 7

test_db_families = test_db.query(Family).all()
assert len(test_db_families) == 1
created_family: Family = test_db_families[0]
created_family_metadata: FamilyMetadata = (
test_db.query(FamilyMetadata)
.filter(FamilyMetadata.family_import_id == created_family.import_id)
.one()
)
assert created_family_metadata.value == {
"author": ["author"],
"author_type": ["Party"],
}


def test_ingest_two_collections_and_document(test_db: Session):
populate_for_ingest(test_db)
Expand Down
7 changes: 6 additions & 1 deletion tests/data_migrations/test_populate_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_populate_taxonomy_unf3c_correct_counts(test_db):
populate_taxonomy(test_db)
taxonomy = get_organisation_taxonomy_by_name(test_db, "UNFCCC")

assert 2 == len(taxonomy)
assert 3 == len(taxonomy)

assert "event_types" in taxonomy
assert 17 == len(taxonomy["event_types"]["allowed_values"])
Expand All @@ -55,3 +55,8 @@ def test_populate_taxonomy_unf3c_correct_counts(test_db):

assert "author_type" in taxonomy
assert 2 == len(taxonomy["author_type"]["allowed_values"])
assert not taxonomy["author_type"]["allow_any"]

assert "author" in taxonomy
assert 0 == len(taxonomy["author"]["allowed_values"])
assert taxonomy["author"]["allow_any"]
2 changes: 1 addition & 1 deletion tests/routes/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_config_endpoint_content(client, test_db):

assert "UNFCCC" in response_json["taxonomies"]
unfccc_taxonomy = response_json["taxonomies"]["UNFCCC"]
assert set(unfccc_taxonomy) == {"author_type", "event_types"}
assert set(unfccc_taxonomy) == {"author", "author_type", "event_types"}
assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == {
"Party",
"Non-Party",
Expand Down