diff --git a/app/core/ingestion/metadata.py b/app/core/ingestion/metadata.py index 75b6e71c..ca957b0d 100644 --- a/app/core/ingestion/metadata.py +++ b/app/core/ingestion/metadata.py @@ -12,6 +12,7 @@ class TaxonomyEntry: """Details a single taxonomy field""" allow_blanks: bool + allow_any: bool allowed_values: Sequence[str] @@ -36,6 +37,7 @@ def build_metadata_field( row_set = set(ingest_values) allowed_set: set[str] = set(taxonomy[tax_key].allowed_values) allow_blanks = taxonomy[tax_key].allow_blanks + allow_any = taxonomy[tax_key].allow_any if len(row_set) == 0: if not allow_blanks: @@ -43,6 +45,9 @@ def build_metadata_field( return Result(type=ResultType.ERROR, details=details), [] return Result(), [] # field is blank and allowed + if allow_any: + return Result(), ingest_values + unknown_set = row_set.difference(allowed_set) if not unknown_set: return Result(), ingest_values # all is well - everything found diff --git a/app/core/ingestion/unfccc/ingest_row_unfccc.py b/app/core/ingestion/unfccc/ingest_row_unfccc.py index a4fe9ef6..fe831edc 100644 --- a/app/core/ingestion/unfccc/ingest_row_unfccc.py +++ b/app/core/ingestion/unfccc/ingest_row_unfccc.py @@ -46,7 +46,7 @@ class UNFCCCDocumentIngestRow(BaseIngestRow): family_name: str document_title: str documents: str - author: str + author: str # METADATA author_type: str # METADATA geography: str geography_iso: str diff --git a/app/core/ingestion/unfccc/metadata.py b/app/core/ingestion/unfccc/metadata.py index eada7baf..ec76dd35 100644 --- a/app/core/ingestion/unfccc/metadata.py +++ b/app/core/ingestion/unfccc/metadata.py @@ -8,6 +8,7 @@ MAP_OF_LIST_VALUES = { + "author": "author", "author_type": "author_type", } diff --git a/app/core/organisation.py b/app/core/organisation.py index b30835ac..6d88da72 100644 --- a/app/core/organisation.py +++ b/app/core/organisation.py @@ -50,7 +50,9 @@ def get_organisation_taxonomy_by_name(db: Session, org_name: str) -> TaxonomyDat # TODO: in the future move these into the MetadataTaxonomy event_types = db.query(FamilyEventType).all() entry = TaxonomyEntry( - allow_blanks=False, allowed_values=[r.name for r in event_types] + allow_blanks=False, + allowed_values=[r.name for r in event_types], + allow_any=False, ) # The above line will throw if there is no taxonomy for the organisation diff --git a/app/data_migrations/taxonomy_unf3c.py b/app/data_migrations/taxonomy_unf3c.py index 1003f419..2012afde 100644 --- a/app/data_migrations/taxonomy_unf3c.py +++ b/app/data_migrations/taxonomy_unf3c.py @@ -7,6 +7,12 @@ "allow_blanks": False, "allowed_values": ["Party", "Non-Party"], }, + { + "key": "author", + "allow_blanks": False, + "allow_any": True, + "allowed_values": [], + }, ] diff --git a/app/data_migrations/taxonomy_utils.py b/app/data_migrations/taxonomy_utils.py index 5c65cd42..88bb04e4 100644 --- a/app/data_migrations/taxonomy_utils.py +++ b/app/data_migrations/taxonomy_utils.py @@ -17,7 +17,7 @@ } These functions allow you to reference the values within the json. -See sector_data.json for example each element in the array contains an object where +See sector_data.json for example each element in the array contains an object where we use the "node.name" as the taxonomy values: { @@ -29,7 +29,7 @@ "children": [] }, -This is referenced in the "file_key_path" as the values to be used when a file is +This is referenced in the "file_key_path" as the values to be used when a file is loaded: { @@ -60,10 +60,13 @@ def _maybe_read(data: dict[str, Any]) -> TaxonomyEntry: return TaxonomyEntry( allowed_values=_load_metadata_type(data["filename"], data["file_key_path"]), allow_blanks=data["allow_blanks"], + allow_any=False, ) else: return TaxonomyEntry( - allowed_values=data["allowed_values"], allow_blanks=data["allow_blanks"] + allowed_values=data["allowed_values"], + allow_blanks=data["allow_blanks"], + allow_any=data.get("allow_any", False), ) diff --git a/tests/core/ingestion/test_unfccc_ingest_row.py b/tests/core/ingestion/test_unfccc_ingest_row.py index 28a9ab8f..2b06810d 100644 --- a/tests/core/ingestion/test_unfccc_ingest_row.py +++ b/tests/core/ingestion/test_unfccc_ingest_row.py @@ -13,6 +13,7 @@ from app.db.models.law_policy.collection import CollectionFamily, CollectionOrganisation from app.db.models.law_policy.family import Family, FamilyEvent from app.db.models.law_policy.geography import GEO_INTERNATIONAL, GEO_NONE, Geography +from app.db.models.law_policy.metadata import FamilyMetadata from tests.core.ingestion.helpers import ( populate_for_ingest, @@ -77,6 +78,19 @@ def test_ingest_single_collection_and_document(test_db: Session): result = ingest_unfccc_document_row(test_db, context, document_row) assert len(result) == 7 + test_db_families = test_db.query(Family).all() + assert len(test_db_families) == 1 + created_family: Family = test_db_families[0] + created_family_metadata: FamilyMetadata = ( + test_db.query(FamilyMetadata) + .filter(FamilyMetadata.family_import_id == created_family.import_id) + .one() + ) + assert created_family_metadata.value == { + "author": ["author"], + "author_type": ["Party"], + } + def test_ingest_two_collections_and_document(test_db: Session): populate_for_ingest(test_db) diff --git a/tests/data_migrations/test_populate_taxonomy.py b/tests/data_migrations/test_populate_taxonomy.py index b8cf9893..a4e89e17 100644 --- a/tests/data_migrations/test_populate_taxonomy.py +++ b/tests/data_migrations/test_populate_taxonomy.py @@ -46,7 +46,7 @@ def test_populate_taxonomy_unf3c_correct_counts(test_db): populate_taxonomy(test_db) taxonomy = get_organisation_taxonomy_by_name(test_db, "UNFCCC") - assert 2 == len(taxonomy) + assert 3 == len(taxonomy) assert "event_types" in taxonomy assert 17 == len(taxonomy["event_types"]["allowed_values"]) @@ -55,3 +55,8 @@ def test_populate_taxonomy_unf3c_correct_counts(test_db): assert "author_type" in taxonomy assert 2 == len(taxonomy["author_type"]["allowed_values"]) + assert not taxonomy["author_type"]["allow_any"] + + assert "author" in taxonomy + assert 0 == len(taxonomy["author"]["allowed_values"]) + assert taxonomy["author"]["allow_any"] diff --git a/tests/routes/test_config.py b/tests/routes/test_config.py index 70fd6c8f..30f4aa4a 100644 --- a/tests/routes/test_config.py +++ b/tests/routes/test_config.py @@ -80,7 +80,7 @@ def test_config_endpoint_content(client, test_db): assert "UNFCCC" in response_json["taxonomies"] unfccc_taxonomy = response_json["taxonomies"]["UNFCCC"] - assert set(unfccc_taxonomy) == {"author_type", "event_types"} + assert set(unfccc_taxonomy) == {"author", "author_type", "event_types"} assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == { "Party", "Non-Party",