diff --git a/app/core/ingestion/processor.py b/app/core/ingestion/processor.py
index ba89333c..de29407a 100644
--- a/app/core/ingestion/processor.py
+++ b/app/core/ingestion/processor.py
@@ -34,6 +34,7 @@
     validate_unfccc_document_row,
 )
 from app.db.models.app.users import Organisation
+from app.db.models.law_policy.geography import GEO_INTERNATIONAL, GEO_NONE
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -43,6 +44,16 @@
 ProcessFunc = Callable[[IngestContext, _RowType], None]
 
 
+def parse_csv_geography(csv_geo: str) -> str:
+    """Map a geography code read from an ingest CSV onto a database value.
+
+    A blank cell becomes GEO_NONE and the old-style "INT" code becomes
+    GEO_INTERNATIONAL; every other code is returned unchanged.
+    """
+    csv_aliases = {"": GEO_NONE, "INT": GEO_INTERNATIONAL}
+    return csv_aliases.get(csv_geo, csv_geo)
+
+
 def build_params_from_cclw(row: CCLWDocumentIngestRow) -> IngestParameters:
     def add_metadata(db: Session, import_id: str, taxonomy: Taxonomy, taxonomy_id: int):
         add_cclw_metadata(db, import_id, taxonomy, taxonomy_id, row)
@@ -59,7 +70,7 @@ def add_metadata(db: Session, import_id: str, taxonomy: Taxonomy, taxonomy_id: i
         family_summary=row.family_summary,
         document_role=row.document_role,
         document_variant=row.document_variant,
-        geography_iso=row.geography_iso,
+        geography_iso=parse_csv_geography(row.geography_iso),
         documents=row.documents,
         category=row.category,
         document_type=row.document_type,
@@ -89,7 +100,7 @@ def add_metadata(db: Session, import_id: str, taxonomy: Taxonomy, taxonomy_id: i
         family_summary=row.family_summary,
         document_role=row.document_role,
         document_variant=row.document_variant,
-        geography_iso=row.geography_iso,
+        geography_iso=parse_csv_geography(row.geography_iso),
         documents=row.documents,
         category=row.category,
         document_type=row.submission_type,
diff --git a/app/core/ingestion/validator.py b/app/core/ingestion/validator.py
index 09217714..7dd7d10c 100644
--- a/app/core/ingestion/validator.py
+++ b/app/core/ingestion/validator.py
@@ -22,6 +22,7 @@
     Variant,
     Geography,
 )
+from app.db.models.law_policy.geography import GEO_NONE
 from app.db.session import Base
 
 DbTable = Base
@@ -104,9 +105,12 @@ def
validate_unfccc_document_row( errors.append(result) # validate: geography_iso: str - result = _check_geo_in_db(n, db, row.geography_iso) - if result.type != ResultType.OK: - errors.append(result) + if row.geography_iso != "": + result = _check_geo_in_db(n, db, row.geography_iso) + if result.type != ResultType.OK: + errors.append(result) + else: + row.geography_iso = GEO_NONE # validate: Submission type as document type result = _check_value_in_db( diff --git a/app/data_migrations/data/geo_stats_data.json b/app/data_migrations/data/geo_stats_data.json index c15a1a20..4eb9d082 100644 --- a/app/data_migrations/data/geo_stats_data.json +++ b/app/data_migrations/data/geo_stats_data.json @@ -995,18 +995,6 @@ "visibility_status": "published", "iso": "IDN" }, - { - "name": "International", - "legislative_process": "", - "federal": false, - "federal_details": "", - "political_groups": "", - "global_emissions_percent": null, - "climate_risk_index": null, - "worldbank_income_group": "", - "visibility_status": "draft", - "iso": "INT" - }, { "name": "Iran", "legislative_process": "

The Islamic Republic of Iran has a semi-democratic political system established after the Islamic Revolution of 1979. The political system is based upon governance by an Islamic jurist. The system is comprised of a Supreme Leader, as well as the Executive, Legislative and Judicial branches of power. Although the Constitution affirms the independence of each of the three branches from each other, it states that all three branches are under the direction of the Supreme Leader.

\n

The Supreme Leader is the spiritual leader, commander-in-chief of the armed forces and controls intelligence and security operations. He is elected by the Assembly of Experts on the basis of his personality (leadership abilities, his religious qualifications and his popular esteem). The Supreme Leader appoints the head of the judicial branch, the head of state radio and television, and the supreme commander of the Islamic Revolutionary Guard Corps. He also appoints six of the 12 members of the Council of Guardians, and he designates the members of the Expediency Council.

\n

The head of the Executive branch is the President, who is directly elected by the popular vote to a four-year term, for a maximum of two terms. He is the second highest-ranking official and is responsible for the implementation of the Constitution and for the exercise of executive powers, except for matters directly related to the Supreme Leader. The President appoints and supervises the Council of Ministers. The Government is comprised of the ministers and vice-presidents in charge of different government ministries or organisations. Presently there are 21 ministers and 10 vice-presidents, including the head of the Environmental Protection Organisation. The ministers are accountable to the President as well as to Parliament. The last presidential elections in Iran took place in 2013, and the next elections should be in 2017.

\n

The Parliament has 290 members elected directly for four-year terms and is the main legislative body. It ratifies all laws and international treaties and approves the national Budget. Elections are direct and by secret popular ballot. Each Member of Parliament represents a particular geographic area. The last parliamentary election took place in February 2016, and the next election is expected for 2020.

\n

The upper house, the Council of Guardians, approves all laws ratified by Parliament. Its responsibility is to verify that the law is not in contradiction with Islamic law and the Constitution. They are 12 members; six Islamic Jurists who are Ayatollahs appointed by the Leader, and a further six high level attorneys elected by Parliament. The Council can approve or veto legislation passed by Parliament on the grounds that it is inconsistent with the Constitution and/or Islamic law. Following a veto, Parliament can amend the legislation in order to address the Council\u2019s concerns. If Parliament and the Council of Guardians fail to resolve their differences, the Expediency Council is empowered to make the final decision. The Expediency Council also serves as the Leader\u2019s advisory body to formulate the \u2018general policies\u2019 of the country. These range from economic to environmental, social or judicial matters. The Council\u2019s recommendations become law when they are ratified by the Leader.

", diff --git a/app/data_migrations/data/geography_data.json b/app/data_migrations/data/geography_data.json index d9207f39..d2fc43f0 100644 --- a/app/data_migrations/data/geography_data.json +++ b/app/data_migrations/data/geography_data.json @@ -1443,14 +1443,6 @@ }, "children": [] }, - { - "node": { - "display_value": "International", - "value": "INT", - "type": "ISO-3166" - }, - "children": [] - }, { "node": { "display_value": "Japan", diff --git a/app/data_migrations/data/source/geography-iso-3166.csv b/app/data_migrations/data/source/geography-iso-3166.csv index b0b761f7..406f40e7 100644 --- a/app/data_migrations/data/source/geography-iso-3166.csv +++ b/app/data_migrations/data/source/geography-iso-3166.csv @@ -80,7 +80,6 @@ Hungary,HUN,National,Europe & Central Asia,FALSE,,OECD;EU,0.13%,69,High income Iceland,ISL,National,Europe & Central Asia,FALSE,,OECD,0.01%,170.33,High income India,IND,National,South Asia,TRUE,"29 states, 7 Union Territories, including a National Capital Territory",G77;G20,6.81%,38.67,Lower middle income Indonesia,IDN,National,East Asia & Pacific,FALSE,,G77;G20,4.69%,76.83,Lower middle income -International,INT,Supranational,East Asia & Pacific,FALSE,,,-,, Iran,IRN,National,Middle East & North Africa,FALSE,,G77,1.83%,79,Upper middle income Iraq,IRQ,National,Middle East & North Africa,TRUE,18 provinces,G77;The Arab Group,0.40%,141.33,Upper middle income Ireland,IRL,National,Europe & Central Asia,FALSE,,OECD;EU,0.14%,119.17,High income diff --git a/app/data_migrations/populate_geography.py b/app/data_migrations/populate_geography.py index 8d57a5df..68523808 100644 --- a/app/data_migrations/populate_geography.py +++ b/app/data_migrations/populate_geography.py @@ -5,6 +5,11 @@ from sqlalchemy.orm import Session from app.db.models.law_policy import Geography +from app.db.models.law_policy.geography import ( + CPR_DEFINED_GEOS, + GEO_OTHER, + GeoStatistics, +) from .utils import has_rows, load_tree @@ -19,10 +24,57 @@ def 
_add_geo_slugs(geo_tree: list[dict[str, dict]]):
             _add_geo_slugs(child_nodes)
 
 
+def remove_old_international_geo(db: Session) -> None:
+    """Delete the legacy "INT" geography and its statistics row, if present."""
+    legacy_geo = db.query(Geography).filter(Geography.value == "INT").one_or_none()
+    if legacy_geo is None:
+        return
+
+    # The statistics row references the geography, so it is deleted first.
+    stats_filter = GeoStatistics.geography_id == legacy_geo.id
+    legacy_stats = db.query(GeoStatistics).filter(stats_filter).one_or_none()
+    if legacy_stats is not None:
+        db.delete(legacy_stats)
+        db.flush()
+
+    db.delete(legacy_geo)
+    db.flush()
+
+
 def populate_geography(db: Session) -> None:
     """Populates the geography table with pre-defined data."""
 
-    if has_rows(db, Geography):
+    # Snapshot this before the CPR rows below are added to the table.
+    already_populated = has_rows(db, Geography)
+    remove_old_international_geo(db)
+
+    cpr_type = "ISO-3166 CPR Extension"
+    # "Other" is the parent region for every CPR-defined geography.
+    other_geo = db.query(Geography).filter(Geography.value == GEO_OTHER).one_or_none()
+    if other_geo is None:
+        other_geo = Geography(
+            display_value=GEO_OTHER,
+            slug=slugify(GEO_OTHER),
+            value=GEO_OTHER,
+            type=cpr_type,
+        )
+        db.add(other_geo)
+        db.flush()
+
+    for geo_value, geo_name in CPR_DEFINED_GEOS.items():
+        found = db.query(Geography).filter(Geography.value == geo_value).one_or_none()
+        if found is not None:
+            continue
+        cpr_geo = Geography(
+            display_value=geo_name,
+            slug=slugify(geo_value),
+            value=geo_value,
+            type=cpr_type,
+            parent_id=other_geo.id,
+        )
+        db.add(cpr_geo)
+
+    if already_populated:
         return
 
     with open("app/data_migrations/data/geography_data.json") as geo_data_file:
diff --git a/app/db/models/law_policy/geography.py b/app/db/models/law_policy/geography.py
index 8b956129..87207c08 100644
--- a/app/db/models/law_policy/geography.py
+++ b/app/db/models/law_policy/geography.py
@@ -3,6 +3,13 @@
 from app.db.session import Base
 
 
+GEO_NONE = "XAA"
+GEO_INTERNATIONAL = "XAB"
+
+GEO_OTHER = "Other"
+CPR_DEFINED_GEOS = {GEO_NONE: "No Geography", GEO_INTERNATIONAL: "International"}
+
+
 class Geography(Base):  # noqa: D101
     """Database model for Geography"""
 
diff --git
a/tests/core/ingestion/test_unfccc_ingest_row.py b/tests/core/ingestion/test_unfccc_ingest_row.py
index dc3afb01..9aff23a3 100644
--- a/tests/core/ingestion/test_unfccc_ingest_row.py
+++ b/tests/core/ingestion/test_unfccc_ingest_row.py
@@ -10,35 +10,44 @@
     UNFCCCDocumentIngestRow,
 )
 from app.db.models.law_policy.collection import CollectionOrganisation
+from app.db.models.law_policy.family import Family
+from app.db.models.law_policy.geography import GEO_INTERNATIONAL, GEO_NONE, Geography
 
-# from app.core.ingestion.processor import ingest_unfccc_document_row
-# from app.core.ingestion.unfccc.ingest_row_unfccc import UNFCCCDocumentIngestRow
-# from app.core.ingestion.utils import UNFCCCIngestContext
-# from app.db.models.document.physical_document import PhysicalDocument
-# from app.db.models.law_policy.collection import (
-#     Collection,
-#     CollectionFamily,
-#     CollectionOrganisation,
-# )
-# from app.db.models.law_policy.family import (
-#     Family,
-#     FamilyDocument,
-#     FamilyOrganisation,
-#     Slug,
-# )
 from tests.core.ingestion.helpers import (
-    # COLLECTION_IMPORT_ID,
-    # DOCUMENT_IMPORT_ID,
-    # DOCUMENT_TITLE,
-    # FAMILY_IMPORT_ID,
-    # SLUG_DOCUMENT_NAME,
-    # SLUG_FAMILY_NAME,
-    # get_doc_ingest_row_data,
     populate_for_ingest,
 )
 from app.db.models.law_policy import Collection
 
-# FIXME: All this file needs attention
+
+def make_doc_row(geography_iso: str = "GBR") -> UNFCCCDocumentIngestRow:
+    """Return a fresh UNFCCC document row with the given geography ISO code.
+
+    A new row is built on every call so that a test which changes a field
+    cannot leak that change into the other tests in this module.
+    """
+    return UNFCCCDocumentIngestRow(
+        row_number=1,
+        category="UNFCCC",
+        md5sum="md5sum",
+        submission_type="Plan",
+        family_name="family_name",
+        document_title="document_title",
+        documents="documents",
+        author="author",
+        author_type="Party",
+        geography="GBR",
+        geography_iso=geography_iso,
+        date=datetime.now(),
+        document_role="MAIN",
+        document_variant="Original Language",
+        language=["en"],
+        download_url="download_url",
+        cpr_collection_id="id1",
+        cpr_document_id="cpr_document_id",
+        cpr_family_id="cpr_family_id",
+        cpr_family_slug="cpr_family_slug",
+        cpr_document_slug="cpr_document_slug",
+    )
 
 
 def test_ingest_single_collection_and_document(test_db: Session):
@@ -67,182 +76,67 @@ def test_ingest_single_collection_and_document(test_db: Session):
     )
 
     # Act - create document
-    document_row = UNFCCCDocumentIngestRow(
-        row_number=1,
-        category="UNFCCC",
-        md5sum="md5sum",
-        submission_type="Plan",
-        family_name="family_name",
-        document_title="document_title",
-        documents="documents",
-        author="author",
-        author_type="Party",
-        geography="GBR",
-        geography_iso="GBR",
-        date=datetime.now(),
-        document_role="MAIN",
-        document_variant="Original Language",
-        language=["en"],
-        download_url="download_url",
-        cpr_collection_id="id1",
-        cpr_document_id="cpr_document_id",
-        cpr_family_id="cpr_family_id",
-        cpr_family_slug="cpr_family_slug",
-        cpr_document_slug="cpr_document_slug",
-    )
+    document_row = make_doc_row()
+
     result = ingest_unfccc_document_row(test_db, context, document_row)
     assert len(result) == 8
 
 
-# def setup_for_update(test_db):
-#     context = UNFCCCIngestContext()
-#     row = UNFCCCDocumentIngestRow.from_row(1, get_doc_ingest_row_data(0))
-#     populate_for_ingest(test_db)
-#     ingest_unfccc_document_row(test_db, context, row)
-#     return context, row
-
-
-# def assert_dfc(db: Session, n_docs: int, n_families: int, n_collections: int):
-#     assert n_docs == db.query(FamilyDocument).count()
-#     assert n_docs == db.query(PhysicalDocument).count()
-#     assert n_families == db.query(Family).count()
-#     assert n_collections == db.query(Collection).count()
-
-
-# def test_ingest_row__with_multiple_rows(test_db: Session):
-#     context = UNFCCCIngestContext()
-#     row = UNFCCCDocumentIngestRow.from_row(1, get_doc_ingest_row_data(0))
-#     row.cpr_family_id = "UNFCCC.family.test.1"
-#     row.cpr_family_slug = "fam-test-1"
-#     populate_for_ingest(test_db)
-
-#     # First row
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     assert 9 == len(result.keys())
-#     assert_dfc(test_db, 1, 1, 1)
-
-#     # Second row - adds another document to family
-#     row.cpr_document_id = "UNFCCC.doc.test.1"
-#     row.cpr_document_slug = "doc-test-1"
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     assert 3 == len(result.keys())
-#     assert_dfc(test_db, 2, 1, 1)
-
-#     # Third row - adds another family and document
-#     row.cpr_family_id = "UNFCCC.family.test.2"
-#     row.cpr_family_slug = "fam-test-2"
-#     row.cpr_document_id = "UNFCCC.doc.test.2"
-#     row.cpr_document_slug = "doc-test-2"
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     assert 7 == len(result.keys())
-#     assert_dfc(test_db, 3, 2, 1)
-
-#     # Forth - adds another document to the family
-#     row.cpr_document_id = "UNFCCC.doc.test.3"
-#     row.cpr_document_slug = "doc-test-3"
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     assert 3 == len(result.keys())
-#     assert_dfc(test_db, 4, 2, 1)
-
-#     # Finally change the family id of the document just added
-#     row.cpr_family_id = "UNFCCC.family.test.1"
-#     row.cpr_family_slug = "fam-test-1"
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     assert 1 == len(result.keys())
-#     assert_dfc(test_db, 4, 2, 1)
-
-#     # Now assert both families have correct documents
-#     assert (
-#         3
-#         == test_db.query(FamilyDocument)
-#         .filter_by(family_import_id="UNFCCC.family.test.1")
-#         .count()
-#     )
-#     assert (
-#         1
-#         == test_db.query(FamilyDocument)
-#         .filter_by(family_import_id="UNFCCC.family.test.2")
-#         .count()
-#     )
-
-#     # Now assert collection has 2 families
-#     assert 1 == test_db.query(Collection).count()
-#     assert 2 == test_db.query(CollectionFamily).count()
-
-
-# def test_ingest_row__creates_missing_documents(test_db: Session):
-#     context = UNFCCCIngestContext()
-#     row = UNFCCCDocumentIngestRow.from_row(1, get_doc_ingest_row_data(0))
-#     populate_for_ingest(test_db)
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     actual_keys = set(result.keys())
-#     expected_keys = set(
-#         [
-#             "family_slug",
-#             "family_organisation",
-#             "family",
-#             "physical_document",
-#             "family_document",
-#             "family_document_slug",
-#             "collection",
-#             "collection_organisation",
-#             "collection_family",
-#         ]
-#     )
-#     assert actual_keys.symmetric_difference(expected_keys) == set([])
-#     # Assert db objects
-#     assert test_db.query(Slug).filter_by(name=SLUG_FAMILY_NAME).one()
-#     assert (
-#         test_db.query(FamilyOrganisation)
-#         .filter_by(family_import_id=FAMILY_IMPORT_ID)
-#         .one()
-#     )
-#     assert test_db.query(Family).filter_by(import_id=FAMILY_IMPORT_ID).one()
-#     assert test_db.query(PhysicalDocument).filter_by(title=DOCUMENT_TITLE).one()
-#     assert test_db.query(FamilyDocument).filter_by(import_id=DOCUMENT_IMPORT_ID).one()
-#     assert test_db.query(Slug).filter_by(name=SLUG_DOCUMENT_NAME).one()
-#     assert test_db.query(Collection).filter_by(import_id=COLLECTION_IMPORT_ID).one()
-#     assert (
-#         test_db.query(CollectionOrganisation)
-#         .filter_by(collection_import_id=COLLECTION_IMPORT_ID)
-#         .one()
-#     )
-#     assert (
-#         test_db.query(CollectionFamily)
-#         .filter_by(
-#             collection_import_id=COLLECTION_IMPORT_ID, family_import_id=FAMILY_IMPORT_ID
-#         )
-#         .one()
-#     )
-
-
-# def test_ingest_row__idempotent(test_db: Session):
-#     context, row = setup_for_update(test_db)
-
-#     result = ingest_unfccc_document_row(test_db, context, row)
-#     assert len(result) == 0
-
-#     # Assert db objects
-#     assert test_db.query(Slug).filter_by(name=SLUG_FAMILY_NAME).one()
-#     assert (
-#         test_db.query(FamilyOrganisation)
-#         .filter_by(family_import_id=FAMILY_IMPORT_ID)
-#         .one()
-#     )
-#     assert test_db.query(Family).filter_by(import_id=FAMILY_IMPORT_ID).one()
-#     assert test_db.query(PhysicalDocument).filter_by(title=DOCUMENT_TITLE).one()
-#     assert test_db.query(FamilyDocument).filter_by(import_id=DOCUMENT_IMPORT_ID).one()
-#     assert test_db.query(Slug).filter_by(name=SLUG_DOCUMENT_NAME).one()
-#     assert test_db.query(Collection).filter_by(import_id=COLLECTION_IMPORT_ID).one()
-#     assert (
-#         test_db.query(CollectionOrganisation)
-#         .filter_by(collection_import_id=COLLECTION_IMPORT_ID)
-#         .one()
-#     )
-#     assert (
-#         test_db.query(CollectionFamily)
-#         .filter_by(
-#             collection_import_id=COLLECTION_IMPORT_ID, family_import_id=FAMILY_IMPORT_ID
-#         )
-#         .one()
-#     )
+def test_ingest_blank_geo(test_db: Session):
+    populate_for_ingest(test_db)
+    test_db.commit()
+    context = initialise_context(test_db, "UNFCCC")
+
+    # Act - create collection
+    collection_row = CollectonIngestRow(
+        row_number=1,
+        cpr_collection_id="id1",
+        collection_name="collection-title",
+        collection_summary="collection-description",
+    )
+    result = ingest_collection_row(test_db, context, collection_row)
+
+    # Act - create document (own row instance, so no other test is affected)
+    document_row = make_doc_row(geography_iso="")
+
+    result = ingest_unfccc_document_row(test_db, context, document_row)
+    assert len(result) == 8
+
+    assert 1 == test_db.query(Family).count()
+    family = test_db.query(Family).first()
+    assert family
+    assert family.geography_id
+    geo = test_db.query(Geography).get(family.geography_id)
+    no_geo = test_db.query(Geography).filter(Geography.value == GEO_NONE).one()
+    assert geo == no_geo
+
+
+def test_ingest_international_geo(test_db: Session):
+    populate_for_ingest(test_db)
+    test_db.commit()
+    context = initialise_context(test_db, "UNFCCC")
+
+    # Act - create collection
+    collection_row = CollectonIngestRow(
+        row_number=1,
+        cpr_collection_id="id1",
+        collection_name="collection-title",
+        collection_summary="collection-description",
+    )
+    result = ingest_collection_row(test_db, context, collection_row)
+
+    # Act - create document (own row instance, so no other test is affected)
+    document_row = make_doc_row(geography_iso="INT")
+
+    result = ingest_unfccc_document_row(test_db, context, document_row)
+    assert len(result) == 8
+
+    assert 1 == test_db.query(Family).count()
+    family = test_db.query(Family).first()
+    assert family
+    assert family.geography_id
+    geo = test_db.query(Geography).get(family.geography_id)
+    international = (
+        test_db.query(Geography).filter(Geography.value == GEO_INTERNATIONAL).one()
+    )
+    assert geo == international
diff --git a/tests/routes/test_config.py b/tests/routes/test_config.py
index 48e69e94..4ef3c6b8
100644 --- a/tests/routes/test_config.py +++ b/tests/routes/test_config.py @@ -31,7 +31,7 @@ def test_endpoint_returns_taxonomy(client, test_db): assert len(response_json) == 2 assert "geographies" in response_json - assert len(response_json["geographies"]) == 7 + assert len(response_json["geographies"]) == 8 assert "taxonomies" in response_json assert "CCLW" in response_json["taxonomies"] diff --git a/tests/test_schema/test_inital_data.py b/tests/test_schema/test_inital_data.py index 500a7fdd..20fd8951 100644 --- a/tests/test_schema/test_inital_data.py +++ b/tests/test_schema/test_inital_data.py @@ -15,6 +15,6 @@ def test_initial_data_populates_tables(engine): language_count = db.execute("SELECT count(*) FROM language;").scalar() geo_stats_count = db.execute("SELECT count(*) FROM geo_statistics;").scalar() - assert geo_count == 210 + assert geo_count == 212 assert language_count == 7893 - assert geo_stats_count == 202 + assert geo_stats_count == 201