Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDCT 395/only add counters #168

Merged
merged 10 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions alembic/versions/0019_add_entity_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""add entity counters

Revision ID: 0019
Revises: 0018
Create Date: 2023-10-02 11:32:43.825217

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# `revision` names this migration; `down_revision` names the migration it
# revises (matches "Revises: 0018" in the module docstring).
revision = '0019'
down_revision = '0018'
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``entity_counter`` table along with its constraints."""
    # ### commands auto generated by Alembic - please adjust! ###
    columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('description', sa.String(), nullable=False),
        sa.Column('prefix', sa.String(), nullable=False),
        sa.Column('counter', sa.Integer(), nullable=True),
    ]
    constraints = [
        # Only the two known organisation names may be used as a prefix.
        sa.CheckConstraint(
            "prefix IN ('CCLW','UNFCCC')",
            name=op.f('ck_entity_counter__prefix_allowed_orgs'),
        ),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_entity_counter')),
        # One counter row per prefix.
        sa.UniqueConstraint('prefix', name=op.f('uq_entity_counter__prefix')),
    ]
    op.create_table('entity_counter', *columns, *constraints)
    # ### end Alembic commands ###



def downgrade():
    """Undo this revision by dropping the ``entity_counter`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = 'entity_counter'
    op.drop_table(table_name)
    # ### end Alembic commands ###
7 changes: 4 additions & 3 deletions app/api/api_v1/routers/cclw_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
write_documents_to_s3,
write_ingest_results_to_s3,
)
from app.db.models.app import ORGANISATION_CCLW
from app.db.session import get_db

_LOGGER = logging.getLogger(__name__)
Expand All @@ -63,7 +64,7 @@ def _start_ingest(
context = None
# TODO: add a way for a user to monitor progress of the ingest
try:
context = initialise_context(db, "CCLW")
context = initialise_context(db, ORGANISATION_CCLW)
document_ingestor = get_cclw_document_ingestor(db, context)
read(documents_file_contents, context, CCLWDocumentIngestRow, document_ingestor)
event_ingestor = get_event_ingestor(db)
Expand Down Expand Up @@ -135,7 +136,7 @@ def validate_law_policy(
)

try:
context = initialise_context(db, "CCLW")
context = initialise_context(db, ORGANISATION_CCLW)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down Expand Up @@ -206,7 +207,7 @@ def ingest_law_policy(
)

try:
context = initialise_context(db, "CCLW")
context = initialise_context(db, ORGANISATION_CCLW)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down
7 changes: 4 additions & 3 deletions app/api/api_v1/routers/unfccc_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
write_documents_to_s3,
write_ingest_results_to_s3,
)
from app.db.models.app import ORGANISATION_UNFCCC
from app.db.session import get_db

_LOGGER = logging.getLogger(__name__)
Expand All @@ -63,7 +64,7 @@ def start_unfccc_ingest(
context = None
# TODO: add a way for a user to monitor progress of the ingest
try:
context = initialise_context(db, "UNFCCC")
context = initialise_context(db, ORGANISATION_UNFCCC)
# First the collections....
collection_ingestor = get_collection_ingestor(db)
read(
Expand Down Expand Up @@ -149,7 +150,7 @@ def validate_unfccc_law_policy(
)

try:
context = initialise_context(db, "UNFCCC")
context = initialise_context(db, ORGANISATION_UNFCCC)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down Expand Up @@ -230,7 +231,7 @@ def ingest_unfccc_law_policy(
)

try:
context = initialise_context(db, "UNFCCC")
context = initialise_context(db, ORGANISATION_UNFCCC)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down
9 changes: 5 additions & 4 deletions app/core/ingestion/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
validate_cclw_document_row,
validate_unfccc_document_row,
)
from app.db.models.app import ORGANISATION_CCLW, ORGANISATION_UNFCCC
from app.db.models.app.users import Organisation
from app.db.models.law_policy.geography import GEO_INTERNATIONAL, GEO_NONE

Expand Down Expand Up @@ -235,11 +236,11 @@ def initialise_context(db: Session, org_name: str) -> IngestContext:
"""
with db.begin():
organisation = db.query(Organisation).filter_by(name=org_name).one()
if org_name == "CCLW":
if org_name == ORGANISATION_CCLW:
return CCLWIngestContext(
org_name=org_name, org_id=cast(int, organisation.id), results=[]
)
if org_name == "UNFCCC":
if org_name == ORGANISATION_UNFCCC:
return UNFCCCIngestContext(
org_name=org_name, org_id=cast(int, organisation.id), results=[]
)
Expand Down Expand Up @@ -366,9 +367,9 @@ def unfccc_process(context: IngestContext, row: UNFCCCDocumentIngestRow) -> None
row=row,
)

if context.org_name == "CCLW":
if context.org_name == ORGANISATION_CCLW:
return cclw_process
elif context.org_name == "UNFCCC":
elif context.org_name == ORGANISATION_UNFCCC:
return unfccc_process

raise ValueError(f"Unknown org {context.org_name} for validation.")
5 changes: 3 additions & 2 deletions app/core/ingestion/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
import enum
from typing import Any, Callable, Optional, TypeVar, cast
from app.db.models.app import ORGANISATION_CCLW, ORGANISATION_UNFCCC
from app.db.session import AnyModel
from sqlalchemy.orm import Session

Expand Down Expand Up @@ -229,7 +230,7 @@ class UNFCCCIngestContext(IngestContext):
consistency_validator: ConsistencyValidator
download_urls: dict[str, str] # import_id -> url

def __init__(self, org_name="UNFCCC", org_id=2, results=None):
def __init__(self, org_name=ORGANISATION_UNFCCC, org_id=2, results=None):
self.collection_ids_defined = []
self.collection_ids_referenced = []
self.consistency_validator = ConsistencyValidator()
Expand All @@ -245,7 +246,7 @@ class CCLWIngestContext(IngestContext):

consistency_validator: ConsistencyValidator

def __init__(self, org_name="CCLW", org_id=1, results=None):
def __init__(self, org_name=ORGANISATION_CCLW, org_id=1, results=None):
self.consistency_validator = ConsistencyValidator()
self.org_name = org_name
self.org_id = org_id
Expand Down
1 change: 1 addition & 0 deletions app/data_migrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .populate_geography import populate_geography
from .populate_language import populate_language
from .populate_taxonomy import populate_taxonomy
from .populate_counters import populate_counters
23 changes: 23 additions & 0 deletions app/data_migrations/populate_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from sqlalchemy.orm import Session

from app.db.models.app.counters import (
ORGANISATION_CCLW,
ORGANISATION_UNFCCC,
EntityCounter,
)


def populate_counters(db: Session):
    """
    Seed the entity counter table with one row per known organisation.

    Nothing is written when the table already contains any rows.

    :param Session db: The session used to query, insert and commit.
    """
    already_populated = db.query(EntityCounter).count() != 0
    if already_populated:
        return

    seed_rows = (
        (ORGANISATION_CCLW, "Counter for CCLW entities"),
        (ORGANISATION_UNFCCC, "Counter for UNFCCC entities"),
    )
    db.add_all(
        [
            EntityCounter(prefix=org_prefix, description=text_description)
            for org_prefix, text_description in seed_rows
        ]
    )
    db.commit()
5 changes: 3 additions & 2 deletions app/data_migrations/populate_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sqlalchemy.orm import Session
from app.data_migrations.taxonomy_cclw import get_cclw_taxonomy
from app.data_migrations.taxonomy_unf3c import get_unf3c_taxonomy
from app.db.models.app import ORGANISATION_CCLW, ORGANISATION_UNFCCC

from app.db.models.app.users import Organisation
from app.db.models.law_policy.metadata import MetadataOrganisation, MetadataTaxonomy
Expand Down Expand Up @@ -53,14 +54,14 @@ def populate_org_taxonomy(
def populate_taxonomy(db: Session) -> None:
populate_org_taxonomy(
db,
org_name="CCLW",
org_name=ORGANISATION_CCLW,
org_type="Academic",
description="Climate Change Laws of the World",
fn_get_taxonomy=get_cclw_taxonomy,
)
populate_org_taxonomy(
db,
org_name="UNFCCC",
org_name=ORGANISATION_UNFCCC,
org_type="UN",
description="United Nations Framework Convention on Climate Change",
fn_get_taxonomy=get_unf3c_taxonomy,
Expand Down
1 change: 1 addition & 0 deletions app/db/models/app/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .users import AppUser, OrganisationUser, Organisation
from .counters import EntityCounter, ORGANISATION_CCLW, ORGANISATION_UNFCCC
107 changes: 107 additions & 0 deletions app/db/models/app/counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Schema for counters.

The following section includes the necessary schema for maintaining the counts
of different entity types. These are scoped per "data source" - however the
concept of "data source" is not yet implemented, see PDCT-431.
"""
import logging
from enum import Enum
import sqlalchemy as sa
from sqlalchemy.sql import text
from app.db.session import Base
from sqlalchemy.orm.session import object_session


_LOGGER = logging.getLogger(__name__)

#
# DO NOT ADD TO THE LIST BELOW
#
# These organisation names are also the only values accepted for
# EntityCounter.prefix (see the "prefix_allowed_orgs" check constraint).
# NOTE: These need to change when we introduce "Data source" (PDCT-431)
ORGANISATION_CCLW = "CCLW"
ORGANISATION_UNFCCC = "UNFCCC"


class CountedEntity(str, Enum):
    """
    The kinds of entity that can be counted.

    Each member's value is the lowercase name of the entity kind; because of
    the ``str`` mixin a member also compares equal to that plain string.
    """

    Collection = "collection"
    Family = "family"
    Document = "document"
    Event = "event"


class EntityCounter(Base):
    """
    A list of entity counters per organisation name.

    NOTE: There is no foreign key, as this is expected to change
    when we introduce data sources (PDCT-431). At that time a
    FK to the new datasource table should be introduced.

    This is used for generating import_ids in the following format:

        <organisation.name>.<entity>.i<counter>.n<n>

    for example ``CCLW.family.i00000001.n0000``.
    """

    __tablename__ = "entity_counter"
    __table_args__ = (
        sa.CheckConstraint(
            "prefix IN ('CCLW','UNFCCC')",
            name="prefix_allowed_orgs",
        ),
    )

    # Bumps the counter of one row and returns the new value in a single
    # statement. The counter column is nullable, so COALESCE treats a NULL
    # counter as 0 instead of letting `counter + 1` evaluate to NULL.
    _get_and_increment = text(
        """
        WITH updated AS (
            UPDATE entity_counter SET counter = COALESCE(counter, 0) + 1
            WHERE id = :id RETURNING counter
        )
        SELECT counter FROM updated;
        """
    )

    id = sa.Column(sa.Integer, primary_key=True)
    description = sa.Column(sa.String, nullable=False, default="")
    prefix = sa.Column(sa.String, unique=True, nullable=False)  # Organisation.name
    counter = sa.Column(sa.Integer, default=0)

    def get_next_count(self) -> int:
        """
        Gets the next counter value and updates the row.

        NOTE: this commits the session that this object belongs to.

        :raises RuntimeError: raised when this object is not attached to a
            session, or when no row with this id was updated.
        :return int: The next counter value.
        """
        try:
            db = object_session(self)
            if db is None:
                raise RuntimeError("EntityCounter is not attached to a session!")
            cmd = self._get_and_increment.bindparams(id=self.id)
            value = db.execute(cmd).scalar()
            if value is None:
                # No row matched the id, so there is nothing to commit and no
                # valid count to hand back.
                raise RuntimeError(f"No counter row was updated for id {self.id}!")
            db.commit()
            return value
        except Exception:
            # Log with context, then let the caller see the original error.
            _LOGGER.exception("When generating counter for %s", self.prefix)
            raise

    def create_import_id(self, entity: CountedEntity) -> str:
        """
        Creates a unique import id.

        This uses the n-value of zero to conform to existing format.

        :param CountedEntity entity: The entity you want counted
        :raises RuntimeError: raised when the prefix is not an organisation.
        :return str: The fully formatted import_id
        """
        # Validation
        if self.prefix not in (ORGANISATION_CCLW, ORGANISATION_UNFCCC):
            raise RuntimeError("Prefix is not a known organisation!")
        n = 0  # The fourth quad is historical
        i_value = str(self.get_next_count()).zfill(8)
        n_value = str(n).zfill(4)
        return f"{self.prefix}.{entity.value}.i{i_value}.n{n_value}"
2 changes: 2 additions & 0 deletions app/initial_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from app.db.session import SessionLocal

from app.data_migrations import (
populate_counters,
populate_document_type,
populate_document_role,
populate_document_variant,
Expand All @@ -33,6 +34,7 @@ def run_data_migrations(db):
populate_geography(db)
populate_language(db)
populate_taxonomy(db)
populate_counters(db)

db.flush() # Geography data is used by geo-stats so flush

Expand Down
24 changes: 24 additions & 0 deletions tests/unit/app/models/test_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from app.data_migrations import populate_counters
from app.db.models.app.counters import CountedEntity, EntityCounter


def test_import_id_generation(test_db):
    """Check the first import id generated from a freshly seeded CCLW counter."""
    populate_counters(test_db)
    assert test_db.query(EntityCounter).count() > 0

    def fetch_cclw_counter() -> EntityCounter:
        # Re-query each time so the value reflects what is in the database.
        query = test_db.query(EntityCounter)
        return query.filter(EntityCounter.prefix == "CCLW").one()

    before = fetch_cclw_counter()
    assert before is not None
    assert before.prefix == "CCLW"
    assert before.counter == 0

    generated_id = before.create_import_id(CountedEntity.Family)
    assert generated_id == "CCLW.family.i00000001.n0000"

    after = fetch_cclw_counter()
    assert after.counter == 1
Loading