Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDCT 395/only add counters #168

Merged
merged 10 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions alembic/versions/0019_add_entity_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""add entity counters

Revision ID: 0019
Revises: 0018
Create Date: 2023-10-02 11:32:43.825217

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session

from app.db.models.app.counters import ORGANISATION_CCLW, ORGANISATION_UNFCCC, EntityCounter
diversemix marked this conversation as resolved.
Show resolved Hide resolved


# revision identifiers, used by Alembic.
revision = '0019'
down_revision = '0018'
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``entity_counter`` table.

    The table holds one row per organisation prefix; the check constraint
    restricts ``prefix`` to 'CCLW' or 'UNFCCC' and the unique constraint
    allows at most one row for each.
    """
    # Column definitions, in the order they appear in the table.
    columns = (
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('description', sa.String(), nullable=False),
        sa.Column('prefix', sa.String(), nullable=False),
        sa.Column('counter', sa.Integer(), nullable=True),
    )
    # Table-level constraints; op.f() marks the names as already final.
    constraints = (
        sa.CheckConstraint(
            "prefix IN ('CCLW','UNFCCC')",
            name=op.f('ck_entity_counter__prefix_allowed_orgs'),
        ),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_entity_counter')),
        sa.UniqueConstraint('prefix', name=op.f('uq_entity_counter__prefix')),
    )
    op.create_table('entity_counter', *columns, *constraints)



def downgrade():
    """Revert revision 0019 by dropping the ``entity_counter`` table.

    The table name must match the one created by ``upgrade()``. The
    generated command referred to ``entity_counters`` (plural), a table that
    this migration never creates, so the downgrade could not succeed.
    """
    # ### commands auto generated by Alembic - adjusted: table name fixed ###
    op.drop_table('entity_counter')
    # ### end Alembic commands ###
7 changes: 4 additions & 3 deletions app/api/api_v1/routers/cclw_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
write_documents_to_s3,
write_ingest_results_to_s3,
)
from app.db.models.app import ORGANISATION_CCLW
from app.db.session import get_db

_LOGGER = logging.getLogger(__name__)
Expand All @@ -63,7 +64,7 @@ def _start_ingest(
context = None
# TODO: add a way for a user to monitor progress of the ingest
try:
context = initialise_context(db, "CCLW")
context = initialise_context(db, ORGANISATION_CCLW)
document_ingestor = get_cclw_document_ingestor(db, context)
read(documents_file_contents, context, CCLWDocumentIngestRow, document_ingestor)
event_ingestor = get_event_ingestor(db)
Expand Down Expand Up @@ -135,7 +136,7 @@ def validate_law_policy(
)

try:
context = initialise_context(db, "CCLW")
context = initialise_context(db, ORGANISATION_CCLW)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down Expand Up @@ -206,7 +207,7 @@ def ingest_law_policy(
)

try:
context = initialise_context(db, "CCLW")
context = initialise_context(db, ORGANISATION_CCLW)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down
7 changes: 4 additions & 3 deletions app/api/api_v1/routers/unfccc_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
write_documents_to_s3,
write_ingest_results_to_s3,
)
from app.db.models.app import ORGANISATION_UNFCCC
from app.db.session import get_db

_LOGGER = logging.getLogger(__name__)
Expand All @@ -63,7 +64,7 @@ def start_unfccc_ingest(
context = None
# TODO: add a way for a user to monitor progress of the ingest
try:
context = initialise_context(db, "UNFCCC")
context = initialise_context(db, ORGANISATION_UNFCCC)
# First the collections....
collection_ingestor = get_collection_ingestor(db)
read(
Expand Down Expand Up @@ -149,7 +150,7 @@ def validate_unfccc_law_policy(
)

try:
context = initialise_context(db, "UNFCCC")
context = initialise_context(db, ORGANISATION_UNFCCC)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down Expand Up @@ -230,7 +231,7 @@ def ingest_unfccc_law_policy(
)

try:
context = initialise_context(db, "UNFCCC")
context = initialise_context(db, ORGANISATION_UNFCCC)
except Exception as e:
_LOGGER.exception(
"Failed to create ingest context", extra={"props": {"errors": str(e)}}
Expand Down
9 changes: 5 additions & 4 deletions app/core/ingestion/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
validate_cclw_document_row,
validate_unfccc_document_row,
)
from app.db.models.app import ORGANISATION_CCLW, ORGANISATION_UNFCCC
from app.db.models.app.users import Organisation
from app.db.models.law_policy.geography import GEO_INTERNATIONAL, GEO_NONE

Expand Down Expand Up @@ -235,11 +236,11 @@ def initialise_context(db: Session, org_name: str) -> IngestContext:
"""
with db.begin():
organisation = db.query(Organisation).filter_by(name=org_name).one()
if org_name == "CCLW":
if org_name == ORGANISATION_CCLW:
return CCLWIngestContext(
org_name=org_name, org_id=cast(int, organisation.id), results=[]
)
if org_name == "UNFCCC":
if org_name == ORGANISATION_UNFCCC:
return UNFCCCIngestContext(
org_name=org_name, org_id=cast(int, organisation.id), results=[]
)
Expand Down Expand Up @@ -366,9 +367,9 @@ def unfccc_process(context: IngestContext, row: UNFCCCDocumentIngestRow) -> None
row=row,
)

if context.org_name == "CCLW":
if context.org_name == ORGANISATION_CCLW:
return cclw_process
elif context.org_name == "UNFCCC":
elif context.org_name == ORGANISATION_UNFCCC:
return unfccc_process

raise ValueError(f"Unknown org {context.org_name} for validation.")
5 changes: 3 additions & 2 deletions app/core/ingestion/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
import enum
from typing import Any, Callable, Optional, TypeVar, cast
from app.db.models.app import ORGANISATION_CCLW, ORGANISATION_UNFCCC
from app.db.session import AnyModel
from sqlalchemy.orm import Session

Expand Down Expand Up @@ -229,7 +230,7 @@ class UNFCCCIngestContext(IngestContext):
consistency_validator: ConsistencyValidator
download_urls: dict[str, str] # import_id -> url

def __init__(self, org_name="UNFCCC", org_id=2, results=None):
def __init__(self, org_name=ORGANISATION_UNFCCC, org_id=2, results=None):
self.collection_ids_defined = []
self.collection_ids_referenced = []
self.consistency_validator = ConsistencyValidator()
Expand All @@ -245,7 +246,7 @@ class CCLWIngestContext(IngestContext):

consistency_validator: ConsistencyValidator

def __init__(self, org_name="CCLW", org_id=1, results=None):
def __init__(self, org_name=ORGANISATION_CCLW, org_id=1, results=None):
self.consistency_validator = ConsistencyValidator()
self.org_name = org_name
self.org_id = org_id
Expand Down
1 change: 1 addition & 0 deletions app/data_migrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .populate_geography import populate_geography
from .populate_language import populate_language
from .populate_taxonomy import populate_taxonomy
from .populate_counters import populate_counters
23 changes: 23 additions & 0 deletions app/data_migrations/populate_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from sqlalchemy.orm import Session

from app.db.models.app.counters import (
ORGANISATION_CCLW,
ORGANISATION_UNFCCC,
EntityCounter,
)


def populate_counters(db: Session):
    """Seed the entity counters when the table is empty.

    Adds one ``EntityCounter`` row for each known organisation prefix and
    commits. If any counter row already exists, nothing is added and no
    commit is issued.

    :param db: session used to query, add and commit the counter rows.
    """
    if db.query(EntityCounter).count() != 0:
        # Counters already present - leave them untouched.
        return

    seed_rows = (
        (ORGANISATION_CCLW, "Counter for CCLW entities"),
        (ORGANISATION_UNFCCC, "Counter for UNFCCC entities"),
    )
    for org_prefix, text_description in seed_rows:
        db.add(EntityCounter(prefix=org_prefix, description=text_description))
    db.commit()
5 changes: 3 additions & 2 deletions app/data_migrations/populate_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sqlalchemy.orm import Session
from app.data_migrations.taxonomy_cclw import get_cclw_taxonomy
from app.data_migrations.taxonomy_unf3c import get_unf3c_taxonomy
from app.db.models.app import ORGANISATION_CCLW, ORGANISATION_UNFCCC

from app.db.models.app.users import Organisation
from app.db.models.law_policy.metadata import MetadataOrganisation, MetadataTaxonomy
Expand Down Expand Up @@ -53,14 +54,14 @@ def populate_org_taxonomy(
def populate_taxonomy(db: Session) -> None:
populate_org_taxonomy(
db,
org_name="CCLW",
org_name=ORGANISATION_CCLW,
org_type="Academic",
description="Climate Change Laws of the World",
fn_get_taxonomy=get_cclw_taxonomy,
)
populate_org_taxonomy(
db,
org_name="UNFCCC",
org_name=ORGANISATION_UNFCCC,
org_type="UN",
description="United Nations Framework Convention on Climate Change",
fn_get_taxonomy=get_unf3c_taxonomy,
Expand Down
1 change: 1 addition & 0 deletions app/db/models/app/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .users import AppUser, OrganisationUser, Organisation
from .counters import EntityCounter, ORGANISATION_CCLW, ORGANISATION_UNFCCC
95 changes: 95 additions & 0 deletions app/db/models/app/counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
Schema for counters.

The following section includes the necessary schema for maintaining the counts
of different entity types. These are scoped per "data source" - however the
concept of "data source" is not yet implemented, see PDCT-431.
"""
import logging
from enum import Enum
import sqlalchemy as sa
from sqlalchemy.sql import text
from app.db.session import Base
from sqlalchemy.orm.session import object_session


_LOGGER = logging.getLogger(__name__)

#
# DO NOT ADD TO THE LIST BELOW
#
# NOTE: These need to change when we introduce "Data source" (PDCT-431)
ORGANISATION_CCLW = "CCLW"
ORGANISATION_UNFCCC = "UNFCCC"


class CountedEntity(str, Enum):
    """
    The kinds of entity that have a counter.

    Each member is also a ``str``; its value is the lowercase entity name.
    """

    Collection = "collection"
    Family = "family"
    Document = "document"
    Event = "event"


class EntityCounter(Base):
    """
    A list of entity counters per organisation name.

    NOTE: There is no foreign key, as this is expected to change
    when we introduce data sources (PDCT-431). At that time a
    FK to the new datasource table should be introduced.

    This is used for generating import_ids in the following format:

        <prefix>.<entity>.i<counter>.n<n>

    where <counter> is zero-padded to 8 digits and <n> to 4 digits,
    e.g. "CCLW.family.i00000001.n0000".
    """

    __tablename__ = "entity_counter"
    __table_args__ = (
        sa.CheckConstraint(
            "prefix IN ('CCLW','UNFCCC')",
            name="prefix_allowed_orgs",
        ),
    )

    # Single statement that increments the counter of one row and returns
    # the new value.
    _get_and_increment = text(
        """
        WITH updated AS (
            UPDATE entity_counter SET counter = counter + 1
            WHERE id = :id RETURNING counter
        )
        SELECT counter FROM updated;
        """
    )

    id = sa.Column(sa.Integer, primary_key=True)
    description = sa.Column(sa.String, nullable=False, default="")
    prefix = sa.Column(sa.String, unique=True, nullable=False)  # Organisation.name
    counter = sa.Column(sa.Integer, default=0)

    def get_next_count(self) -> int:
        """
        Increment this counter in the database and return the new value.

        The increment is committed on the session this object belongs to
        before the value is returned.

        :return: the counter value after the increment.
        :raises RuntimeError: if the statement yields no value (no row with
            this id, or the stored counter is NULL).
        """
        try:
            db = object_session(self)
            cmd = self._get_and_increment.bindparams(id=self.id)
            value = db.execute(cmd).scalar()
            if value is None:
                # Without this guard str(None) would leak into import ids
                # as "0000None".
                raise RuntimeError(
                    f"No counter value returned for {self.prefix} (id={self.id})"
                )
            db.commit()
            return value
        except Exception:
            # Log with traceback, then let the caller decide how to recover.
            _LOGGER.exception("When generating counter for %s", self.prefix)
            raise

    def get_import_id(self, entity: CountedEntity, n: int = 0) -> str:
        """
        Generate a new import id for the given entity type.

        Calling this consumes the next counter value (see get_next_count).

        :param entity: the kind of entity the id is for.
        :param n: value for the trailing ".n" component, zero-padded to
            4 digits.
        :return: "<prefix>.<entity>.i<counter>.n<n>".
        :raises RuntimeError: if this row's prefix is not a known organisation.
        """
        # Validation
        if self.prefix not in (ORGANISATION_CCLW, ORGANISATION_UNFCCC):
            raise RuntimeError("Prefix is not a known organisation!")

        i_value = str(self.get_next_count()).zfill(8)
        n_value = str(n).zfill(4)
        return f"{self.prefix}.{entity.value}.i{i_value}.n{n_value}"
4 changes: 2 additions & 2 deletions app/db/models/law_policy/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import relationship

from app.db.models.app import Organisation
from app.db.models.app import ORGANISATION_UNFCCC, Organisation
from app.db.models.app.enum import BaseModelEnum
from app.db.models.document import PhysicalDocument
from app.db.session import Base
Expand All @@ -17,7 +17,7 @@ class FamilyCategory(BaseModelEnum):

EXECUTIVE = "Executive"
LEGISLATIVE = "Legislative"
UNFCCC = "UNFCCC"
UNFCCC = ORGANISATION_UNFCCC
diversemix marked this conversation as resolved.
Show resolved Hide resolved


class Variant(Base):
Expand Down
2 changes: 2 additions & 0 deletions app/initial_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from app.db.session import SessionLocal

from app.data_migrations import (
populate_counters,
populate_document_type,
populate_document_role,
populate_document_variant,
Expand All @@ -33,6 +34,7 @@ def run_data_migrations(db):
populate_geography(db)
populate_language(db)
populate_taxonomy(db)
populate_counters(db)

db.flush() # Geography data is used by geo-stats so flush

Expand Down
Loading