Skip to content

Commit

Permalink
step1 - add corpus info to config (#267)
Browse files Browse the repository at this point in the history
  • Loading branch information
diversemix authored Apr 11, 2024
1 parent f2f295c commit 32cf0bc
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 54 deletions.
14 changes: 13 additions & 1 deletion app/api/api_v1/schemas/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,22 @@
TaxonomyData = Mapping[str, Mapping[str, Union[bool, Sequence[str]]]]


class CorpusData(BaseModel):
    """
    Describes one corpus together with details of its corpus type.

    Instances are returned as the entries of an organisation's `copora`
    list in the config response.
    """

    # Identity and display text of the corpus itself.
    corpus_import_id: str
    title: str
    description: str

    # Name and description of the corpus type the corpus belongs to.
    corpus_type: str
    corpus_type_description: str

    # Taxonomy of the corpus, keyed by taxonomy entry name.
    taxonomy: TaxonomyData


class OrganisationConfig(BaseModel):
"""Definition of stats used on homepage"""

taxonomy: TaxonomyData
taxonomy: TaxonomyData # TODO: Remove this in subsequent deploy see PDCT-1057
copora: Sequence[CorpusData]
total: int
count_by_category: Mapping[str, int]

Expand Down
51 changes: 48 additions & 3 deletions app/core/organisation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import asdict
from typing import cast
from typing import Sequence, cast

from db_client.models.dfce.family import (
Corpus,
Expand All @@ -13,7 +13,7 @@
from sqlalchemy import func
from sqlalchemy.orm import Session

from app.api.api_v1.schemas.metadata import OrganisationConfig, TaxonomyData
from app.api.api_v1.schemas.metadata import CorpusData, OrganisationConfig, TaxonomyData


def get_organisation_taxonomy(db: Session, org_id: int) -> Taxonomy:
Expand All @@ -38,6 +38,7 @@ def get_organisation_taxonomy(db: Session, org_id: int) -> Taxonomy:
return {k: TaxonomyEntry(**v) for k, v in taxonomy[0].items()}


# TODO: Remove this function as part of PDCT-1067
def get_organisation_taxonomy_by_name(db: Session, org_name: str) -> TaxonomyData:
"""
Returns the TaxonomyConfig for the named organisation
Expand Down Expand Up @@ -71,6 +72,48 @@ def get_organisation_taxonomy_by_name(db: Session, org_name: str) -> TaxonomyDat
}


def _to_corpus_data(row, event_types) -> CorpusData:
    """
    Convert one corpus query row into a `CorpusData` object.

    :param row: Result row exposing `corpus_import_id`, `title`,
        `description`, `corpus_type`, `corpus_type_description` and
        `taxonomy` attributes.
    :param event_types: Dataclass instance describing the event types; it is
        converted with `asdict` and stored under the "event_types" key.
    :return: The populated `CorpusData`.
    """
    # Copy the row's taxonomy so the added key never touches the row itself;
    # an existing "event_types" key would be overridden.
    taxonomy = dict(row.taxonomy)
    taxonomy["event_types"] = asdict(event_types)

    return CorpusData(
        corpus_import_id=row.corpus_import_id,
        title=row.title,
        description=row.description,
        corpus_type=row.corpus_type,
        corpus_type_description=row.corpus_type_description,
        taxonomy=taxonomy,
    )


def get_copora_for_org(db: Session, org_name: str) -> Sequence[CorpusData]:
    """
    Return the corpora (with corpus type details) of the named organisation.

    Every returned corpus carries its corpus type's valid metadata as its
    taxonomy, extended with one shared "event_types" entry whose allowed
    values are the names of all family event types in the database.

    :param db: Database session used for both queries.
    :param org_name: Name of the organisation whose corpora are wanted.
    :return: One `CorpusData` per matching corpus row.
    """
    # Labels match the attribute names read by _to_corpus_data.
    selected = db.query(
        Corpus.import_id.label("corpus_import_id"),
        Corpus.title.label("title"),
        Corpus.description.label("description"),
        Corpus.corpus_type_name.label("corpus_type"),
        CorpusType.description.label("corpus_type_description"),
        CorpusType.valid_metadata.label("taxonomy"),
    )
    with_corpus_type = selected.join(
        Corpus,
        Corpus.corpus_type_name == CorpusType.name,
    )
    with_organisation = with_corpus_type.join(
        Organisation, Organisation.id == Corpus.organisation_id
    )
    corpus_rows = with_organisation.filter(Organisation.name == org_name).all()

    # The event types are not filtered by organisation, so a single entry is
    # built once and reused for every corpus.
    event_names = [event.name for event in db.query(FamilyEventType).all()]
    event_entry = TaxonomyEntry(
        allow_blanks=False,
        allowed_values=event_names,
        allow_any=False,
    )

    return [_to_corpus_data(corpus_row, event_entry) for corpus_row in corpus_rows]


def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfig:
total = (
db.query(Family)
Expand Down Expand Up @@ -98,8 +141,10 @@ def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfi
else:
count_by_category[category] = 0

org_name = cast(str, org.name)
return OrganisationConfig(
total=total,
count_by_category=count_by_category,
taxonomy=get_organisation_taxonomy_by_name(db, cast(str, org.name)),
taxonomy=get_organisation_taxonomy_by_name(db, org_name),
copora=get_copora_for_org(db, org_name),
)
121 changes: 71 additions & 50 deletions tests/routes/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,39 @@
from app.core.util import tree_table_to_json
from app.db.session import SessionLocal

# Number of top-level keys expected in each organisation's config entry.
LEN_ORG_CONFIG = 4

# Taxonomy keys expected for the CCLW organisation.
EXPECTED_CCLW_TAXONOMY = {
    "event_types",
    "framework",
    "hazard",
    "instrument",
    "keyword",
    "sector",
    "topic",
}

# Allowed values expected for the CCLW "event_types" taxonomy entry.
EXPECTED_CCLW_EVENTS = [
    "Amended",
    "Appealed",
    "Closed",
    "Declaration Of Climate Emergency",
    "Dismissed",
    "Entered Into Force",
    "Filing",
    "Granted",
    "Implementation Details",
    "International Agreement",
    "Net Zero Pledge",
    "Other",
    "Passed/Approved",
    "Repealed/Replaced",
    "Set",
    "Settled",
    "Updated",
]


# Taxonomy keys expected for the UNFCCC organisation.
EXPECTED_UNFCCC_TAXONOMY = {"author", "author_type", "event_types"}


def _add_family(test_db, import_id: str, cat: FamilyCategory, corpus_import_id):
test_db.add(
Expand Down Expand Up @@ -42,50 +75,6 @@ def test_config_endpoint_content(data_client, data_db):

assert "geographies" in response_json
assert len(response_json["geographies"]) == 8

assert "organisations" in response_json

assert "CCLW" in response_json["organisations"]
cclw_taxonomy = response_json["organisations"]["CCLW"]["taxonomy"]
assert set(cclw_taxonomy) == {
"instrument",
"keyword",
"sector",
"topic",
"framework",
"hazard",
"event_types",
}
cclw_taxonomy_event_types = cclw_taxonomy["event_types"]["allowed_values"]
cclw_expected_event_types = [
"Amended",
"Appealed",
"Closed",
"Declaration Of Climate Emergency",
"Dismissed",
"Entered Into Force",
"Filing",
"Granted",
"Implementation Details",
"International Agreement",
"Net Zero Pledge",
"Other",
"Passed/Approved",
"Repealed/Replaced",
"Set",
"Settled",
"Updated",
]
assert set(cclw_taxonomy_event_types) ^ set(cclw_expected_event_types) == set()

assert "UNFCCC" in response_json["organisations"]
unfccc_taxonomy = response_json["organisations"]["UNFCCC"]["taxonomy"]
assert set(unfccc_taxonomy) == {"author", "author_type", "event_types"}
assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == {
"Party",
"Non-Party",
}

assert "languages" in response_json
assert len(response_json["languages"]) == 7893
assert "fra" in response_json["languages"]
Expand All @@ -100,16 +89,48 @@ def test_config_endpoint_content(data_client, data_db):
assert len(response_json["document_variants"]) == 2
assert "Original Language" in response_json["document_variants"]

org_config = response_json["organisations"]["CCLW"]
assert len(org_config) == 3
assert "taxonomy" in org_config
assert org_config["total"] == 0
assert org_config["count_by_category"] == {
# Now test organisations
assert "organisations" in response_json

assert "CCLW" in response_json["organisations"]
cclw_org = response_json["organisations"]["CCLW"]
assert len(cclw_org) == LEN_ORG_CONFIG

# Test the counts are there (just CCLW)
assert cclw_org["total"] == 0
assert cclw_org["count_by_category"] == {
"Executive": 0,
"Legislative": 0,
"UNFCCC": 0,
}

assert "UNFCCC" in response_json["organisations"]
unfccc_org = response_json["organisations"]["UNFCCC"]
assert len(unfccc_org) == LEN_ORG_CONFIG

# Old tests - to be removed in PDCT-1057
cclw_taxonomy = cclw_org["taxonomy"]
assert set(cclw_taxonomy) == EXPECTED_CCLW_TAXONOMY
cclw_taxonomy_event_types = cclw_taxonomy["event_types"]["allowed_values"]
assert set(cclw_taxonomy_event_types) ^ set(EXPECTED_CCLW_EVENTS) == set()

unfccc_taxonomy = unfccc_org["taxonomy"]
assert set(unfccc_taxonomy) == EXPECTED_UNFCCC_TAXONOMY
assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == {
"Party",
"Non-Party",
}

# New taxonomy tests
cclw_copora = cclw_org["copora"]
assert len(cclw_copora) == 1
assert cclw_copora[0]["corpus_import_id"] == "CCLW.corpus.i00000001.n0000"
assert cclw_copora[0]["corpus_type"] == "Laws and Policies"
assert cclw_copora[0]["corpus_type_description"] == "Laws and policies"
assert cclw_copora[0]["description"] == "CCLW national policies"
assert cclw_copora[0]["title"] == "CCLW national policies"
assert set(cclw_copora[0]["taxonomy"]) ^ EXPECTED_CCLW_TAXONOMY == set()


def test_config_endpoint_cclw_stats(data_client, data_db):
url_under_test = "/api/v1/config"
Expand Down Expand Up @@ -144,7 +165,7 @@ def test_config_endpoint_cclw_stats(data_client, data_db):
response_json = response.json()

org_config = response_json["organisations"]["CCLW"]
assert len(org_config) == 3
assert len(org_config) == LEN_ORG_CONFIG
assert org_config["total"] == 6

laws = org_config["count_by_category"]["Legislative"]
Expand Down

0 comments on commit 32cf0bc

Please sign in to comment.