Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDCT-1057 step1 - add corpus info to config #267

Merged
merged 1 commit into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion app/api/api_v1/schemas/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,22 @@
TaxonomyData = Mapping[str, Mapping[str, Union[bool, Sequence[str]]]]


class CorpusData(BaseModel):
    """Describe a single corpus together with its corpus type.

    Bundles the identifying details of a corpus, the name and description
    of its corpus type, and the taxonomy that applies to it.
    """

    # Identity and human-readable details of the corpus itself.
    corpus_import_id: str
    title: str
    description: str

    # The corpus type this corpus belongs to, and that type's description.
    corpus_type: str
    corpus_type_description: str

    # Taxonomy for this corpus, keyed by taxonomy field name.
    taxonomy: TaxonomyData


class OrganisationConfig(BaseModel):
    """Per-organisation configuration: homepage stats plus taxonomy/corpus info.

    Holds the family counts shown on the homepage alongside the
    organisation-level taxonomy and the list of corpora for the organisation.
    """

    # Organisation-wide taxonomy; declared once (a repeated annotation of the
    # same field adds nothing and only obscures the model definition).
    taxonomy: TaxonomyData  # TODO: Remove this in subsequent deploy see PDCT-1057

    # One entry per corpus of the organisation. The field name (including its
    # spelling) is part of the serialised API response, so it is kept as-is.
    copora: Sequence[CorpusData]

    # Total number of families, and the same count broken down by category.
    total: int
    count_by_category: Mapping[str, int]

Expand Down
51 changes: 48 additions & 3 deletions app/core/organisation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import asdict
from typing import cast
from typing import Sequence, cast

from db_client.models.dfce.family import (
Corpus,
Expand All @@ -13,7 +13,7 @@
from sqlalchemy import func
from sqlalchemy.orm import Session

from app.api.api_v1.schemas.metadata import OrganisationConfig, TaxonomyData
from app.api.api_v1.schemas.metadata import CorpusData, OrganisationConfig, TaxonomyData


def get_organisation_taxonomy(db: Session, org_id: int) -> Taxonomy:
Expand All @@ -38,6 +38,7 @@ def get_organisation_taxonomy(db: Session, org_id: int) -> Taxonomy:
return {k: TaxonomyEntry(**v) for k, v in taxonomy[0].items()}


# TODO: Remove this function as part of PDCT-1067
def get_organisation_taxonomy_by_name(db: Session, org_name: str) -> TaxonomyData:
"""
Returns the TaxonomyConfig for the named organisation
Expand Down Expand Up @@ -71,6 +72,48 @@ def get_organisation_taxonomy_by_name(db: Session, org_name: str) -> TaxonomyDat
}


def _to_corpus_data(row, event_types) -> CorpusData:
    """Convert one corpus query row into a ``CorpusData``.

    :param row: object exposing ``corpus_import_id``, ``title``,
        ``description``, ``corpus_type``, ``corpus_type_description`` and a
        mapping-valued ``taxonomy`` attribute.
    :param event_types: dataclass instance describing the allowed event
        types; it is converted to a plain dict with ``asdict``.
    :return: a ``CorpusData`` whose taxonomy is the row's taxonomy with an
        ``"event_types"`` entry added.
    """
    # "event_types" comes last in the merge, so it takes precedence over any
    # identically named key already present in the row's taxonomy.
    merged_taxonomy = {
        **row.taxonomy,
        "event_types": asdict(event_types),
    }
    return CorpusData(
        corpus_import_id=row.corpus_import_id,
        title=row.title,
        description=row.description,
        corpus_type=row.corpus_type,
        corpus_type_description=row.corpus_type_description,
        taxonomy=merged_taxonomy,
    )


def get_copora_for_org(db: Session, org_name: str) -> Sequence[CorpusData]:
    """Return the corpus details for every corpus of the named organisation.

    :param db: database session used to run the queries.
    :param org_name: name of the organisation whose corpora are wanted.
    :return: one ``CorpusData`` per matching corpus; every entry's taxonomy
        carries the same ``"event_types"`` entry built from all
        ``FamilyEventType`` rows.
    """
    # Labels match the attribute names that _to_corpus_data reads off a row.
    labelled_columns = (
        Corpus.import_id.label("corpus_import_id"),
        Corpus.title.label("title"),
        Corpus.description.label("description"),
        Corpus.corpus_type_name.label("corpus_type"),
        CorpusType.description.label("corpus_type_description"),
        CorpusType.valid_metadata.label("taxonomy"),
    )
    corpus_rows = (
        db.query(*labelled_columns)
        .join(Corpus, Corpus.corpus_type_name == CorpusType.name)
        .join(Organisation, Organisation.id == Corpus.organisation_id)
        .filter(Organisation.name == org_name)
        .all()
    )

    # Event types are not filtered by organisation here: the entry is built
    # once and shared by every corpus returned.
    event_type_names = [
        event_type.name for event_type in db.query(FamilyEventType).all()
    ]
    shared_event_entry = TaxonomyEntry(
        allow_blanks=False,
        allowed_values=event_type_names,
        allow_any=False,
    )

    return [
        _to_corpus_data(corpus_row, shared_event_entry)
        for corpus_row in corpus_rows
    ]


def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfig:
total = (
db.query(Family)
Expand Down Expand Up @@ -98,8 +141,10 @@ def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfi
else:
count_by_category[category] = 0

org_name = cast(str, org.name)
return OrganisationConfig(
total=total,
count_by_category=count_by_category,
taxonomy=get_organisation_taxonomy_by_name(db, cast(str, org.name)),
taxonomy=get_organisation_taxonomy_by_name(db, org_name),
copora=get_copora_for_org(db, org_name),
)
121 changes: 71 additions & 50 deletions tests/routes/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,39 @@
from app.core.util import tree_table_to_json
from app.db.session import SessionLocal

# Number of top-level keys expected in each organisation's config entry
# (compared against len() of the organisation dict in the tests below).
LEN_ORG_CONFIG = 4

# Taxonomy keys expected for the CCLW organisation / corpus.
EXPECTED_CCLW_TAXONOMY = {
    "instrument",
    "keyword",
    "sector",
    "topic",
    "framework",
    "hazard",
    "event_types",
}

# Allowed values expected under the CCLW "event_types" taxonomy entry.
# Tests compare these as sets, so the ordering here is for readability only.
EXPECTED_CCLW_EVENTS = [
    "Amended",
    "Appealed",
    "Closed",
    "Declaration Of Climate Emergency",
    "Dismissed",
    "Entered Into Force",
    "Filing",
    "Granted",
    "Implementation Details",
    "International Agreement",
    "Net Zero Pledge",
    "Other",
    "Passed/Approved",
    "Repealed/Replaced",
    "Set",
    "Settled",
    "Updated",
]

# Taxonomy keys expected for the UNFCCC organisation.
EXPECTED_UNFCCC_TAXONOMY = {"author", "author_type", "event_types"}


def _add_family(test_db, import_id: str, cat: FamilyCategory, corpus_import_id):
test_db.add(
Expand Down Expand Up @@ -42,50 +75,6 @@ def test_config_endpoint_content(data_client, data_db):

assert "geographies" in response_json
assert len(response_json["geographies"]) == 8

assert "organisations" in response_json

assert "CCLW" in response_json["organisations"]
cclw_taxonomy = response_json["organisations"]["CCLW"]["taxonomy"]
assert set(cclw_taxonomy) == {
"instrument",
"keyword",
"sector",
"topic",
"framework",
"hazard",
"event_types",
}
cclw_taxonomy_event_types = cclw_taxonomy["event_types"]["allowed_values"]
cclw_expected_event_types = [
"Amended",
"Appealed",
"Closed",
"Declaration Of Climate Emergency",
"Dismissed",
"Entered Into Force",
"Filing",
"Granted",
"Implementation Details",
"International Agreement",
"Net Zero Pledge",
"Other",
"Passed/Approved",
"Repealed/Replaced",
"Set",
"Settled",
"Updated",
]
assert set(cclw_taxonomy_event_types) ^ set(cclw_expected_event_types) == set()

assert "UNFCCC" in response_json["organisations"]
unfccc_taxonomy = response_json["organisations"]["UNFCCC"]["taxonomy"]
assert set(unfccc_taxonomy) == {"author", "author_type", "event_types"}
assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == {
"Party",
"Non-Party",
}

assert "languages" in response_json
assert len(response_json["languages"]) == 7893
assert "fra" in response_json["languages"]
Expand All @@ -100,16 +89,48 @@ def test_config_endpoint_content(data_client, data_db):
assert len(response_json["document_variants"]) == 2
assert "Original Language" in response_json["document_variants"]

org_config = response_json["organisations"]["CCLW"]
assert len(org_config) == 3
assert "taxonomy" in org_config
assert org_config["total"] == 0
assert org_config["count_by_category"] == {
# Now test organisations
assert "organisations" in response_json

assert "CCLW" in response_json["organisations"]
cclw_org = response_json["organisations"]["CCLW"]
assert len(cclw_org) == LEN_ORG_CONFIG

# Test the counts are there (just CCLW)
assert cclw_org["total"] == 0
assert cclw_org["count_by_category"] == {
"Executive": 0,
"Legislative": 0,
"UNFCCC": 0,
}

assert "UNFCCC" in response_json["organisations"]
unfccc_org = response_json["organisations"]["UNFCCC"]
assert len(unfccc_org) == LEN_ORG_CONFIG

# Old tests - to be removed in PDCT-1057
cclw_taxonomy = cclw_org["taxonomy"]
assert set(cclw_taxonomy) == EXPECTED_CCLW_TAXONOMY
cclw_taxonomy_event_types = cclw_taxonomy["event_types"]["allowed_values"]
assert set(cclw_taxonomy_event_types) ^ set(EXPECTED_CCLW_EVENTS) == set()

unfccc_taxonomy = unfccc_org["taxonomy"]
assert set(unfccc_taxonomy) == EXPECTED_UNFCCC_TAXONOMY
assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == {
"Party",
"Non-Party",
}

# New taxonomy tests
cclw_copora = cclw_org["copora"]
assert len(cclw_copora) == 1
assert cclw_copora[0]["corpus_import_id"] == "CCLW.corpus.i00000001.n0000"
assert cclw_copora[0]["corpus_type"] == "Laws and Policies"
assert cclw_copora[0]["corpus_type_description"] == "Laws and policies"
assert cclw_copora[0]["description"] == "CCLW national policies"
assert cclw_copora[0]["title"] == "CCLW national policies"
assert set(cclw_copora[0]["taxonomy"]) ^ EXPECTED_CCLW_TAXONOMY == set()


def test_config_endpoint_cclw_stats(data_client, data_db):
url_under_test = "/api/v1/config"
Expand Down Expand Up @@ -144,7 +165,7 @@ def test_config_endpoint_cclw_stats(data_client, data_db):
response_json = response.json()

org_config = response_json["organisations"]["CCLW"]
assert len(org_config) == 3
assert len(org_config) == LEN_ORG_CONFIG
assert org_config["total"] == 6

laws = org_config["count_by_category"]["Legislative"]
Expand Down
Loading