Add a test for the import_ids (#122)
* Add a test for the import_ids

* Fix test and add a negative one

* Tighten ID_MATCHER, remove redundant code, ensure logging works
diversemix authored May 30, 2023
1 parent beed956 commit aba0a5f
Showing 7 changed files with 71 additions and 96 deletions.
2 changes: 1 addition & 1 deletion app/api/api_v1/routers/admin.py
@@ -39,7 +39,7 @@ async def update_document(
"props": {
"superuser_email": current_user.email,
"import_id_or_slug": import_id_or_slug,
"meta_data": meta_data,
"meta_data": meta_data.as_json(),
}
},
)
90 changes: 0 additions & 90 deletions app/api/api_v1/routers/cclw_ingest.py
@@ -12,12 +12,10 @@
UploadFile,
status,
)
from sqlalchemy import update
from app.core.aws import S3Document

from app.api.api_v1.schemas.document import (
BulkIngestResult,
DocumentUpdateRequest,
)
from app.core.auth import get_superuser_details
from app.core.aws import get_s3_client
@@ -41,16 +39,13 @@
get_result_counts,
)
from app.core.ingestion.validator import validate_event_row
from app.core.validation import IMPORT_ID_MATCHER
from app.core.validation.types import ImportSchemaMismatchError
from app.core.validation.util import (
get_new_s3_prefix,
write_csv_to_s3,
write_documents_to_s3,
write_ingest_results_to_s3,
)
from app.db.models.document.physical_document import PhysicalDocument
from app.db.models.law_policy.family import FamilyDocument, Slug
from app.db.session import get_db

_LOGGER = logging.getLogger(__name__)
@@ -397,88 +392,3 @@ def _validate_cclw_csv(
_LOGGER.info(message)

return documents_file_contents, message


@r.put("/documents/{import_id_or_slug}", status_code=status.HTTP_200_OK)
async def update_document(
request: Request,
import_id_or_slug: str,
meta_data: DocumentUpdateRequest,
db=Depends(get_db),
current_user=Depends(get_superuser_details),
):
# TODO: As this grows move it out into the crud later.

_LOGGER.info(
f"Superuser '{current_user.email}' called update_document",
extra={
"props": {
"superuser_email": current_user.email,
"import_id_or_slug": import_id_or_slug,
"meta_data": meta_data,
}
},
)

# First query the FamilyDocument
query = db.query(FamilyDocument)
if IMPORT_ID_MATCHER.match(import_id_or_slug) is not None:
family_document = query.filter(
FamilyDocument.import_id == import_id_or_slug
).one_or_none()
_LOGGER.info("update_document called with import_id")
else:
family_document = (
query.join(Slug, Slug.family_document_import_id == FamilyDocument.import_id)
.filter(Slug.name == import_id_or_slug)
.one_or_none()
)
_LOGGER.info("update_document called with slug")

# Check we have found one
if family_document is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
)

# Get the physical document to update
physical_document = family_document.physical_document

# Note this code relies on the fields being the same as the db column names
num_changed = db.execute(
update(PhysicalDocument)
.values(meta_data.dict())
.where(PhysicalDocument.id == physical_document.id)
).rowcount

if num_changed == 0:
_LOGGER.info("update_document complete - nothing changed")
return physical_document # Nothing to do - as should be idempotent

if num_changed > 1:
# This should never happen due to table uniqueness constraints
# TODO Rollback
raise HTTPException(
detail=(
f"There was more than one document identified by {import_id_or_slug}. "
"This should not happen!!!"
),
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

db.commit()
db.refresh(physical_document)
_LOGGER.info(
"Call to update_document complete",
extra={
"props": {
"superuser_email": current_user.email,
"num_changed": num_changed,
"import_id": family_document.import_id,
"md5_sum": physical_document.md5_sum,
"content_type": physical_document.content_type,
"cdn_object": physical_document.cdn_object,
}
},
)
return physical_document
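
Note: the whole update_document endpoint above is deleted from cclw_ingest.py; the admin.py hunk at the top of this commit touches the copy that remains there. Its key branch is the matcher-based dispatch between import IDs and slugs. A minimal standalone sketch of that branch, where IMPORT_ID_MATCHER is the tightened pattern from app.core.validation and lookup_kind is an illustrative helper, not repository code:

import re

# Tightened pattern from app/core/validation/__init__.py (see below)
IMPORT_ID_MATCHER = re.compile(r"^\w+\.[\w\-]+\.\w+\.\w+$")

def lookup_kind(import_id_or_slug: str) -> str:
    """Mirror the endpoint's branch: direct import_id lookup vs. slug join."""
    if IMPORT_ID_MATCHER.match(import_id_or_slug) is not None:
        return "import_id"  # filter FamilyDocument.import_id directly
    return "slug"           # join through Slug.name instead

assert lookup_kind("CCLW.executive.1.2") == "import_id"
assert lookup_kind("DocSlug1") == "slug"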
1 change: 1 addition & 0 deletions app/api/api_v1/routers/unfccc_ingest.py
@@ -74,6 +74,7 @@ def start_unfccc_ingest(
read(
documents_file_contents, context, UNFCCCDocumentIngestRow, document_ingestor
)
_LOGGER.info("UNFCCC Ingestion Complete")
except Exception as e:
# This is a background task, so do not raise
_LOGGER.exception(
10 changes: 9 additions & 1 deletion app/api/api_v1/schemas/document.py
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import Any, Mapping, Optional, Sequence
from typing import Any, Mapping, Optional, Sequence, Union

from pydantic import BaseModel, validator
from . import CLIMATE_LAWS_MATCH
@@ -159,6 +159,14 @@ class DocumentUpdateRequest(BaseModel):
content_type: Optional[str]
cdn_object: Optional[str]

def as_json(self) -> dict[str, Union[str, None]]:
"""Convert to json for logging"""
return {
"md5_sum": self.md5_sum,
"content_type": self.content_type,
"cdn_object": self.cdn_object,
}


class BulkIngestDetail(BaseModel):
"""Additional detail for bulk ingest."""
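
The new as_json() exists so the admin router can log a JSON-serializable payload in its structured-log props (see the admin.py hunk at the top, which swaps meta_data for meta_data.as_json()). A minimal sketch of the difference, assuming only the three fields shown in this diff:

import json
from typing import Optional, Union

from pydantic import BaseModel

class DocumentUpdateRequest(BaseModel):
    md5_sum: Optional[str]
    content_type: Optional[str]
    cdn_object: Optional[str]

    def as_json(self) -> dict[str, Union[str, None]]:
        """Convert to json for logging"""
        return {
            "md5_sum": self.md5_sum,
            "content_type": self.content_type,
            "cdn_object": self.cdn_object,
        }

meta_data = DocumentUpdateRequest(md5_sum="abc123", content_type="text/html")
print(json.dumps({"meta_data": meta_data.as_json()}))  # serializes cleanly
# json.dumps({"meta_data": meta_data}) would raise TypeError: the model
# instance itself is not JSON-serializable, which trips JSON log formatters.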
2 changes: 1 addition & 1 deletion app/core/validation/__init__.py
@@ -2,4 +2,4 @@
import re

PIPELINE_BUCKET = os.environ["PIPELINE_BUCKET"]
IMPORT_ID_MATCHER = re.compile(r"\w+\.[\w\-]+\.\w+\.\w+")
IMPORT_ID_MATCHER = re.compile(r"^\w+\.[\w\-]+\.\w+\.\w+$")
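
The functional change here is the anchoring. re.match anchors at the start of the string but not the end, so the old pattern accepted any string whose prefix looked like an import ID. A quick check with one of the deliberately malformed IDs from the new test fixture below, where OLD and NEW are just local names for the two patterns:

import re

OLD = re.compile(r"\w+\.[\w\-]+\.\w+\.\w+")
NEW = re.compile(r"^\w+\.[\w\-]+\.\w+\.\w+$")

bad_id = "UNFCCC.s.ill.y.2.2"  # six dot-separated parts, not four
assert OLD.match(bad_id) is not None  # prefix "UNFCCC.s.ill.y" matched
assert NEW.match(bad_id) is None      # a full-string match is now required

(The "^" is redundant under re.match but makes the intent explicit and keeps the pattern safe if it is ever used with re.search.)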
10 changes: 10 additions & 0 deletions tests/routes/document_helpers.py
@@ -23,6 +23,16 @@
2002,0,Test2,FALSE,FALSE,N/A,Collection1,CollectionSummary1,Title2,Fam2,Summary2,,MAIN,,GBR,http://another_somewhere|en,executive,03/03/2024|Law passed,Energy,,,Mitigation,,Order,,,Energy Supply,Algeria,,,CCLW.executive.2.2,CCLW.family.2002.0,CPR.Collection.1,FamSlug2,DocSlug2,
"""

TWO_DFC_ROW_DIFFERENT_ORG = """ID,Document ID,CCLW Description,Part of collection?,Create new family/ies?,Collection ID,Collection name,Collection summary,Document title,Family name,Family summary,Family ID,Document role,Applies to ID,Geography ISO,Documents,Category,Events,Sectors,Instruments,Frameworks,Responses,Natural Hazards,Document Type,Year,Language,Keywords,Geography,Parent Legislation,Comment,CPR Document ID,CPR Family ID,CPR Collection ID,CPR Family Slug,CPR Document Slug,Document variant
1001,0,Test1,FALSE,FALSE,N/A,Collection1,CollectionSummary1,Title1,Fam1,Summary1,,MAIN,,GBR,http://somewhere|en,executive,02/02/2014|Law passed,Energy,,,Mitigation,,Order,,,Energy Supply,Algeria,,,CCLW.executive.1.2,CCLW.family.1001.0,CPR.Collection.1,FamSlug1,DocSlug1,
2002,0,Test2,FALSE,FALSE,N/A,Collection1,CollectionSummary1,Title2,Fam2,Summary2,,MAIN,,GBR,http://another_somewhere|en,executive,03/03/2024|Law passed,Energy,,,Mitigation,,Order,,,Energy Supply,Algeria,,,UNFCCC.non-party.2.2,CCLW.family.2002.0,CPR.Collection.1,FamSlug2,DocSlug2,
"""

TWO_DFC_ROW_NON_MATCHING_IDS = """ID,Document ID,CCLW Description,Part of collection?,Create new family/ies?,Collection ID,Collection name,Collection summary,Document title,Family name,Family summary,Family ID,Document role,Applies to ID,Geography ISO,Documents,Category,Events,Sectors,Instruments,Frameworks,Responses,Natural Hazards,Document Type,Year,Language,Keywords,Geography,Parent Legislation,Comment,CPR Document ID,CPR Family ID,CPR Collection ID,CPR Family Slug,CPR Document Slug,Document variant
1001,0,Test1,FALSE,FALSE,N/A,Collection1,CollectionSummary1,Title1,Fam1,Summary1,,MAIN,,GBR,http://somewhere|en,executive,02/02/2014|Law passed,Energy,,,Mitigation,,Order,,,Energy Supply,Algeria,,,CCLW.executive.12,CCLW.family.1001.0,CPR.Collection.1,FamSlug1,DocSlug1,
2002,0,Test2,FALSE,FALSE,N/A,Collection1,CollectionSummary1,Title2,Fam2,Summary2,,MAIN,,GBR,http://another_somewhere|en,executive,03/03/2024|Law passed,Energy,,,Mitigation,,Order,,,Energy Supply,Algeria,,,UNFCCC.s.ill.y.2.2,CCLW.family.2002.0,CPR.Collection.1,FamSlug2,DocSlug2,
"""

ONE_DFC_ROW_TWO_LANGUAGES = """ID,Document ID,CCLW Description,Part of collection?,Create new family/ies?,Collection ID,Collection name,Collection summary,Document title,Family name,Family summary,Family ID,Document role,Applies to ID,Geography ISO,Documents,Category,Events,Sectors,Instruments,Frameworks,Responses,Natural Hazards,Document Type,Year,Language,Keywords,Geography,Parent Legislation,Comment,CPR Document ID,CPR Family ID,CPR Collection ID,CPR Family Slug,CPR Document Slug,Document variant
1001,0,Test1,FALSE,FALSE,N/A,Collection1,CollectionSummary1,Title1,Fam1,Summary1,,MAIN,,GBR,http://somewhere|en,executive,02/02/2014|Law passed,Energy,,,Mitigation,,Order,,French;English,Energy Supply,Algeria,,,CCLW.executive.1.2,CCLW.family.1001.0,CPR.Collection.1,FamSlug1,DocSlug1,Translation
"""
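
The new fixtures line up with the tightened matcher: in TWO_DFC_ROW_DIFFERENT_ORG the ID UNFCCC.non-party.2.2 is valid because [\w\-]+ permits a hyphen in the second segment, while TWO_DFC_ROW_NON_MATCHING_IDS carries CCLW.executive.12 (three dotted parts) and UNFCCC.s.ill.y.2.2 (six). An illustrative sanity check, not part of the test suite:

import re

IMPORT_ID_MATCHER = re.compile(r"^\w+\.[\w\-]+\.\w+\.\w+$")

for import_id, expected in [
    ("CCLW.executive.1.2", True),
    ("UNFCCC.non-party.2.2", True),   # "-" allowed by [\w\-]+
    ("CCLW.executive.12", False),     # only three dotted parts
    ("UNFCCC.s.ill.y.2.2", False),    # six dotted parts
]:
    assert (IMPORT_ID_MATCHER.match(import_id) is not None) == expected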
52 changes: 49 additions & 3 deletions tests/routes/test_document_families.py
@@ -1,3 +1,4 @@
import pytest
from typing import Callable, Generator
from sqlalchemy.orm import Session
from fastapi.testclient import TestClient
@@ -6,7 +7,9 @@
from tests.routes.document_helpers import (
ONE_DFC_ROW_TWO_LANGUAGES,
ONE_EVENT_ROW,
TWO_DFC_ROW_DIFFERENT_ORG,
TWO_DFC_ROW_ONE_LANGUAGE,
TWO_DFC_ROW_NON_MATCHING_IDS,
TWO_EVENT_ROWS,
populate_languages,
setup_with_docs,
@@ -238,15 +241,58 @@ def test_update_document__is_secure(
assert response.status_code == 401


def test_update_document__works_on_import_id(
@pytest.mark.parametrize(
"import_id",
[
"CCLW.executive.12",
"UNFCCC.s.ill.y.2.2",
],
)
def test_update_document__fails_on_non_matching_import_id(
client: TestClient,
superuser_token_headers: dict[str, str],
test_db: Session,
mocker: Callable[..., Generator[MockerFixture, None, None]],
import_id: str,
):
setup_with_two_docs(test_db, mocker)
setup_with_multiple_docs(
test_db,
mocker,
doc_data=TWO_DFC_ROW_NON_MATCHING_IDS,
event_data=TWO_EVENT_ROWS,
)
payload = {
"md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76",
"content_type": "updated/content_type",
"cdn_object": "folder/file",
}

import_id = "CCLW.executive.1.2"
response = client.put(
f"/api/v1/admin/documents/{import_id}",
headers=superuser_token_headers,
json=payload,
)

assert response.status_code == 422


@pytest.mark.parametrize(
"import_id",
[
"CCLW.executive.1.2",
"UNFCCC.non-party.2.2",
],
)
def test_update_document__works_on_import_id(
client: TestClient,
superuser_token_headers: dict[str, str],
test_db: Session,
mocker: Callable[..., Generator[MockerFixture, None, None]],
import_id: str,
):
setup_with_multiple_docs(
test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS
)
payload = {
"md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76",
"content_type": "updated/content_type",
