Skip to content

Commit

Permalink
Making updates to handle different iso codes. (#155)
Browse files Browse the repository at this point in the history
* Making updates to handle different iso codes.

* Updating the logging.

* Adding todo comment and fixing tests.

* Removing trailing space.

* Updating to return one or none.

* Updating tests.

---------

Co-authored-by: Mark <[email protected]>
  • Loading branch information
THOR300 and Mark authored Jul 11, 2023
1 parent 2d99c4d commit ac8058c
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 5 deletions.
44 changes: 39 additions & 5 deletions app/api/api_v1/routers/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ async def update_document(
if meta_data.languages is not None:
_LOGGER.info(
"Adding meta_data object languages to the database.",
extra={"props": {"meta_data_languages": meta_data.languages}},
extra={
"props": {
"meta_data_languages": meta_data.languages,
"import_id_or_slug": import_id_or_slug,
}
},
)

physical_document_languages = (
Expand All @@ -97,10 +102,39 @@ async def update_document(
}

for language in meta_data.languages:
lang = (
db.query(Language)
.filter(Language.language_code == language)
.one_or_none()
if len(language) == 2: # iso639-1 two letter language code
lang = (
db.query(Language)
.filter(Language.part1_code == language)
.one_or_none()
)
elif len(language) == 3: # iso639-2/3 three letter language code
lang = (
db.query(Language)
.filter(Language.language_code == language)
.one_or_none()
)
else:
_LOGGER.warning(
"Retrieved no language from database for meta_data object language.",
extra={
"props": {
"metadata_language": language,
"import_id_or_slug": import_id_or_slug,
}
},
)
lang = None

_LOGGER.info(
"Retrieved language from database for meta_data object language.",
extra={
"props": {
"metadata_language": language,
"db_language": (None if lang is None else lang.language_code),
"import_id_or_slug": import_id_or_slug,
}
},
)
if lang is not None and language not in existing_language_codes:
physical_document_language = PhysicalDocumentLanguage(
Expand Down
2 changes: 2 additions & 0 deletions app/db/models/document/physical_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from app.db.session import Base


# TODO Our current process for updating languages in the database relies on every part1_code being unique or null.
# Thus, we should enforce uniqueness (or null) on part1_code in the database.
class Language(Base):
"""
A language used to identify the content of a document.
Expand Down
175 changes: 175 additions & 0 deletions tests/routes/test_document_families.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,181 @@ def test_update_document__works_on_new_language(
assert lang.language_code in expected_languages


@pytest.mark.parametrize(
    "import_id",
    [
        "CCLW.executive.1.2",
        "UNFCCC.non-party.2.2",
    ],
)
def test_update_document__works_on_new_iso_639_1_language(
    client: TestClient,
    superuser_token_headers: dict[str, str],
    test_db: Session,
    mocker: Callable[..., Generator[MockerFixture, None, None]],
    import_id: str,
):
    """Two sequential updates with ISO 639-1 codes are additive: the second language is added without removing the first."""
    setup_with_multiple_docs(
        test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS
    )

    endpoint = f"/api/v1/admin/documents/{import_id}"

    # First update: add a single two-letter (ISO 639-1) language code.
    first_payload = {
        "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76",
        "content_type": "updated/content_type",
        "cdn_object": "folder/file",
        "languages": ["bo"],
    }
    first_response = client.put(
        endpoint,
        headers=superuser_token_headers,
        json=first_payload,
    )
    assert first_response.status_code == 200

    first_body = first_response.json()
    assert first_body["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert first_body["content_type"] == "updated/content_type"
    assert first_body["cdn_object"] == "folder/file"
    # The two-letter code should come back resolved to its three-letter form.
    returned_codes = {entry["language_code"] for entry in first_body["languages"]}
    assert returned_codes == {"bod"}

    # Verify the database state after the first update.
    physical_doc = (
        test_db.query(FamilyDocument)
        .filter(FamilyDocument.import_id == import_id)
        .one()
        .physical_document
    )
    assert physical_doc.md5_sum == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert physical_doc.content_type == "updated/content_type"
    assert physical_doc.cdn_object == "folder/file"

    language_links = (
        test_db.query(PhysicalDocumentLanguage)
        .filter(PhysicalDocumentLanguage.document_id == physical_doc.id)
        .all()
    )
    assert len(language_links) == 1
    first_language = (
        test_db.query(Language)
        .filter(Language.id == language_links[0].language_id)
        .one()
    )
    assert first_language.language_code == "bod"

    # Second update: a different language must be added, not substituted.
    second_payload = {
        "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76",
        "content_type": "updated/content_type",
        "cdn_object": "folder/file",
        "languages": ["el"],
    }
    second_response = client.put(
        endpoint,
        headers=superuser_token_headers,
        json=second_payload,
    )
    assert second_response.status_code == 200

    second_body = second_response.json()
    assert second_body["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert second_body["content_type"] == "updated/content_type"
    assert second_body["cdn_object"] == "folder/file"
    expected_languages = {"ell", "bod"}
    assert {
        entry["language_code"] for entry in second_body["languages"]
    } == expected_languages

    # Verify the database now links both languages to the document.
    physical_doc = (
        test_db.query(FamilyDocument)
        .filter(FamilyDocument.import_id == import_id)
        .one()
        .physical_document
    )
    assert physical_doc.md5_sum == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert physical_doc.content_type == "updated/content_type"
    assert physical_doc.cdn_object == "folder/file"

    language_links = (
        test_db.query(PhysicalDocumentLanguage)
        .filter(PhysicalDocumentLanguage.document_id == physical_doc.id)
        .all()
    )
    assert len(language_links) == 2
    for link in language_links:
        linked_language = (
            test_db.query(Language).filter(Language.id == link.language_id).one()
        )
        assert linked_language.language_code in expected_languages


@pytest.mark.parametrize(
    "import_id",
    [
        "CCLW.executive.1.2",
        "UNFCCC.non-party.2.2",
    ],
)
def test_update_document__logs_warning_on_four_letter_language(
    client: TestClient,
    superuser_token_headers: dict[str, str],
    test_db: Session,
    mocker: Callable[..., Generator[MockerFixture, None, None]],
    import_id: str,
):
    """A language code that is too long is ignored and produces exactly one warning log."""
    from app.api.api_v1.routers.admin import _LOGGER

    setup_with_multiple_docs(
        test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS
    )

    warning_spy = mocker.spy(_LOGGER, "warning")

    # "boda" is four letters, so it can match neither ISO 639-1 nor 639-2/3.
    update_payload = {
        "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76",
        "content_type": "updated/content_type",
        "cdn_object": "folder/file",
        "languages": ["boda"],
    }
    response = client.put(
        f"/api/v1/admin/documents/{import_id}",
        headers=superuser_token_headers,
        json=update_payload,
    )
    assert response.status_code == 200

    body = response.json()
    assert body["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert body["content_type"] == "updated/content_type"
    assert body["cdn_object"] == "folder/file"
    # No language should have been attached to the document.
    assert {entry["language_code"] for entry in body["languages"]} == set()

    # Exactly one warning must have been emitted for the unknown code.
    assert len(warning_spy.call_args_list) == 1
    assert (
        warning_spy.call_args_list[0].args[0]
        == "Retrieved no language from database for meta_data object language."
    )

    # Confirm the database document was updated but gained no language links.
    physical_doc = (
        test_db.query(FamilyDocument)
        .filter(FamilyDocument.import_id == import_id)
        .one()
        .physical_document
    )
    assert physical_doc.md5_sum == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert physical_doc.content_type == "updated/content_type"
    assert physical_doc.cdn_object == "folder/file"

    language_links = (
        test_db.query(PhysicalDocumentLanguage)
        .filter(PhysicalDocumentLanguage.document_id == physical_doc.id)
        .all()
    )
    assert len(language_links) == 0


@pytest.mark.parametrize(
"import_id",
[
Expand Down

0 comments on commit ac8058c

Please sign in to comment.