Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making updates to handle different iso codes. #155

Merged
merged 6 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 39 additions & 5 deletions app/api/api_v1/routers/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ async def update_document(
if meta_data.languages is not None:
_LOGGER.info(
"Adding meta_data object languages to the database.",
extra={"props": {"meta_data_languages": meta_data.languages}},
extra={
"props": {
"meta_data_languages": meta_data.languages,
"import_id_or_slug": import_id_or_slug,
}
},
)

physical_document_languages = (
Expand All @@ -97,10 +102,39 @@ async def update_document(
}

for language in meta_data.languages:
lang = (
db.query(Language)
.filter(Language.language_code == language)
.one_or_none()
if len(language) == 2: # iso639-1 two letter language code
lang = (
db.query(Language)
.filter(Language.part1_code == language)
.one_or_none()
)
elif len(language) == 3: # iso639-2/3 three letter language code
lang = (
db.query(Language)
.filter(Language.language_code == language)
.one_or_none()
)
joel-wright marked this conversation as resolved.
Show resolved Hide resolved
else:
_LOGGER.warning(
"Retrieved no language from database for meta_data object language.",
extra={
"props": {
"metadata_language": language,
"import_id_or_slug": import_id_or_slug,
}
},
)
lang = None

_LOGGER.info(
"Retrieved language from database for meta_data object language.",
extra={
"props": {
"metadata_language": language,
"db_language": (None if lang is None else lang.language_code),
"import_id_or_slug": import_id_or_slug,
}
},
)
if lang is not None and language not in existing_language_codes:
physical_document_language = PhysicalDocumentLanguage(
Expand Down
2 changes: 2 additions & 0 deletions app/db/models/document/physical_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from app.db.session import Base


# TODO Our current process for updating languages in the database relies on all the part1_code's being unique/null.
# Thus, we should enforce null or uniqueness on part1_code's in the database.
class Language(Base):
"""
A language used to identify the content of a document.
Expand Down
175 changes: 175 additions & 0 deletions tests/routes/test_document_families.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,181 @@ def test_update_document__works_on_new_language(
assert lang.language_code in expected_languages


@pytest.mark.parametrize(
    "import_id",
    [
        "CCLW.executive.1.2",
        "UNFCCC.non-party.2.2",
    ],
)
def test_update_document__works_on_new_iso_639_1_language(
    client: TestClient,
    superuser_token_headers: dict[str, str],
    test_db: Session,
    mocker: Callable[..., Generator[MockerFixture, None, None]],
    import_id: str,
):
    """Update a document twice using two-letter (ISO 639-1) language codes.

    Sends two payloads in series to assert that:
    * a two-letter code is resolved to the corresponding three-letter
      language_code stored in the DB ("bo" -> "bod", "el" -> "ell"), and
    * languages are additive — the second update must not remove the
      language added by the first.
    """
    setup_with_multiple_docs(
        test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS
    )

    # Shared document metadata for both payloads; only "languages" differs.
    expected_md5 = "c184214e-4870-48e0-adab-3e064b1b0e76"
    expected_content_type = "updated/content_type"
    expected_cdn_object = "folder/file"

    # ADD THE FIRST LANGUAGE (two-letter code, expected to resolve to "bod")
    payload = {
        "md5_sum": expected_md5,
        "content_type": expected_content_type,
        "cdn_object": expected_cdn_object,
        "languages": ["bo"],
    }

    response = client.put(
        f"/api/v1/admin/documents/{import_id}",
        headers=superuser_token_headers,
        json=payload,
    )

    assert response.status_code == 200
    json_object = response.json()
    assert json_object["md5_sum"] == expected_md5
    assert json_object["content_type"] == expected_content_type
    assert json_object["cdn_object"] == expected_cdn_object
    assert {language["language_code"] for language in json_object["languages"]} == {
        "bod"
    }

    # Now check the DB reflects the same state as the API response.
    doc = (
        test_db.query(FamilyDocument)
        .filter(FamilyDocument.import_id == import_id)
        .one()
        .physical_document
    )
    assert doc.md5_sum == expected_md5
    assert doc.content_type == expected_content_type
    assert doc.cdn_object == expected_cdn_object

    languages = (
        test_db.query(PhysicalDocumentLanguage)
        .filter(PhysicalDocumentLanguage.document_id == doc.id)
        .all()
    )
    assert len(languages) == 1
    lang = test_db.query(Language).filter(Language.id == languages[0].language_id).one()
    assert lang.language_code == "bod"

    # NOW ADD A NEW LANGUAGE TO CHECK THAT THE UPDATE IS ADDITIVE
    # ("el" is expected to resolve to "ell").
    payload = {
        "md5_sum": expected_md5,
        "content_type": expected_content_type,
        "cdn_object": expected_cdn_object,
        "languages": ["el"],
    }

    response = client.put(
        f"/api/v1/admin/documents/{import_id}",
        headers=superuser_token_headers,
        json=payload,
    )

    assert response.status_code == 200
    json_object = response.json()
    assert json_object["md5_sum"] == expected_md5
    assert json_object["content_type"] == expected_content_type
    assert json_object["cdn_object"] == expected_cdn_object
    expected_languages = {"ell", "bod"}
    assert {
        lang["language_code"] for lang in json_object["languages"]
    } == expected_languages

    # Now check the DB again: both languages must be present.
    doc = (
        test_db.query(FamilyDocument)
        .filter(FamilyDocument.import_id == import_id)
        .one()
        .physical_document
    )
    assert doc.md5_sum == expected_md5
    assert doc.content_type == expected_content_type
    assert doc.cdn_object == expected_cdn_object

    doc_languages = (
        test_db.query(PhysicalDocumentLanguage)
        .filter(PhysicalDocumentLanguage.document_id == doc.id)
        .all()
    )
    assert len(doc_languages) == 2
    for doc_lang in doc_languages:
        lang = test_db.query(Language).filter(Language.id == doc_lang.language_id).one()
        assert lang.language_code in expected_languages


@pytest.mark.parametrize(
    "import_id",
    [
        "CCLW.executive.1.2",
        "UNFCCC.non-party.2.2",
    ],
)
def test_update_document__logs_warning_on_four_letter_language(
    client: TestClient,
    superuser_token_headers: dict[str, str],
    test_db: Session,
    mocker: Callable[..., Generator[MockerFixture, None, None]],
    import_id: str,
):
    """A language code that is too long is not added and a warning is logged."""
    setup_with_multiple_docs(
        test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS
    )

    # Payload with a four letter language code that won't exist in the db.
    update_payload = {
        "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76",
        "content_type": "updated/content_type",
        "cdn_object": "folder/file",
        "languages": ["boda"],
    }

    from app.api.api_v1.routers.admin import _LOGGER

    warning_spy = mocker.spy(_LOGGER, "warning")

    response = client.put(
        f"/api/v1/admin/documents/{import_id}",
        headers=superuser_token_headers,
        json=update_payload,
    )

    assert response.status_code == 200
    response_body = response.json()
    assert response_body["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert response_body["content_type"] == "updated/content_type"
    assert response_body["cdn_object"] == "folder/file"
    assert {
        language["language_code"] for language in response_body["languages"]
    } == set()

    # Exactly one warning should have been emitted, with the expected message.
    assert warning_spy.call_args_list[0].args[0] == (
        "Retrieved no language from database for meta_data object language."
    )
    assert len(warning_spy.call_args_list) == 1

    # The document metadata itself was still updated in the db...
    physical_doc = (
        test_db.query(FamilyDocument)
        .filter(FamilyDocument.import_id == import_id)
        .one()
        .physical_document
    )
    assert physical_doc.md5_sum == "c184214e-4870-48e0-adab-3e064b1b0e76"
    assert physical_doc.content_type == "updated/content_type"
    assert physical_doc.cdn_object == "folder/file"

    # ...but no language rows were created for the unknown code.
    stored_languages = (
        test_db.query(PhysicalDocumentLanguage)
        .filter(PhysicalDocumentLanguage.document_id == physical_doc.id)
        .all()
    )
    assert len(stored_languages) == 0


@pytest.mark.parametrize(
"import_id",
[
Expand Down