diff --git a/app/api/api_v1/routers/admin.py b/app/api/api_v1/routers/admin.py index 74b07afc..18e0f508 100644 --- a/app/api/api_v1/routers/admin.py +++ b/app/api/api_v1/routers/admin.py @@ -83,7 +83,12 @@ async def update_document( if meta_data.languages is not None: _LOGGER.info( "Adding meta_data object languages to the database.", - extra={"props": {"meta_data_languages": meta_data.languages}}, + extra={ + "props": { + "meta_data_languages": meta_data.languages, + "import_id_or_slug": import_id_or_slug, + } + }, ) physical_document_languages = ( @@ -97,10 +102,39 @@ async def update_document( } for language in meta_data.languages: - lang = ( - db.query(Language) - .filter(Language.language_code == language) - .one_or_none() + if len(language) == 2: # iso639-1 two letter language code + lang = ( + db.query(Language) + .filter(Language.part1_code == language) + .one_or_none() + ) + elif len(language) == 3: # iso639-2/3 three letter language code + lang = ( + db.query(Language) + .filter(Language.language_code == language) + .one_or_none() + ) + else: + _LOGGER.warning( + "Retrieved no language from database for meta_data object language.", + extra={ + "props": { + "metadata_language": language, + "import_id_or_slug": import_id_or_slug, + } + }, + ) + lang = None + + _LOGGER.info( + "Retrieved language from database for meta_data object language.", + extra={ + "props": { + "metadata_language": language, + "db_language": (None if lang is None else lang.language_code), + "import_id_or_slug": import_id_or_slug, + } + }, ) if lang is not None and language not in existing_language_codes: physical_document_language = PhysicalDocumentLanguage( diff --git a/app/db/models/document/physical_document.py b/app/db/models/document/physical_document.py index d16472aa..83b11c81 100644 --- a/app/db/models/document/physical_document.py +++ b/app/db/models/document/physical_document.py @@ -6,6 +6,8 @@ from app.db.session import Base +# TODO Our current process for updating languages 
in the database relies on all the part1_codes being unique/null. +# Thus, we should enforce null or uniqueness on part1_codes in the database. class Language(Base): """ A language used to identify the content of a document. diff --git a/tests/routes/test_document_families.py b/tests/routes/test_document_families.py index d77b0a44..04a3af26 100644 --- a/tests/routes/test_document_families.py +++ b/tests/routes/test_document_families.py @@ -516,6 +516,181 @@ def test_update_document__works_on_new_language( assert lang.language_code in expected_languages +@pytest.mark.parametrize( + "import_id", + [ + "CCLW.executive.1.2", + "UNFCCC.non-party.2.2", + ], +) +def test_update_document__works_on_new_iso_639_1_language( + client: TestClient, + superuser_token_headers: dict[str, str], + test_db: Session, + mocker: Callable[..., Generator[MockerFixture, None, None]], + import_id: str, +): + """Send two payloads in series to assert that languages are additive and we don't remove existing languages.""" + setup_with_multiple_docs( + test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS + ) + + # ADD THE FIRST LANGUAGE + payload = { + "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76", + "content_type": "updated/content_type", + "cdn_object": "folder/file", + "languages": ["bo"], + } + + response = client.put( + f"/api/v1/admin/documents/{import_id}", + headers=superuser_token_headers, + json=payload, + ) + + assert response.status_code == 200 + json_object = response.json() + assert json_object["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76" + assert json_object["content_type"] == "updated/content_type" + assert json_object["cdn_object"] == "folder/file" + assert {language["language_code"] for language in json_object["languages"]} == { + "bod" + } + + # Now Check the db + doc = ( + test_db.query(FamilyDocument) + .filter(FamilyDocument.import_id == import_id) + .one() + .physical_document + ) + assert doc.md5_sum ==
"c184214e-4870-48e0-adab-3e064b1b0e76" + assert doc.content_type == "updated/content_type" + assert doc.cdn_object == "folder/file" + + languages = ( + test_db.query(PhysicalDocumentLanguage) + .filter(PhysicalDocumentLanguage.document_id == doc.id) + .all() + ) + assert len(languages) == 1 + lang = test_db.query(Language).filter(Language.id == languages[0].language_id).one() + assert lang.language_code == "bod" + + # NOW ADD A NEW LANGUAGE TO CHECK THAT THE UPDATE IS ADDITIVE + payload = { + "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76", + "content_type": "updated/content_type", + "cdn_object": "folder/file", + "languages": ["el"], + } + + response = client.put( + f"/api/v1/admin/documents/{import_id}", + headers=superuser_token_headers, + json=payload, + ) + + assert response.status_code == 200 + json_object = response.json() + assert json_object["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76" + assert json_object["content_type"] == "updated/content_type" + assert json_object["cdn_object"] == "folder/file" + expected_languages = {"ell", "bod"} + assert { + lang["language_code"] for lang in json_object["languages"] + } == expected_languages + + # Now Check the db + doc = ( + test_db.query(FamilyDocument) + .filter(FamilyDocument.import_id == import_id) + .one() + .physical_document + ) + assert doc.md5_sum == "c184214e-4870-48e0-adab-3e064b1b0e76" + assert doc.content_type == "updated/content_type" + assert doc.cdn_object == "folder/file" + + doc_languages = ( + test_db.query(PhysicalDocumentLanguage) + .filter(PhysicalDocumentLanguage.document_id == doc.id) + .all() + ) + assert len(doc_languages) == 2 + for doc_lang in doc_languages: + lang = test_db.query(Language).filter(Language.id == doc_lang.language_id).one() + assert lang.language_code in expected_languages + + +@pytest.mark.parametrize( + "import_id", + [ + "CCLW.executive.1.2", + "UNFCCC.non-party.2.2", + ], +) +def test_update_document__logs_warning_on_four_letter_language( + client: 
TestClient, + superuser_token_headers: dict[str, str], + test_db: Session, + mocker: Callable[..., Generator[MockerFixture, None, None]], + import_id: str, +): + """Send a payload to assert that languages that are too long aren't added and a warning is logged.""" + setup_with_multiple_docs( + test_db, mocker, doc_data=TWO_DFC_ROW_DIFFERENT_ORG, event_data=TWO_EVENT_ROWS + ) + + # Payload with a four letter language code that won't exist in the db + payload = { + "md5_sum": "c184214e-4870-48e0-adab-3e064b1b0e76", + "content_type": "updated/content_type", + "cdn_object": "folder/file", + "languages": ["boda"], + } + + from app.api.api_v1.routers.admin import _LOGGER + + log_spy = mocker.spy(_LOGGER, "warning") + + response = client.put( + f"/api/v1/admin/documents/{import_id}", + headers=superuser_token_headers, + json=payload, + ) + + assert response.status_code == 200 + json_object = response.json() + assert json_object["md5_sum"] == "c184214e-4870-48e0-adab-3e064b1b0e76" + assert json_object["content_type"] == "updated/content_type" + assert json_object["cdn_object"] == "folder/file" + assert {language["language_code"] for language in json_object["languages"]} == set() + + assert log_spy.call_args_list[0].args[0] == "Retrieved no language from database for meta_data object " \ + "language." + assert len(log_spy.call_args_list) == 1 + + # Now Check the db + doc = ( + test_db.query(FamilyDocument) + .filter(FamilyDocument.import_id == import_id) + .one() + .physical_document + ) + assert doc.md5_sum == "c184214e-4870-48e0-adab-3e064b1b0e76" + assert doc.content_type == "updated/content_type" + assert doc.cdn_object == "folder/file" + + languages = ( + test_db.query(PhysicalDocumentLanguage) + .filter(PhysicalDocumentLanguage.document_id == doc.id) + .all() + ) + assert len(languages) == 0 + + @pytest.mark.parametrize( "import_id", [