From 5ec8357023dcf8c0d0c38901b84770532ee7e0a2 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Thu, 21 Sep 2023 12:33:02 +0100 Subject: [PATCH] refactor(test): add tests for utils module (resolves #50) * Unit tests for `utils` module * Edit some error messages * Add type annotations for `mypy` checks --- src/scribe_data/utils.py | 58 ++++--- tests/load/test_update_utils.py | 273 +++++++++++++++++++++++++++++++- 2 files changed, 306 insertions(+), 25 deletions(-) diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index e86acea65..da7d48f35 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -17,13 +17,15 @@ get_ios_data_path, get_android_data_path, get_desktop_data_path, - check_command_line_args + check_command_line_args, + check_and_return_command_line_args """ import ast +from typing import Any -def get_scribe_languages(): +def get_scribe_languages() -> list[str]: """ Returns the list of currently implemented Scribe languages. """ @@ -39,7 +41,7 @@ def get_scribe_languages(): ] -def get_language_qid(language): +def get_language_qid(language: str) -> str: """ Returns the QID of the given language. @@ -67,13 +69,13 @@ def get_language_qid(language): if language not in language_qid_dict: raise ValueError( - f"{language.upper()} is not currently not a supported language for QID conversion." + f"{language.upper()} is currently not a supported language for QID conversion." ) return language_qid_dict[language] -def get_language_iso(language): +def get_language_iso(language: str) -> str: """ Returns the ISO code of the given language. @@ -101,13 +103,13 @@ def get_language_iso(language): if language not in language_iso_dict: raise ValueError( - f"{language.capitalize()} is not currently not a supported language for ISO conversion." + f"{language.capitalize()} is currently not a supported language for ISO conversion." ) return language_iso_dict[language] -def get_language_from_iso(iso): +def get_language_from_iso(iso: str) -> str: """ Returns the language name for the given ISO. @@ -134,14 +136,12 @@ def get_language_from_iso(iso): } if iso not in iso_language_dict: - raise ValueError( - f"{iso.upper()} is not currently not a supported ISO for language conversion." - ) + raise ValueError(f"{iso.upper()} is currently not a supported ISO language.") return iso_language_dict[iso] -def get_language_words_to_remove(language): +def get_language_words_to_remove(language: str) -> list[str]: """ Returns the words that should not be included as autosuggestions for the given language. @@ -155,7 +155,7 @@ def get_language_words_to_remove(language): The words that should not be included as autosuggestions for the given language as values of a dictionary. """ language = language.lower() - language_iso_dict = { + words_to_remove: dict[str, list[str]] = { "english": [ "of", "the", @@ -181,10 +181,15 @@ def get_language_words_to_remove(language): "swedish": ["of", "the", "The", "and", "Checklist", "Catalogue"], } - return language_iso_dict[language] + if language not in words_to_remove: + raise ValueError( + f"{language.capitalize()} is currently not a supported language." + ) + return words_to_remove[language] -def get_language_words_to_ignore(language): + +def get_language_words_to_ignore(language: str) -> list[str]: """ Returns the words that should not be included as autosuggestions for the given language. @@ -198,7 +203,7 @@ def get_language_words_to_ignore(language): The words that should not be included as autosuggestions for the given language as values of a dictionary. """ language = language.lower() - language_iso_dict = { + words_to_ignore: dict[str, list[str]] = { "french": [ "XXe", ], @@ -210,31 +215,36 @@ def get_language_words_to_ignore(language): "swedish": ["databasdump"], } - return language_iso_dict[language] + if language not in words_to_ignore: + raise ValueError( + f"{language.capitalize()} is currently not a supported language." + ) + + return words_to_ignore[language] -def get_path_from_format_file(): +def get_path_from_format_file() -> str: """ Returns the directory path from a data formatting file to scribe-org. """ return "../../../../../.." -def get_path_from_load_dir(): +def get_path_from_load_dir() -> str: """ Returns the directory path from the load directory to scribe-org. """ return "../../../.." -def get_path_from_et_dir(): +def get_path_from_et_dir() -> str: """ Returns the directory path from the extract_transform directory to scribe-org. """ return "../../../.." -def get_ios_data_path(language: str): +def get_ios_data_path(language: str) -> str: """ Returns the path to the data json of the iOS app given a language. @@ -250,7 +260,7 @@ def get_ios_data_path(language: str): return f"/Scribe-iOS/Keyboards/LanguageKeyboards/{language}" -def get_android_data_path(language: str): +def get_android_data_path(language: str) -> str: """ Returns the path to the data json of the Android app given a language. @@ -266,7 +276,7 @@ def get_android_data_path(language: str): return f"/Scribe-Android/app/src/main/LanguageKeyboards/{language}" -def get_desktop_data_path(language: str): +def get_desktop_data_path(language: str) -> str: """ Returns the path to the data json of the desktop app given a language. @@ -282,7 +292,9 @@ def get_desktop_data_path(language: str): return f"/Scribe-Desktop/scribe/language_guis/{language}" -def check_command_line_args(file_name, passed_values, values_to_check): +def check_command_line_args( + file_name: str, passed_values: Any, values_to_check: list[str] +) -> list[str]: """ Checks command line arguments passed to Scribe-Data files. diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py index d594aacf2..b8722ac79 100644 --- a/tests/load/test_update_utils.py +++ b/tests/load/test_update_utils.py @@ -1,5 +1,274 @@ +import unittest +import pytest + from scribe_data import utils -def test_get_language_qid(): - assert utils.get_language_qid("french") == "Q150" +def test_get_scribe_languages(): + test_case = unittest.TestCase() + + # test for content, not order + test_case.assertCountEqual( + utils.get_scribe_languages(), + [ + "English", + "French", + "German", + "Italian", + "Portuguese", + "Russian", + "Spanish", + "Swedish", + ], + ) + + +@pytest.mark.parametrize( + "language, qid_code", + [ + ("English", "Q1860"), + ("french", "Q150"), + ("GERMAN", "Q188"), + ("iTalian", "Q652"), + ("poRTUGuese", "Q5146"), + ("russian", "Q7737"), + ("spanish", "Q1321"), + ("swedish", "Q9027"), + ], +) +def test_get_language_qid_positive(language, qid_code): + assert utils.get_language_qid(language) == qid_code + + +def test_get_language_qid_negative(): + with pytest.raises(ValueError) as excp: + _ = utils.get_language_qid("Newspeak") + + assert ( + str(excp.value) + == "NEWSPEAK is currently not a supported language for QID conversion." + ) + + +@pytest.mark.parametrize( + "language, iso_code", + [ + ("English", "en"), + ("french", "fr"), + ("GERMAN", "de"), + ("iTalian", "it"), + ("poRTUGuese", "pt"), + ("russian", "ru"), + ("spanish", "es"), + ("SwedisH", "sv"), + ], +) +def test_get_language_iso_positive(language, iso_code): + assert utils.get_language_iso(language) == iso_code + + +def test_get_language_iso_negative(): + with pytest.raises(ValueError) as excp: + _ = utils.get_language_iso("gibberish") + + assert ( + str(excp.value) + == "Gibberish is currently not a supported language for ISO conversion." + ) + + +@pytest.mark.parametrize( + "iso_code, language", + [ + ("en", "English"), + ("fr", "French"), + ("de", "German"), + ("it", "Italian"), + ("pt", "Portuguese"), + ("ru", "Russian"), + ("es", "Spanish"), + ("sv", "Swedish"), + ], +) +def test_get_language_from_iso_positive(iso_code, language): + assert utils.get_language_from_iso(iso_code) == language + + +def test_get_language_from_iso_negative(): + with pytest.raises(ValueError) as excp: + _ = utils.get_language_from_iso("ixi") + + assert str(excp.value) == "IXI is currently not a supported ISO language." + + +@pytest.mark.parametrize( + "language, remove_words", + [ + ( + "english", + [ + "of", + "the", + "The", + "and", + ], + ), + ( + "french", + [ + "of", + "the", + "The", + "and", + ], + ), + ("german", ["of", "the", "The", "and", "NeinJa", "et", "redirect"]), + ("italian", ["of", "the", "The", "and", "text", "from"]), + ("portuguese", ["of", "the", "The", "and", "jbutadptflora"]), + ( + "russian", + [ + "of", + "the", + "The", + "and", + ], + ), + ("spanish", ["of", "the", "The", "and"]), + ("swedish", ["of", "the", "The", "and", "Checklist", "Catalogue"]), + ], +) +def test_get_language_words_to_remove(language, remove_words): + test_case = unittest.TestCase() + + # ignore order, only content matters + test_case.assertCountEqual( + utils.get_language_words_to_remove(language), remove_words + ) + + +def test_get_language_words_to_remove_negative(): + with pytest.raises(ValueError) as excp: + _ = utils.get_language_words_to_remove("python") + + assert str(excp.value) == "Python is currently not a supported language." + + +@pytest.mark.parametrize( + "language, ignore_words", + [ + ( + "french", + [ + "XXe", + ], + ), + ("german", ["Gemeinde", "Familienname"]), + ("italian", ["The", "ATP"]), + ("portuguese", []), + ("russian", []), + ("spanish", []), + ("swedish", ["databasdump"]), + ], +) +def test_get_language_words_to_ignore(language, ignore_words): + test_case = unittest.TestCase() + + # ignore order, only content matters + test_case.assertCountEqual( + utils.get_language_words_to_ignore(language), ignore_words + ) + + +def test_get_language_words_to_ignore_negative(): + with pytest.raises(ValueError) as excp: + _ = utils.get_language_words_to_ignore("JAVA") + + assert str(excp.value) == "Java is currently not a supported language." + + +def test_get_path_from_format_file(): + assert utils.get_path_from_format_file() == "../../../../../.." + + +def test_get_path_from_load_dir(): + assert utils.get_path_from_load_dir() == "../../../.." + + +def test_get_path_from_et_dir(): + # TODO: file path is same as above. Is this correct? + assert utils.get_path_from_et_dir() == "../../../.." + + +def test_get_ios_data_path(): + assert ( + utils.get_ios_data_path("suomi") + == "/Scribe-iOS/Keyboards/LanguageKeyboards/suomi" + ) + + +def test_get_android_data_path(): + assert ( + utils.get_android_data_path("Robbie") + == "/Scribe-Android/app/src/main/LanguageKeyboards/Robbie" + ) + + +def test_get_desktop_data_path(): + assert ( + utils.get_desktop_data_path("PAVEMENT") + == "/Scribe-Desktop/scribe/language_guis/PAVEMENT" + ) + + +@pytest.mark.parametrize( + "passed_values, values_to_check, expected", + [ + ("['1', '2', '3']", ["1", "2", "3"], ["1", "2", "3"]), + ("['1', '3', '2']", ["1", "2", "3"], ["1", "3", "2"]), + ("['1', '2']", ["1", "2", "3"], ["1", "2"]), + ("['abc']", ["def", "abc", "ghi"], ["abc"]), + ("[]", ["1", "2", "3"], []), + ], +) +def test_check_command_line_args_positive(passed_values, values_to_check, expected): + assert ( + utils.check_command_line_args("pass.txt", passed_values, values_to_check) + == expected + ) + + +def test_check_command_line_args_fail_not_subset(): + with pytest.raises(ValueError): + _ = utils.check_command_line_args("Fail.txt", "['1', '2', '3']", ["1", "2"]) + + +def test_check_command_line_args_passed_values_not_list(): + with pytest.raises(ValueError): + _ = utils.check_command_line_args("Fail.txt", "('1', '2', '3')", ["1", "2"]) + + +def test_check_command_line_args_passed_values_invalid_arg(): + with pytest.raises(ValueError): + _ = utils.check_command_line_args("Fail.txt", 3, ["3"]) + + +def test_check_and_return_command_line_args_one_arg(): + assert utils.check_and_return_command_line_args(["1"]) == (None, None) + + +def test_check_and_return_command_line_args_two_args(): + assert utils.check_and_return_command_line_args( + ["a.txt", '["1","2"]'], ["1", "2", "3"], ["1", "2", "3"] + ) == (["1", "2"], None) + + +def test_check_and_return_command_line_args_three_args(): + assert utils.check_and_return_command_line_args( + ["a.txt", '["1","2"]', '["3"]'], ["1", "2", "3"], ["1", "2", "3"] + ) == (["1", "2"], ["3"]) + + +def test_check_and_return_command_line_args_too_many_args(): + with pytest.raises(ValueError): + _ = utils.check_and_return_command_line_args(["a", "b", "c", "d"])