From 5ec8357023dcf8c0d0c38901b84770532ee7e0a2 Mon Sep 17 00:00:00 2001
From: Michael Charlton <m.charlton@mac.com>
Date: Thu, 21 Sep 2023 12:33:02 +0100
Subject: [PATCH] refactor(test): add tests for utils module (resolves #50)

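* Add unit tests for the `utils` module
* Fix the doubled negative in error messages ("is not currently not" ->
  "is currently not") and shorten the unsupported-ISO message
* Raise `ValueError` for unsupported languages in
  `get_language_words_to_remove` and `get_language_words_to_ignore`
* Add type annotations for `mypy` checks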
---
 src/scribe_data/utils.py        |  58 ++++---
 tests/load/test_update_utils.py | 273 +++++++++++++++++++++++++++++++-
 2 files changed, 306 insertions(+), 25 deletions(-)

diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
index e86acea65..da7d48f35 100644
--- a/src/scribe_data/utils.py
+++ b/src/scribe_data/utils.py
@@ -17,13 +17,15 @@
     get_ios_data_path,
     get_android_data_path,
     get_desktop_data_path,
-    check_command_line_args
+    check_command_line_args,
+    check_and_return_command_line_args
 """
 
 import ast
+from typing import Any
 
 
-def get_scribe_languages():
+def get_scribe_languages() -> list[str]:
     """
     Returns the list of currently implemented Scribe languages.
     """
@@ -39,7 +41,7 @@ def get_scribe_languages():
     ]
 
 
-def get_language_qid(language):
+def get_language_qid(language: str) -> str:
     """
     Returns the QID of the given language.
 
@@ -67,13 +69,13 @@ def get_language_qid(language):
 
     if language not in language_qid_dict:
         raise ValueError(
-            f"{language.upper()} is not currently not a supported language for QID conversion."
+            f"{language.upper()} is currently not a supported language for QID conversion."
         )
 
     return language_qid_dict[language]
 
 
-def get_language_iso(language):
+def get_language_iso(language: str) -> str:
     """
     Returns the ISO code of the given language.
 
@@ -101,13 +103,13 @@ def get_language_iso(language):
 
     if language not in language_iso_dict:
         raise ValueError(
-            f"{language.capitalize()} is not currently not a supported language for ISO conversion."
+            f"{language.capitalize()} is currently not a supported language for ISO conversion."
         )
 
     return language_iso_dict[language]
 
 
-def get_language_from_iso(iso):
+def get_language_from_iso(iso: str) -> str:
     """
     Returns the language name for the given ISO.
 
@@ -134,14 +136,12 @@ def get_language_from_iso(iso):
     }
 
     if iso not in iso_language_dict:
-        raise ValueError(
-            f"{iso.upper()} is not currently not a supported ISO for language conversion."
-        )
+        raise ValueError(f"{iso.upper()} is currently not a supported ISO language.")
 
     return iso_language_dict[iso]
 
 
-def get_language_words_to_remove(language):
+def get_language_words_to_remove(language: str) -> list[str]:
     """
     Returns the words that should not be included as autosuggestions for the given language.
 
@@ -155,7 +155,7 @@ def get_language_words_to_remove(language):
         The words that should not be included as autosuggestions for the given language as values of a dictionary.
     """
     language = language.lower()
-    language_iso_dict = {
+    words_to_remove: dict[str, list[str]] = {
         "english": [
             "of",
             "the",
@@ -181,10 +181,15 @@ def get_language_words_to_remove(language):
         "swedish": ["of", "the", "The", "and", "Checklist", "Catalogue"],
     }
 
-    return language_iso_dict[language]
+    if language not in words_to_remove:
+        raise ValueError(
+            f"{language.capitalize()} is currently not a supported language."
+        )
 
+    return words_to_remove[language]
 
-def get_language_words_to_ignore(language):
+
+def get_language_words_to_ignore(language: str) -> list[str]:
     """
     Returns the words that should not be included as autosuggestions for the given language.
 
@@ -198,7 +203,7 @@ def get_language_words_to_ignore(language):
         The words that should not be included as autosuggestions for the given language as values of a dictionary.
     """
     language = language.lower()
-    language_iso_dict = {
+    words_to_ignore: dict[str, list[str]] = {
         "french": [
             "XXe",
         ],
@@ -210,31 +215,36 @@ def get_language_words_to_ignore(language):
         "swedish": ["databasdump"],
     }
 
-    return language_iso_dict[language]
+    if language not in words_to_ignore:
+        raise ValueError(
+            f"{language.capitalize()} is currently not a supported language."
+        )
+
+    return words_to_ignore[language]
 
 
-def get_path_from_format_file():
+def get_path_from_format_file() -> str:
     """
     Returns the directory path from a data formatting file to scribe-org.
     """
     return "../../../../../.."
 
 
-def get_path_from_load_dir():
+def get_path_from_load_dir() -> str:
     """
     Returns the directory path from the load directory to scribe-org.
     """
     return "../../../.."
 
 
-def get_path_from_et_dir():
+def get_path_from_et_dir() -> str:
     """
     Returns the directory path from the extract_transform directory to scribe-org.
     """
     return "../../../.."
 
 
-def get_ios_data_path(language: str):
+def get_ios_data_path(language: str) -> str:
     """
     Returns the path to the data json of the iOS app given a language.
 
@@ -250,7 +260,7 @@ def get_ios_data_path(language: str):
     return f"/Scribe-iOS/Keyboards/LanguageKeyboards/{language}"
 
 
-def get_android_data_path(language: str):
+def get_android_data_path(language: str) -> str:
     """
     Returns the path to the data json of the Android app given a language.
 
@@ -266,7 +276,7 @@ def get_android_data_path(language: str):
     return f"/Scribe-Android/app/src/main/LanguageKeyboards/{language}"
 
 
-def get_desktop_data_path(language: str):
+def get_desktop_data_path(language: str) -> str:
     """
     Returns the path to the data json of the desktop app given a language.
 
@@ -282,7 +292,9 @@ def get_desktop_data_path(language: str):
     return f"/Scribe-Desktop/scribe/language_guis/{language}"
 
 
-def check_command_line_args(file_name, passed_values, values_to_check):
+def check_command_line_args(
+    file_name: str, passed_values: Any, values_to_check: list[str]
+) -> list[str]:
     """
     Checks command line arguments passed to Scribe-Data files.
 
diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py
index d594aacf2..b8722ac79 100644
--- a/tests/load/test_update_utils.py
+++ b/tests/load/test_update_utils.py
@@ -1,5 +1,274 @@
+import unittest
+import pytest
+
 from scribe_data import utils
 
 
-def test_get_language_qid():
-    assert utils.get_language_qid("french") == "Q150"
+def test_get_scribe_languages():
+    test_case = unittest.TestCase()
+
+    # test for content, not order
+    test_case.assertCountEqual(
+        utils.get_scribe_languages(),
+        [
+            "English",
+            "French",
+            "German",
+            "Italian",
+            "Portuguese",
+            "Russian",
+            "Spanish",
+            "Swedish",
+        ],
+    )
+
+
+@pytest.mark.parametrize(
+    "language, qid_code",
+    [
+        ("English", "Q1860"),
+        ("french", "Q150"),
+        ("GERMAN", "Q188"),
+        ("iTalian", "Q652"),
+        ("poRTUGuese", "Q5146"),
+        ("russian", "Q7737"),
+        ("spanish", "Q1321"),
+        ("swedish", "Q9027"),
+    ],
+)
+def test_get_language_qid_positive(language, qid_code):
+    assert utils.get_language_qid(language) == qid_code
+
+
+def test_get_language_qid_negative():
+    with pytest.raises(ValueError) as excp:
+        _ = utils.get_language_qid("Newspeak")
+
+    assert (
+        str(excp.value)
+        == "NEWSPEAK is currently not a supported language for QID conversion."
+    )
+
+
+@pytest.mark.parametrize(
+    "language, iso_code",
+    [
+        ("English", "en"),
+        ("french", "fr"),
+        ("GERMAN", "de"),
+        ("iTalian", "it"),
+        ("poRTUGuese", "pt"),
+        ("russian", "ru"),
+        ("spanish", "es"),
+        ("SwedisH", "sv"),
+    ],
+)
+def test_get_language_iso_positive(language, iso_code):
+    assert utils.get_language_iso(language) == iso_code
+
+
+def test_get_language_iso_negative():
+    with pytest.raises(ValueError) as excp:
+        _ = utils.get_language_iso("gibberish")
+
+    assert (
+        str(excp.value)
+        == "Gibberish is currently not a supported language for ISO conversion."
+    )
+
+
+@pytest.mark.parametrize(
+    "iso_code, language",
+    [
+        ("en", "English"),
+        ("fr", "French"),
+        ("de", "German"),
+        ("it", "Italian"),
+        ("pt", "Portuguese"),
+        ("ru", "Russian"),
+        ("es", "Spanish"),
+        ("sv", "Swedish"),
+    ],
+)
+def test_get_language_from_iso_positive(iso_code, language):
+    assert utils.get_language_from_iso(iso_code) == language
+
+
+def test_get_language_from_iso_negative():
+    with pytest.raises(ValueError) as excp:
+        _ = utils.get_language_from_iso("ixi")
+
+    assert str(excp.value) == "IXI is currently not a supported ISO language."
+
+
+@pytest.mark.parametrize(
+    "language, remove_words",
+    [
+        (
+            "english",
+            [
+                "of",
+                "the",
+                "The",
+                "and",
+            ],
+        ),
+        (
+            "french",
+            [
+                "of",
+                "the",
+                "The",
+                "and",
+            ],
+        ),
+        ("german", ["of", "the", "The", "and", "NeinJa", "et", "redirect"]),
+        ("italian", ["of", "the", "The", "and", "text", "from"]),
+        ("portuguese", ["of", "the", "The", "and", "jbutadptflora"]),
+        (
+            "russian",
+            [
+                "of",
+                "the",
+                "The",
+                "and",
+            ],
+        ),
+        ("spanish", ["of", "the", "The", "and"]),
+        ("swedish", ["of", "the", "The", "and", "Checklist", "Catalogue"]),
+    ],
+)
+def test_get_language_words_to_remove(language, remove_words):
+    test_case = unittest.TestCase()
+
+    # ignore order, only content matters
+    test_case.assertCountEqual(
+        utils.get_language_words_to_remove(language), remove_words
+    )
+
+
+def test_get_language_words_to_remove_negative():
+    with pytest.raises(ValueError) as excp:
+        _ = utils.get_language_words_to_remove("python")
+
+    assert str(excp.value) == "Python is currently not a supported language."
+
+
+@pytest.mark.parametrize(
+    "language, ignore_words",
+    [
+        (
+            "french",
+            [
+                "XXe",
+            ],
+        ),
+        ("german", ["Gemeinde", "Familienname"]),
+        ("italian", ["The", "ATP"]),
+        ("portuguese", []),
+        ("russian", []),
+        ("spanish", []),
+        ("swedish", ["databasdump"]),
+    ],
+)
+def test_get_language_words_to_ignore(language, ignore_words):
+    test_case = unittest.TestCase()
+
+    # ignore order, only content matters
+    test_case.assertCountEqual(
+        utils.get_language_words_to_ignore(language), ignore_words
+    )
+
+
+def test_get_language_words_to_ignore_negative():
+    with pytest.raises(ValueError) as excp:
+        _ = utils.get_language_words_to_ignore("JAVA")
+
+    assert str(excp.value) == "Java is currently not a supported language."
+
+
+def test_get_path_from_format_file():
+    assert utils.get_path_from_format_file() == "../../../../../.."
+
+
+def test_get_path_from_load_dir():
+    assert utils.get_path_from_load_dir() == "../../../.."
+
+
+def test_get_path_from_et_dir():
+    # TODO: this is the same path that get_path_from_load_dir() returns; confirm that is intended.
+    assert utils.get_path_from_et_dir() == "../../../.."
+
+
+def test_get_ios_data_path():
+    assert (
+        utils.get_ios_data_path("suomi")
+        == "/Scribe-iOS/Keyboards/LanguageKeyboards/suomi"
+    )
+
+
+def test_get_android_data_path():
+    assert (
+        utils.get_android_data_path("Robbie")
+        == "/Scribe-Android/app/src/main/LanguageKeyboards/Robbie"
+    )
+
+
+def test_get_desktop_data_path():
+    assert (
+        utils.get_desktop_data_path("PAVEMENT")
+        == "/Scribe-Desktop/scribe/language_guis/PAVEMENT"
+    )
+
+
+@pytest.mark.parametrize(
+    "passed_values, values_to_check, expected",
+    [
+        ("['1', '2', '3']", ["1", "2", "3"], ["1", "2", "3"]),
+        ("['1', '3', '2']", ["1", "2", "3"], ["1", "3", "2"]),
+        ("['1', '2']", ["1", "2", "3"], ["1", "2"]),
+        ("['abc']", ["def", "abc", "ghi"], ["abc"]),
+        ("[]", ["1", "2", "3"], []),
+    ],
+)
+def test_check_command_line_args_positive(passed_values, values_to_check, expected):
+    assert (
+        utils.check_command_line_args("pass.txt", passed_values, values_to_check)
+        == expected
+    )
+
+
+def test_check_command_line_args_fail_not_subset():
+    with pytest.raises(ValueError):
+        _ = utils.check_command_line_args("Fail.txt", "['1', '2', '3']", ["1", "2"])
+
+
+def test_check_command_line_args_passed_values_not_list():
+    with pytest.raises(ValueError):
+        _ = utils.check_command_line_args("Fail.txt", "('1', '2', '3')", ["1", "2"])
+
+
+def test_check_command_line_args_passed_values_invalid_arg():
+    with pytest.raises(ValueError):
+        _ = utils.check_command_line_args("Fail.txt", 3, ["3"])
+
+
+def test_check_and_return_command_line_args_one_arg():
+    assert utils.check_and_return_command_line_args(["1"]) == (None, None)
+
+
+def test_check_and_return_command_line_args_two_args():
+    assert utils.check_and_return_command_line_args(
+        ["a.txt", '["1","2"]'], ["1", "2", "3"], ["1", "2", "3"]
+    ) == (["1", "2"], None)
+
+
+def test_check_and_return_command_line_args_three_args():
+    assert utils.check_and_return_command_line_args(
+        ["a.txt", '["1","2"]', '["3"]'], ["1", "2", "3"], ["1", "2", "3"]
+    ) == (["1", "2"], ["3"])
+
+
+def test_check_and_return_command_line_args_too_many_args():
+    with pytest.raises(ValueError):
+        _ = utils.check_and_return_command_line_args(["a", "b", "c", "d"])