diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index bab97a1a8..17c07e1c1 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -7,6 +7,7 @@ Thank you for your pull request! 🚀
- [] This pull request is on a [separate branch](https://docs.github.com/en/get-started/quickstart/github-flow) and not the main branch
+- [ ] I have tested my code with the `pytest` command as described in the [testing section of the contributing guide](https://github.com/scribe-org/Scribe-Data/blob/main/CONTRIBUTING.md#testing)
---
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 376a954a7..2e44c618e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,6 +15,7 @@ If you have questions or would like to communicate with the team, please [join u
- [First steps as a contributor](#first-steps)
- [Learning the tech stack](#learning-the-tech)
- [Development environment](#dev-env)
+- [Testing](#testing)
- [Issues and projects](#issues-projects)
- [Bug reports](#bug-reports)
- [Feature requests](#feature-requests)
@@ -171,6 +172,16 @@ pip install -e .
> [!NOTE]
> Feel free to contact the team in the [Data room on Matrix](https://matrix.to/#/#ScribeData:matrix.org) if you're having problems getting your environment setup!
+
+
+## Testing [`⇧`](#contents)
+
+In addition to the [pre-commit](https://pre-commit.com/) hooks that are set up in the [development environment section](#dev-env), Scribe-Data also includes a testing suite that should be run before all pull requests and subsequent commits. Please run the following in the project root:
+
+```bash
+pytest
+```
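+
+To run only part of the suite while you iterate, `pytest` also accepts a file or directory path (standard `pytest` behavior; `tests/cli` below is one of the project's test directories):
+
+```bash
+pytest tests/cli
+```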
+
## Issues and projects [`⇧`](#contents)
diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
index 4f59a65ef..e39e1621d 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/cli_utils.py
@@ -27,6 +27,8 @@
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
+# MARK: CLI Variables
+
LANGUAGE_DATA_EXTRACTION_DIR = Path(__file__).parent.parent / "language_data_extraction"
LANGUAGE_METADATA_FILE = (
@@ -53,14 +55,24 @@
print(f"Error reading data type metadata: {e}")
-language_map = {
- lang["language"].lower(): lang for lang in language_metadata["languages"]
-}
+language_map = {}
+language_to_qid = {}
+
+# Process each language and its potential sub-languages in one pass.
+for lang, lang_data in language_metadata.items():
+ lang_lower = lang.lower()
-# Create language_to_qid dictionary.
-language_to_qid = {
- lang["language"].lower(): lang["qid"] for lang in language_metadata["languages"]
-}
+ # Handle sub-languages if they exist.
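+    # Note: the parent language itself is not added to the maps; only its sub-languages are queryable.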
+ if "sub_languages" in lang_data:
+ for sub_lang, sub_lang_data in lang_data["sub_languages"].items():
+ sub_lang_lower = sub_lang.lower()
+ language_map[sub_lang_lower] = sub_lang_data
+ language_to_qid[sub_lang_lower] = sub_lang_data["qid"]
+
+ else:
+ # Handle the main language directly.
+ language_map[lang_lower] = lang_data
+ language_to_qid[lang_lower] = lang_data["qid"]
# MARK: Correct Inputs
@@ -103,41 +115,37 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None:
if isinstance(data, dict):
max_key_length = max((len(key) for key in data.keys()), default=0)
- if data_type == "autosuggestions":
- for key, value in data.items():
+ for key, value in data.items():
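+        # Known data types get custom formatting; the later branches handle generic dicts, lists and scalars.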
+ if data_type == "autosuggestions":
print(f"{key:<{max_key_length}} : {', '.join(value)}")
- elif data_type == "emoji_keywords":
- for key, value in data.items():
+ elif data_type == "emoji_keywords":
emojis = [item["emoji"] for item in value]
print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
- elif data_type in {"prepositions"}:
- for key, value in data.items():
+ elif data_type in {"prepositions"}:
print(f"{key:<{max_key_length}} : {value}")
- else:
- for key, value in data.items():
- if isinstance(value, dict):
- print(f"{key:<{max_key_length}} : ")
- max_sub_key_length = max(
- (len(sub_key) for sub_key in value.keys()), default=0
- )
- for sub_key, sub_value in value.items():
- print(f" {sub_key:<{max_sub_key_length}} : {sub_value}")
-
- elif isinstance(value, list):
- print(f"{key:<{max_key_length}} : ")
- for item in value:
- if isinstance(item, dict):
- for sub_key, sub_value in item.items():
- print(f" {sub_key:<{max_key_length}} : {sub_value}")
-
- else:
- print(f" {item}")
-
- else:
- print(f"{key:<{max_key_length}} : {value}")
+ elif isinstance(value, dict):
+ print(f"{key:<{max_key_length}} : ")
+ max_sub_key_length = max(
+ (len(sub_key) for sub_key in value.keys()), default=0
+ )
+ for sub_key, sub_value in value.items():
+ print(f" {sub_key:<{max_sub_key_length}} : {sub_value}")
+
+ elif isinstance(value, list):
+ print(f"{key:<{max_key_length}} : ")
+ for item in value:
+ if isinstance(item, dict):
+ for sub_key, sub_value in item.items():
+ print(f" {sub_key:<{max_key_length}} : {sub_value}")
+
+ else:
+ print(f" {item}")
+
+ else:
+ print(f"{key:<{max_key_length}} : {value}")
elif isinstance(data, list):
for item in data:
@@ -202,12 +210,12 @@ def validate_single_item(item, valid_options, item_type):
):
closest_match = difflib.get_close_matches(item, valid_options, n=1)
closest_match_str = (
- f" The closest matching {item_type} is {closest_match[0]}."
+ f" The closest matching {item_type} is '{closest_match[0]}'."
if closest_match
else ""
)
- return f"Invalid {item_type} {item}.{closest_match_str}"
+ return f"Invalid {item_type} '{item}'.{closest_match_str}"
return None
diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py
index 4e95f34b0..6ba7a1f55 100644
--- a/src/scribe_data/cli/interactive.py
+++ b/src/scribe_data/cli/interactive.py
@@ -35,7 +35,7 @@
from scribe_data.cli.cli_utils import data_type_metadata, language_metadata
from scribe_data.cli.get import get_data
from scribe_data.cli.version import get_version_message
-from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
+from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR, list_all_languages
# MARK: Config Setup
@@ -51,9 +51,7 @@
class ScribeDataConfig:
def __init__(self):
- self.languages = [
- lang["language"].capitalize() for lang in language_metadata["languages"]
- ]
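+        # All supported languages, including sub-languages, sorted alphabetically.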
+ self.languages = list_all_languages(language_metadata)
self.data_types = list(data_type_metadata.keys())
self.selected_languages: List[str] = []
self.selected_data_types: List[str] = []
diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
index 5d16b4413..762d3bfca 100644
--- a/src/scribe_data/cli/list.py
+++ b/src/scribe_data/cli/list.py
@@ -21,10 +21,16 @@
"""
from scribe_data.cli.cli_utils import (
+ LANGUAGE_DATA_EXTRACTION_DIR,
correct_data_type,
- language_metadata,
language_map,
- LANGUAGE_DATA_EXTRACTION_DIR,
+ language_metadata,
+)
+from scribe_data.utils import (
+ format_sublanguage_name,
+ get_language_iso,
+ get_language_qid,
+ list_all_languages,
)
@@ -32,12 +38,11 @@ def list_languages() -> None:
"""
Generates a table of languages, their ISO-2 codes and their Wikidata QIDs.
"""
- languages = list(language_metadata["languages"])
- languages.sort(key=lambda x: x["language"])
+ languages = list_all_languages(language_metadata)
- language_col_width = max(len(lang["language"]) for lang in languages) + 2
- iso_col_width = max(len(lang["iso"]) for lang in languages) + 2
- qid_col_width = max(len(lang["qid"]) for lang in languages) + 2
+ language_col_width = max(len(lang) for lang in languages) + 2
+ iso_col_width = max(len(get_language_iso(lang)) for lang in languages) + 2
+ qid_col_width = max(len(get_language_qid(lang)) for lang in languages) + 2
table_line_length = language_col_width + iso_col_width + qid_col_width
@@ -49,7 +54,7 @@ def list_languages() -> None:
for lang in languages:
print(
- f"{lang['language'].capitalize():<{language_col_width}} {lang['iso']:<{iso_col_width}} {lang['qid']:<{qid_col_width}}"
+ f"{lang.capitalize():<{language_col_width}} {get_language_iso(lang):<{iso_col_width}} {get_language_qid(lang):<{qid_col_width}}"
)
print("-" * table_line_length)
@@ -65,7 +70,9 @@ def list_data_types(language: str = None) -> None:
language : str
The language to potentially list data types for.
"""
+ languages = list_all_languages(language_metadata)
if language:
+ language = format_sublanguage_name(language, language_metadata)
language_data = language_map.get(language.lower())
language_capitalized = language.capitalize()
language_dir = LANGUAGE_DATA_EXTRACTION_DIR / language_capitalized
@@ -83,8 +90,11 @@ def list_data_types(language: str = None) -> None:
else:
data_types = set()
- for lang in language_metadata["languages"]:
- language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang["language"].capitalize()
+ for lang in languages:
+        # Note: format_sublanguage_name already returns a capitalized name (e.g. Norwegian/Nynorsk).
+        language_dir = LANGUAGE_DATA_EXTRACTION_DIR / format_sublanguage_name(
+            lang, language_metadata
+        )
if language_dir.is_dir():
data_types.update(f.name for f in language_dir.iterdir() if f.is_dir())
@@ -122,13 +132,15 @@ def list_languages_for_data_type(data_type: str) -> None:
The data type to check for.
"""
data_type = correct_data_type(data_type=data_type)
+ all_languages = list_all_languages(language_metadata)
available_languages = []
- for lang in language_metadata["languages"]:
- language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang["language"].capitalize()
+ for lang in all_languages:
+ lang = format_sublanguage_name(lang, language_metadata)
+ language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang
if language_dir.is_dir():
dt_path = language_dir / data_type
if dt_path.exists():
- available_languages.append(lang["language"])
+ available_languages.append(lang)
available_languages.sort()
table_header = f"Available languages: {data_type}"
@@ -141,7 +153,7 @@ def list_languages_for_data_type(data_type: str) -> None:
print("-" * table_line_length)
for lang in available_languages:
- print(f"{lang.capitalize()}")
+        print(lang)
print("-" * table_line_length)
print()
diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
index fe1382707..885d9b3e9 100644
--- a/src/scribe_data/cli/total.py
+++ b/src/scribe_data/cli/total.py
@@ -29,6 +29,7 @@
language_metadata,
language_to_qid,
)
+from scribe_data.utils import format_sublanguage_name, list_all_languages
from scribe_data.wikidata.wikidata_utils import sparql
@@ -71,12 +72,13 @@ def get_datatype_list(language):
data_types : list[str] or None
A list of the corresponding data types.
"""
- languages = list(language_metadata["languages"])
- language_list = [lang["language"] for lang in languages]
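+    # Lowercase names for all supported languages, including sub-languages.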
+ languages = list_all_languages(language_metadata)
- if language.lower() in language_list:
+ if language.lower() in languages:
language_data = language_map.get(language.lower())
- language_capitalized = language.capitalize()
+        # format_sublanguage_name already returns a capitalized name.
+        language_capitalized = format_sublanguage_name(language, language_metadata)
language_dir = LANGUAGE_DATA_EXTRACTION_DIR / language_capitalized
if not language_data:
@@ -131,11 +133,9 @@ def print_total_lexemes(language: str = None):
print("=" * 64)
if language is None: # all languages
- languages = list(language_metadata["languages"])
- languages.sort(key=lambda x: x["language"])
- language_list = [lang["language"] for lang in languages]
+ languages = list_all_languages(language_metadata)
- for lang in language_list:
+ for lang in languages:
data_types = get_datatype_list(lang)
first_row = True
diff --git a/src/scribe_data/load/data_to_sqlite.py b/src/scribe_data/load/data_to_sqlite.py
index 79d19e39b..aec1f9560 100644
--- a/src/scribe_data/load/data_to_sqlite.py
+++ b/src/scribe_data/load/data_to_sqlite.py
@@ -35,6 +35,7 @@
DEFAULT_SQLITE_EXPORT_DIR,
get_language_iso,
)
+from scribe_data.utils import list_all_languages
def data_to_sqlite(
@@ -52,8 +53,7 @@ def data_to_sqlite(
current_language_data = json.load(f_languages)
data_types = json.load(f_data_types).keys()
- current_languages = [d["language"] for d in current_language_data["languages"]]
-
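+    # All supported languages, including sub-languages.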
+ current_languages = list_all_languages(current_language_data)
if not languages:
languages = current_languages
diff --git a/src/scribe_data/resources/language_metadata.json b/src/scribe_data/resources/language_metadata.json
index e6d7de8a6..7ab2145bf 100755
--- a/src/scribe_data/resources/language_metadata.json
+++ b/src/scribe_data/resources/language_metadata.json
@@ -1,70 +1,182 @@
{
- "used by": "Scribe-Data/src/scribe_data/utils.py",
- "description": {
- "entry": {
- "language": "the supported language. All lowercase",
- "iso": "the ISO 639 code for 'language'. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. All lowercase",
- "qid": "the unique identifier of 'language' on Wikidata. 'Q' followed by one or more digits. See https://www.wikidata.org/wiki/Q43649390",
- "remove-words": "words that should not be included as autosuggestions for the given language.",
- "ignore-words": "words that should be removed from the autosuggestion generation process."
+ "arabic": {
+ "iso": "ar",
+ "qid": "Q13955"
+ },
+ "basque": {
+ "iso": "eu",
+ "qid": "Q8752"
+ },
+ "bengali": {
+ "iso": "bn",
+ "qid": "Q9610"
+ },
+ "chinese": {
+ "sub_languages": {
+ "mandarin": {
+ "iso": "zh",
+ "qid": "Q727694"
+ }
+ }
+ },
+ "czech": {
+ "iso": "cs",
+ "qid": "Q9056"
+ },
+ "danish": {
+ "iso": "da",
+ "qid": "Q9035"
+ },
+ "english": {
+ "iso": "en",
+ "qid": "Q1860"
+ },
+ "esperanto": {
+ "iso": "eo",
+ "qid": "Q143"
+ },
+ "estonian": {
+ "iso": "et",
+ "qid": "Q9072"
+ },
+ "finnish": {
+ "iso": "fi",
+ "qid": "Q1412"
+ },
+ "french": {
+ "iso": "fr",
+ "qid": "Q150"
+ },
+ "german": {
+ "iso": "de",
+ "qid": "Q188"
+ },
+ "greek": {
+ "iso": "el",
+ "qid": "Q36510"
+ },
+ "hausa": {
+ "iso": "ha",
+ "qid": "Q56475"
+ },
+ "hebrew": {
+ "iso": "he",
+ "qid": "Q9288"
+ },
+ "hindustani": {
+ "sub_languages": {
+ "hindi": {
+ "iso": "hi",
+ "qid": "Q11051"
+ },
+ "urdu": {
+ "iso": "ur",
+ "qid": "Q11051"
+ }
+ }
+ },
+ "indonesian": {
+ "iso": "id",
+ "qid": "Q9240"
+ },
+ "italian": {
+ "iso": "it",
+ "qid": "Q652"
+ },
+ "japanese": {
+ "iso": "ja",
+ "qid": "Q5287"
+ },
+ "kurmanji": {
+ "iso": "kmr",
+ "qid": "Q36163"
+ },
+ "latin": {
+ "iso": "la",
+ "qid": "Q397"
+ },
+ "malay": {
+ "iso": "ms",
+ "qid": "Q9237"
+ },
+ "malayalam": {
+ "iso": "ml",
+ "qid": "Q36236"
+ },
+ "norwegian": {
+ "sub_languages": {
+ "bokmål": {
+ "iso": "nb",
+ "qid": "Q25167"
+ },
+ "nynorsk": {
+ "iso": "nn",
+ "qid": "Q25164"
+ }
+ }
+ },
+ "pidgin": {
+ "sub_languages": {
+ "nigerian": {
+ "iso": "pi",
+ "qid": "Q33655"
+ }
}
},
- "languages": [
- {
- "language": "english",
- "iso": "en",
- "qid": "Q1860",
- "remove-words": ["of", "the", "The", "and"],
- "ignore-words": []
- },
- {
- "language": "french",
- "iso": "fr",
- "qid": "Q150",
- "remove-words": ["of", "the", "The", "and"],
- "ignore-words": ["XXe"]
- },
- {
- "language": "german",
- "iso": "de",
- "qid": "Q188",
- "remove-words": ["of", "the", "The", "and", "NeinJa", "et", "redirect"],
- "ignore-words": ["Gemeinde", "Familienname"]
- },
- {
- "language": "italian",
- "iso": "it",
- "qid": "Q652",
- "remove-words": ["of", "the", "The", "and", "text", "from"],
- "ignore-words": ["The", "ATP"]
- },
- {
- "language": "portuguese",
- "iso": "pt",
- "qid": "Q5146",
- "remove-words": ["of", "the", "The", "and", "jbutadptflora"],
- "ignore-words": []
- },
- {
- "language": "russian",
- "iso": "ru",
- "qid": "Q7737",
- "remove-words": ["of", "the", "The", "and"],
- "ignore-words": []
- },
- {
- "language": "spanish",
- "iso": "es",
- "qid": "Q1321",
- "remove-words": ["of", "the", "The", "and"],
- "ignore-words": []
- },
- {
- "language": "swedish",
- "iso": "sv",
- "qid": "Q9027",
- "remove-words": ["of", "the", "The", "and", "Checklist", "Catalogue"],
- "ignore-words": ["databasdump"]
+ "polish": {
+ "iso": "pl",
+ "qid": "Q809"
+ },
+ "portuguese": {
+ "iso": "pt",
+ "qid": "Q5146"
+ },
+ "punjabi": {
+ "sub_languages": {
+ "gurmukhi": {
+ "iso": "pa",
+ "qid": "Q58635"
+ },
+ "shahmukhi": {
+ "iso": "pnb",
+ "qid": "Q58635"
+ }
}
- ]
+ },
+ "russian": {
+ "iso": "ru",
+ "qid": "Q7737"
+ },
+ "slovak": {
+ "iso": "sk",
+ "qid": "Q9058"
+ },
+ "spanish": {
+ "iso": "es",
+ "qid": "Q1321"
+ },
+ "swahili": {
+ "iso": "sw",
+ "qid": "Q7838"
+ },
+ "swedish": {
+ "iso": "sv",
+ "qid": "Q9027"
+ },
+ "tajik": {
+ "iso": "tg",
+ "qid": "Q9260"
+ },
+ "tamil": {
+ "iso": "ta",
+ "qid": "Q5885"
+ },
+ "ukrainian": {
+ "iso": "ua",
+ "qid": "Q8798"
+ },
+ "yoruba": {
+ "iso": "yo",
+ "qid": "Q34311"
+ }
}
diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
index 9d94485ab..3c2007640 100644
--- a/src/scribe_data/utils.py
+++ b/src/scribe_data/utils.py
@@ -26,9 +26,6 @@
from pathlib import Path
from typing import Any, Optional
-from iso639 import Lang
-from iso639.exceptions import DeprecatedLanguageValue, InvalidLanguageValue
-
PROJECT_ROOT = "Scribe-Data"
DEFAULT_JSON_EXPORT_DIR = "scribe_data_json_export"
DEFAULT_CSV_EXPORT_DIR = "scribe_data_csv_export"
@@ -36,7 +33,7 @@
DEFAULT_SQLITE_EXPORT_DIR = "scribe_data_sqlite_export"
-def _load_json(package_path: str, file_name: str, root: str) -> Any:
+def _load_json(package_path: str, file_name: str) -> Any:
"""
Loads a JSON resource from a package into a python entity.
@@ -48,52 +45,37 @@ def _load_json(package_path: str, file_name: str, root: str) -> Any:
file_name : str
The name of the file (resource) that contains the JSON data.
- root : str
- The root node of the JSON document.
-
Returns
-------
- A python entity starting at 'root'.
+ A python entity representing the JSON content.
"""
-
with resources.files(package_path).joinpath(file_name).open(
encoding="utf-8"
) as in_stream:
- contents = json.load(in_stream)
- return contents[root]
+ return json.load(in_stream)
_languages = _load_json(
- package_path="scribe_data.resources",
- file_name="language_metadata.json",
- root="languages",
+ package_path="scribe_data.resources", file_name="language_metadata.json"
)
def _find(source_key: str, source_value: str, target_key: str, error_msg: str) -> Any:
"""
- Each 'language', (english, german,..., etc) is a dictionary of key/value pairs:
+ Finds a target value based on a source key/value pair from the language metadata.
- entry = {
- "language": "english",
- "iso": "en",
- "qid": "Q1860",
- "remove-words": [...],
- "ignore-words": [...]
- }
-
- Given a key/value pair, the 'source' and the 'target' key get the 'target' value.
+ This version handles both regular languages and those with sub-languages (e.g., Norwegian).
Parameters
----------
source_value : str
- The source value to find equivalents for (e.g. 'english').
+ The source value to find equivalents for (e.g., 'english', 'nynorsk').
source_key : str
- The source key to reference (e.g. 'language').
+ The source key to reference (e.g., 'language').
target_key : str
- The key to target (e.g. 'iso').
+ The key to target (e.g., 'qid').
error_msg : str
The message displayed when a value cannot be found.
@@ -104,28 +86,33 @@ def _find(source_key: str, source_value: str, target_key: str, error_msg: str) -
Raises
------
- ValueError : when a source_value is not supported.
- """
- norm_source_value = source_value.lower()
-
- if target_value := [
- entry[target_key]
- for entry in _languages
- if entry[source_key] == norm_source_value
- ]:
- assert len(target_value) == 1, f"More than one entry for '{norm_source_value}'"
- return target_value[0]
-
+ ValueError : when a source_value is not supported or the language only has sub-languages.
+ """
+ # Check if we're searching by language name.
+ if source_key == "language":
+ norm_source_value = source_value.lower()
+
+    # First, check the main language entries (e.g., french, german).
+ for language, entry in _languages.items():
+ # If the language name matches the top-level key, return the target value.
+ if language.lower() == norm_source_value:
+ if "sub_languages" in entry:
+ sub_languages = ", ".join(entry["sub_languages"].keys())
+ raise ValueError(
+ f"'{language}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages}"
+ )
+ return entry.get(target_key)
+
+ # If there are sub-languages, check them too.
+ if "sub_languages" in entry:
+ for sub_language, sub_entry in entry["sub_languages"].items():
+ if sub_language.lower() == norm_source_value:
+ return sub_entry.get(target_key)
+
+ # If no match was found, raise an error.
raise ValueError(error_msg)
-def get_scribe_languages() -> list[str]:
- """
- Returns the list of currently implemented Scribe languages.
- """
- return sorted(entry["language"].capitalize() for entry in _languages)
-
-
def get_language_qid(language: str) -> str:
"""
Returns the QID of the given language.
@@ -162,13 +149,13 @@ def get_language_iso(language: str) -> str:
str
The ISO code for the language.
"""
- try:
- iso_code = str(Lang(language.capitalize()).pt1)
- except InvalidLanguageValue:
- raise ValueError(
- f"{language.capitalize()} is currently not a supported language for ISO conversion."
- ) from None
- return iso_code
+
+ return _find(
+ "language",
+ language,
+ "iso",
+ f"{language.upper()} is currently not a supported language for ISO conversion.",
+ )
def get_language_from_iso(iso: str) -> str:
@@ -185,57 +172,20 @@ def get_language_from_iso(iso: str) -> str:
str
The name for the language which has an ISO value of iso.
"""
- try:
- language_name = str(Lang(iso.lower()).name)
- except DeprecatedLanguageValue as e:
- raise ValueError(
- f"{iso.upper()} is currently not a supported ISO language."
- ) from e
- return language_name
-
-
-def get_language_words_to_remove(language: str) -> list[str]:
- """
- Returns the words that should be removed during the data cleaning process for the given language.
-
- Parameters
- ----------
- language : str
- The language the words should be returned for.
-
- Returns
- -------
- list[str]
- The words that that be removed during the data cleaning process for the given language.
- """
- return _find(
- "language",
- language,
- "remove-words",
- f"{language.capitalize()} is currently not a supported language.",
- )
-
-
-def get_language_words_to_ignore(language: str) -> list[str]:
- """
- Returns the words that should not be included as autosuggestions for the given language.
+    # Iterate over the languages and their properties.
+ for language, properties in _languages.items():
+        # Check if the current language's ISO matches the provided ISO.
+ if properties.get("iso") == iso:
+ return language.capitalize()
- Parameters
- ----------
- language : str
- The language the words should be returned for.
+    # If there are sub-languages, check those as well.
+ if "sub_languages" in properties:
+ for sub_lang, sub_properties in properties["sub_languages"].items():
+ if sub_properties.get("iso") == iso:
+ return sub_lang.capitalize()
- Returns
- -------
- list[str]
- The words that should not be included as autosuggestions for the given language.
- """
- return _find(
- "language",
- language,
- "ignore-words",
- f"{language.capitalize()} is currently not a supported language.",
- )
+    # If no match is found, raise a ValueError.
+ raise ValueError(f"{iso.upper()} is currently not a supported ISO language.")
def load_queried_data(
@@ -459,20 +409,25 @@ def map_genders(wikidata_gender: str) -> str:
----------
wikidata_gender : str
The gender of the noun that was queried from WikiData.
+
+ Returns
+ -------
+        The standardized gender value, whether the gender label or its Wikidata QID was queried.
"""
gender_map = {
- "masculine": "M",
- "Q499327": "M",
- "feminine": "F",
- "Q1775415": "F",
- "common gender": "C",
- "Q1305037": "C",
- "neuter": "N",
- "Q1775461": "N",
+ "masculine": "masculine",
+ "Q499327": "masculine",
+ "feminine": "feminine",
+ "Q1775415": "feminine",
+ "common": "common",
+ "common gender": "common",
+ "Q1305037": "common",
+ "neuter": "neuter",
+ "Q1775461": "neuter",
}
return gender_map.get(
- wikidata_gender, ""
+ wikidata_gender.lower(), ""
) # nouns could have a gender that is not a valid attribute
@@ -484,20 +439,24 @@ def map_cases(wikidata_case: str) -> str:
----------
wikidata_case : str
The case of the noun that was queried from WikiData.
+
+ Returns
+ -------
+        The standardized case value, whether the case label or its Wikidata QID was queried.
"""
case_map = {
- "accusative": "Acc",
- "Q146078": "Acc",
- "dative": "Dat",
- "Q145599": "Dat",
- "genitive": "Gen",
- "Q146233": "Gen",
- "instrumental": "Ins",
- "Q192997": "Ins",
- "prepositional": "Pre",
- "Q2114906": "Pre",
- "locative": "Loc",
- "Q202142": "Loc",
+ "accusative": "accusative",
+ "Q146078": "accusative",
+ "dative": "dative",
+ "Q145599": "dative",
+ "genitive": "genitive",
+ "Q146233": "genitive",
+ "instrumental": "instrumental",
+ "Q192997": "instrumental",
+ "prepositional": "prepositional",
+ "Q2114906": "prepositional",
+ "locative": "locative",
+ "Q202142": "locative",
}
case = wikidata_case.split(" case")[0]
return case_map.get(case, "")
@@ -519,3 +478,71 @@ def order_annotations(annotation: str) -> str:
annotation_split = sorted(list(set(filter(None, annotation.split("/")))))
return "/".join(annotation_split)
+
+
+def format_sublanguage_name(lang: str, language_metadata=_languages) -> str:
+ """
+ Formats the name of a sub-language by appending its main language
+ in the format 'MAIN_LANG/SUB_LANG'. If the language is not a sub-language,
+ the original language name is returned as-is.
+
+ Parameters
+ ----------
+ lang : str
+ The name of the language or sub-language to format.
+
+ language_metadata : dict
+ The metadata containing information about main languages and their sub-languages.
+
+ Returns
+ -------
+ str
+ The formatted language name if it's a sub-language (e.g., 'Norwegian/Nynorsk').
+ Otherwise the original name.
+
+ Raises
+ ------
+ ValueError: If the provided language or sub-language is not found.
+
+ Example
+ -------
+ > format_sublanguage_name("nynorsk", language_metadata)
+ 'Norwegian/Nynorsk'
+
+ > format_sublanguage_name("english", language_metadata)
+ 'English'
+ """
+ for main_lang, lang_data in language_metadata.items():
+        # If lang is a main language, return it capitalized.
+ if main_lang == lang.lower():
+ return lang.capitalize()
+
+ # Check if the main language has sub-languages.
+ if "sub_languages" in lang_data:
+ # Check if the provided language is a sub-language.
+ for sub_lang in lang_data["sub_languages"]:
+ if lang.lower() == sub_lang.lower():
+ # Return the formatted name MAIN_LANG/SUB_LANG.
+ return f"{main_lang.capitalize()}/{sub_lang.capitalize()}"
+
+ # Raise ValueError if no match is found.
+ raise ValueError(f"{lang.upper()} is not a valid language or sub-language.")
+
+
+def list_all_languages(language_metadata=_languages) -> list[str]:
+ """
+ Returns a sorted list of all languages from the provided metadata dictionary, including sub-languages.
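+
+    Example
+    -------
+    >>> list_all_languages()[:3]
+    ['arabic', 'basque', 'bengali']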
+ """
+ current_languages = []
+
+ # Iterate through the language metadata.
+ for lang_key, lang_data in language_metadata.items():
+ # Check if there are sub-languages.
+ if "sub_languages" in lang_data:
+ # Add the sub-languages to current_languages.
+ current_languages.extend(lang_data["sub_languages"].keys())
+ else:
+ # If no sub-languages, add the main language.
+ current_languages.append(lang_key)
+
+ return sorted(current_languages)
diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py
index 4da51b4f6..a9dba0b9f 100644
--- a/src/scribe_data/wikidata/query_data.py
+++ b/src/scribe_data/wikidata/query_data.py
@@ -33,6 +33,7 @@
from scribe_data.cli.cli_utils import (
language_metadata,
)
+from scribe_data.utils import format_sublanguage_name, list_all_languages
from scribe_data.wikidata.wikidata_utils import sparql
@@ -103,7 +104,7 @@ def query_data(
SCRIBE_DATA_SRC_PATH / "language_data_extraction"
)
languages = [lang.capitalize() for lang in languages]
- current_languages = list(language_metadata["languages"])
+ current_languages = list_all_languages(language_metadata)
current_data_type = ["nouns", "verbs", "prepositions"]
# Assign current_languages and current_data_type if no arguments have been passed.
@@ -147,7 +148,7 @@ def query_data(
disable=interactive,
colour="MAGENTA",
):
- lang = q.parent.parent.name
+ lang = format_sublanguage_name(q.parent.parent.name, language_metadata)
target_type = q.parent.name
updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
diff --git a/tests/cli/test_list.py b/tests/cli/test_list.py
index 03172e077..6fb4bf791 100644
--- a/tests/cli/test_list.py
+++ b/tests/cli/test_list.py
@@ -39,17 +39,49 @@ def test_list_languages(self, mock_print):
list_languages()
expected_calls = [
call(),
- call("Language ISO QID "),
- call("-----------------------"),
- call("English en Q1860 "),
- call("French fr Q150 "),
- call("German de Q188 "),
- call("Italian it Q652 "),
- call("Portuguese pt Q5146 "),
- call("Russian ru Q7737 "),
- call("Spanish es Q1321 "),
- call("Swedish sv Q9027 "),
- call("-----------------------"),
+ call("Language ISO QID "),
+ call("--------------------------"),
+ call("Arabic ar Q13955 "),
+ call("Basque eu Q8752 "),
+ call("Bengali bn Q9610 "),
+ call("Bokmål nb Q25167 "),
+ call("Czech cs Q9056 "),
+ call("Danish da Q9035 "),
+ call("English en Q1860 "),
+ call("Esperanto eo Q143 "),
+ call("Estonian et Q9072 "),
+ call("Finnish fi Q1412 "),
+ call("French fr Q150 "),
+ call("German de Q188 "),
+ call("Greek el Q36510 "),
+ call("Gurmukhi pa Q58635 "),
+ call("Hausa ha Q56475 "),
+ call("Hebrew he Q9288 "),
+ call("Hindi hi Q11051 "),
+ call("Indonesian id Q9240 "),
+ call("Italian it Q652 "),
+ call("Japanese ja Q5287 "),
+ call("Kurmanji kmr Q36163 "),
+ call("Latin la Q397 "),
+ call("Malay ms Q9237 "),
+ call("Malayalam ml Q36236 "),
+ call("Mandarin zh Q727694 "),
+ call("Nigerian pi Q33655 "),
+ call("Nynorsk nn Q25164 "),
+ call("Polish pl Q809 "),
+ call("Portuguese pt Q5146 "),
+ call("Russian ru Q7737 "),
+ call("Shahmukhi pnb Q58635 "),
+ call("Slovak sk Q9058 "),
+ call("Spanish es Q1321 "),
+ call("Swahili sw Q7838 "),
+ call("Swedish sv Q9027 "),
+ call("Tajik tg Q9260 "),
+ call("Tamil ta Q5885 "),
+ call("Ukrainian ua Q8798 "),
+ call("Urdu ur Q11051 "),
+ call("Yoruba yo Q34311 "),
+ call("--------------------------"),
call(),
]
mock_print.assert_has_calls(expected_calls)
@@ -66,6 +98,8 @@ def test_list_data_types_all_languages(self, mock_print):
call("adverbs"),
call("emoji-keywords"),
call("nouns"),
+ call("personal-pronouns"),
+ call("postpositions"),
call("prepositions"),
call("proper-nouns"),
call("verbs"),
@@ -149,14 +183,46 @@ def test_list_languages_for_data_type_valid(self, mock_print):
call(),
call("Available languages: nouns"),
call("--------------------------"),
+ call("Arabic"),
+ call("Basque"),
+ call("Bengali"),
+ call("Chinese/Mandarin"),
+ call("Czech"),
+ call("Danish"),
call("English"),
+ call("Esperanto"),
+ call("Estonian"),
+ call("Finnish"),
call("French"),
call("German"),
+ call("Greek"),
+ call("Hausa"),
+ call("Hebrew"),
+ call("Hindustani/Hindi"),
+ call("Hindustani/Urdu"),
+ call("Indonesian"),
call("Italian"),
+ call("Japanese"),
+ call("Kurmanji"),
+ call("Latin"),
+ call("Malay"),
+ call("Malayalam"),
+ call("Norwegian/Bokmål"),
+ call("Norwegian/Nynorsk"),
+ call("Pidgin/Nigerian"),
+ call("Polish"),
call("Portuguese"),
+ call("Punjabi/Gurmukhi"),
+ call("Punjabi/Shahmukhi"),
call("Russian"),
+ call("Slovak"),
call("Spanish"),
+ call("Swahili"),
call("Swedish"),
+ call("Tajik"),
+ call("Tamil"),
+ call("Ukrainian"),
+ call("Yoruba"),
call("--------------------------"),
call(),
]
diff --git a/tests/cli/test_utils.py b/tests/cli/test_utils.py
index a827666a2..333c3b7d7 100644
--- a/tests/cli/test_utils.py
+++ b/tests/cli/test_utils.py
@@ -187,7 +187,7 @@ def test_validate_language_and_data_type_invalid_language(self, mock_get_qid):
language=language_qid, data_type=data_type_qid
)
- self.assertEqual(str(context.exception), "Invalid language InvalidLanguage.")
+ self.assertEqual(str(context.exception), "Invalid language 'InvalidLanguage'.")
@patch("scribe_data.cli.total.get_qid_by_input")
def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
@@ -201,7 +201,7 @@ def test_validate_language_and_data_type_invalid_data_type(self, mock_get_qid):
language=language_qid, data_type=data_type_qid
)
- self.assertEqual(str(context.exception), "Invalid data-type InvalidDataType.")
+ self.assertEqual(str(context.exception), "Invalid data-type 'InvalidDataType'.")
@patch("scribe_data.cli.total.get_qid_by_input")
def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
@@ -217,7 +217,7 @@ def test_validate_language_and_data_type_both_invalid(self, mock_get_qid):
self.assertEqual(
str(context.exception),
- "Invalid language InvalidLanguage.\nInvalid data-type InvalidDataType.",
+ "Invalid language 'InvalidLanguage'.\nInvalid data-type 'InvalidDataType'.",
)
def test_validate_language_and_data_type_with_list(self):
@@ -248,5 +248,5 @@ def test_validate_language_and_data_type_mixed_validity_in_lists(self):
data_types = ["nouns", "InvalidDataType"]
with self.assertRaises(ValueError) as context:
validate_language_and_data_type(languages, data_types)
- self.assertIn("Invalid language InvalidLanguage", str(context.exception))
- self.assertIn("Invalid data-type InvalidDataType", str(context.exception))
+ self.assertIn("Invalid language 'InvalidLanguage'", str(context.exception))
+ self.assertIn("Invalid data-type 'InvalidDataType'", str(context.exception))
diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py
index 638ee09dd..43eaa2038 100644
--- a/tests/load/test_update_utils.py
+++ b/tests/load/test_update_utils.py
@@ -21,7 +21,6 @@
"""
import sys
-import unittest
from pathlib import Path
import pytest
@@ -31,25 +30,6 @@
from scribe_data import utils
-def test_get_scribe_languages():
- test_case = unittest.TestCase()
-
- # test for content, not order
- test_case.assertCountEqual(
- utils.get_scribe_languages(),
- [
- "English",
- "French",
- "German",
- "Italian",
- "Portuguese",
- "Russian",
- "Spanish",
- "Swedish",
- ],
- )
-
-
@pytest.mark.parametrize(
"language, qid_code",
[
@@ -61,6 +41,7 @@ def test_get_scribe_languages():
("russian", "Q7737"),
("spanish", "Q1321"),
("swedish", "Q9027"),
+ ("bokmål", "Q25167"),
],
)
def test_get_language_qid_positive(language, qid_code):
@@ -88,6 +69,7 @@ def test_get_language_qid_negative():
("russian", "ru"),
("spanish", "es"),
("SwedisH", "sv"),
+ ("bokmål", "nb"),
],
)
def test_get_language_iso_positive(language, iso_code):
@@ -100,7 +82,7 @@ def test_get_language_iso_negative():
assert (
str(excp.value)
- == "Gibberish is currently not a supported language for ISO conversion."
+ == "GIBBERISH is currently not a supported language for ISO conversion."
)
@@ -115,6 +97,7 @@ def test_get_language_iso_negative():
("ru", "Russian"),
("es", "Spanish"),
("sv", "Swedish"),
+ ("nb", "Bokmål"),
],
)
def test_get_language_from_iso_positive(iso_code, language):
@@ -129,89 +112,69 @@ def test_get_language_from_iso_negative():
@pytest.mark.parametrize(
- "language, remove_words",
- [
- (
- "english",
- [
- "of",
- "the",
- "The",
- "and",
- ],
- ),
- (
- "french",
- [
- "of",
- "the",
- "The",
- "and",
- ],
- ),
- ("german", ["of", "the", "The", "and", "NeinJa", "et", "redirect"]),
- ("italian", ["of", "the", "The", "and", "text", "from"]),
- ("portuguese", ["of", "the", "The", "and", "jbutadptflora"]),
- (
- "russian",
- [
- "of",
- "the",
- "The",
- "and",
- ],
- ),
- ("spanish", ["of", "the", "The", "and"]),
- ("swedish", ["of", "the", "The", "and", "Checklist", "Catalogue"]),
- ],
-)
-def test_get_language_words_to_remove(language, remove_words):
- test_case = unittest.TestCase()
-
- # ignore order, only content matters
- test_case.assertCountEqual(
- utils.get_language_words_to_remove(language), remove_words
- )
-
-
-def test_get_language_words_to_remove_negative():
- with pytest.raises(ValueError) as excp:
- _ = utils.get_language_words_to_remove("python")
-
- assert str(excp.value) == "Python is currently not a supported language."
-
-
-@pytest.mark.parametrize(
- "language, ignore_words",
+ "lang, expected_output",
[
- (
- "french",
- [
- "XXe",
- ],
- ),
- ("german", ["Gemeinde", "Familienname"]),
- ("italian", ["The", "ATP"]),
- ("portuguese", []),
- ("russian", []),
- ("spanish", []),
- ("swedish", ["databasdump"]),
+ ("nynorsk", "Norwegian/Nynorsk"),
+ ("bokmål", "Norwegian/Bokmål"),
+ ("english", "English"),
],
)
-def test_get_language_words_to_ignore(language, ignore_words):
- test_case = unittest.TestCase()
-
- # ignore order, only content matters
- test_case.assertCountEqual(
- utils.get_language_words_to_ignore(language), ignore_words
- )
+def test_format_sublanguage_name_positive(lang, expected_output):
+ assert utils.format_sublanguage_name(lang) == expected_output
-def test_get_language_words_to_ignore_negative():
+def test_format_sublanguage_name_negative():
with pytest.raises(ValueError) as excp:
- _ = utils.get_language_words_to_ignore("JAVA")
-
- assert str(excp.value) == "Java is currently not a supported language."
+ _ = utils.format_sublanguage_name("soccer")
+
+ assert str(excp.value) == "SOCCER is not a valid language or sub-language."
+
+
+def test_list_all_languages():
+ expected_languages = [
+ "arabic",
+ "basque",
+ "bengali",
+ "bokmål",
+ "czech",
+ "danish",
+ "english",
+ "esperanto",
+ "estonian",
+ "finnish",
+ "french",
+ "german",
+ "greek",
+ "gurmukhi",
+ "hausa",
+ "hebrew",
+ "hindi",
+ "indonesian",
+ "italian",
+ "japanese",
+ "kurmanji",
+ "latin",
+ "malay",
+ "malayalam",
+ "mandarin",
+ "nigerian",
+ "nynorsk",
+ "polish",
+ "portuguese",
+ "russian",
+ "shahmukhi",
+ "slovak",
+ "spanish",
+ "swahili",
+ "swedish",
+ "tajik",
+ "tamil",
+ "ukrainian",
+ "urdu",
+ "yoruba",
+ ]
+
+ assert utils.list_all_languages() == expected_languages
def test_get_ios_data_path():