From 49141ce35d8832bb7fd71f0707285f68fbfc3941 Mon Sep 17 00:00:00 2001 From: axif Date: Thu, 31 Oct 2024 16:36:50 +0600 Subject: [PATCH] feat interactive v2.0 --- docs/source/scribe_data/cli.rst | 72 ++++++++++-- src/scribe_data/cli/interactive.py | 173 +++++++++++++++++++++++----- src/scribe_data/cli/total.py | 113 ++++++++++++------ tests/cli/test_interactive.py | 178 +++++++++++++++++++++++++++++ tests/cli/test_total.py | 106 +++++++++++++++++ 5 files changed, 566 insertions(+), 76 deletions(-) create mode 100644 tests/cli/test_interactive.py diff --git a/docs/source/scribe_data/cli.rst b/docs/source/scribe_data/cli.rst index c99eaed29..db67abb50 100644 --- a/docs/source/scribe_data/cli.rst +++ b/docs/source/scribe_data/cli.rst @@ -143,14 +143,31 @@ Options: - ``-ot, --output-type {json,csv,tsv}``: The output file type. - ``-ope, --outputs-per-entry OUTPUTS_PER_ENTRY``: How many outputs should be generated per data entry. - ``-o, --overwrite``: Whether to overwrite existing files (default: False). -- ``-a, --all ALL``: Get all languages and data types. +- ``-a, --all``: Get all languages and data types. Can be combined with `-dt` to get all languages for a specific data type, or with `-lang` to get all data types for a specific language. - ``-i, --interactive``: Run in interactive mode. -Example: +Examples: + +.. code-block:: bash + + $ scribe-data get --all + Getting data for all languages and all data types... + +.. code-block:: bash + + $ scribe-data get --all -dt nouns + Getting all nouns for all languages... + +.. code-block:: bash + + $ scribe-data get --all -lang English + Getting all data types for English... .. code-block:: bash $ scribe-data get -l English --data-type verbs -od ~/path/for/output + Getting and formatting English verbs + Data updated: 100%|████████████████████████| 1/1 [00:29<00:00, 29.73s/process] Behavior and Output: ^^^^^^^^^^^^^^^^^^^^ @@ -242,31 +259,64 @@ Usage: Options: ^^^^^^^^ -- ``-lang, --language LANGUAGE``: The language(s) to check totals for. +- ``-lang, --language LANGUAGE``: The language(s) to check totals for. Can be a language name or QID. - ``-dt, --data-type DATA_TYPE``: The data type(s) to check totals for. -- ``-a, --all ALL``: Get totals for all languages and data types. +- ``-a, --all``: Get totals for all languages and data types. Examples: .. code-block:: text - $scribe-data total -dt nouns # verbs, adjectives, etc - Data type: nouns - Total number of lexemes: 123456 + $ scribe-data total --all + Total lexemes for all languages and data types: + ============================================== + Language Data Type Total Lexemes + ============================================== + English nouns 123456 + verbs 234567 + ... .. code-block:: text - $scribe-data total -lang English - Language: English - Total number of lexemes: 123456 + $ scribe-data total --language English + Returning total counts for English data types... + + Language Data Type Total Wikidata Lexemes + ================================================================ + English adjectives 12,848 + adverbs 19,998 + nouns 30,786 + ... .. code-block:: text - $scribe-data total -lang English -dt nouns # verbs, adjectives, etc + $ scribe-data total --language Q1860 + Wikidata QID Q1860 passed. Checking all data types. + + Language Data Type Total Wikidata Lexemes + ================================================================ + Q1860 adjectives 12,848 + adverbs 19,998 + articles 0 + conjunctions 72 + nouns 30,786 + personal pronouns 32 + ... + +.. code-block:: text + + $ scribe-data total --language English -dt nouns Language: English Data type: nouns Total number of lexemes: 12345 +.. code-block:: text + + $ scribe-data total --language Q1860 -dt verbs + Language: Q1860 + Data type: verbs + Total number of lexemes: 23456 + Convert Command ~~~~~~~~~~~~~~~ diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py index 756370504..1aa51e5eb 100644 --- a/src/scribe_data/cli/interactive.py +++ b/src/scribe_data/cli/interactive.py @@ -33,7 +33,8 @@ from rich.logging import RichHandler from rich.table import Table from tqdm import tqdm - +from scribe_data.cli.total import total_wrapper +from scribe_data.cli.list import list_wrapper from scribe_data.cli.get import get_data from scribe_data.cli.version import get_version_message from scribe_data.utils import ( @@ -53,6 +54,7 @@ ) console = Console() logger = logging.getLogger("rich") +MESSAGE = "[bold cyan]Thank you for using Scribe-Data![/bold cyan]" class ScribeDataConfig: @@ -64,6 +66,7 @@ def __init__(self): self.output_type: str = "json" self.output_dir: Path = Path(DEFAULT_JSON_EXPORT_DIR) self.overwrite: bool = False + self.configured: bool = False config = ScribeDataConfig() @@ -110,30 +113,32 @@ def configure_settings(): ) # MARK: Languages language_completer = WordCompleter(["All"] + config.languages, ignore_case=True) - if not config.selected_languages: - selected_languages = prompt( - "Select languages (comma-separated or type 'All'): ", - completer=language_completer, - ) + initial_language_selection = ", ".join(config.selected_languages) + selected_languages = prompt( + "Select languages (comma-separated or type 'All'): ", + default=initial_language_selection, + completer=language_completer, + ) - if "All" in selected_languages: - config.selected_languages = config.languages - else: - config.selected_languages = [ - lang.strip() - for lang in selected_languages.split(",") - if lang.strip() in config.languages - ] + if "All" in selected_languages: + config.selected_languages = config.languages + else: + config.selected_languages = [ + lang.strip() + for lang in selected_languages.split(",") + if lang.strip() in config.languages + ] if not config.selected_languages: rprint("[yellow]No language selected. Please try again.[/yellow]") return configure_settings() # MARK: Data Types - data_type_completer = WordCompleter(["All"] + config.data_types, ignore_case=True) + initial_data_type_selection = ", ".join(config.selected_data_types) selected_data_types = prompt( "Select data types (comma-separated or type 'All'): ", + default=initial_data_type_selection, completer=data_type_completer, ) @@ -151,7 +156,6 @@ def configure_settings(): return configure_settings() # MARK: Output Type - output_type_completer = WordCompleter(["json", "csv", "tsv"], ignore_case=True) config.output_type = prompt( "Select output type (json/csv/tsv): ", completer=output_type_completer @@ -163,12 +167,10 @@ def configure_settings(): ) # MARK: Output Directory - if output_dir := prompt(f"Enter output directory (default: {config.output_dir}): "): config.output_dir = Path(output_dir) # MARK: Overwrite Confirmation - overwrite_completer = WordCompleter(["Y", "n"], ignore_case=True) overwrite = ( prompt("Overwrite existing files? (Y/n): ", completer=overwrite_completer) @@ -176,6 +178,7 @@ def configure_settings(): ) config.overwrite = overwrite.lower() == "y" + config.configured = True display_summary() @@ -226,34 +229,148 @@ def run_request(): # MARK: Start -def start_interactive_mode(): +def request_total_lexeme(): """ - Provides base options and forwarding to other interactive mode functionality. + Requests language and data type for lexeme totals. """ - rprint( - f"[bold cyan]Welcome to {get_version_message()} interactive mode![/bold cyan]" + # MARK: Language Selection + language_completer = WordCompleter(["All"] + config.languages, ignore_case=True) + initial_language_selection = ", ".join(config.selected_languages) + selected_languages = prompt( + "Select languages (comma-separated or 'All'): ", + default=initial_language_selection, + completer=language_completer, ) + if "All" in selected_languages: + config.selected_languages = config.languages + elif selected_languages.strip(): # Check if input is not just whitespace + config.selected_languages = [ + lang.strip() + for lang in selected_languages.split(",") + if lang.strip() in config.languages + ] + + if not config.selected_languages: + rprint("[yellow]No language selected. Please try again.[/yellow]") + return request_total_lexeme() + + # MARK: Data Type Selection + data_type_completer = WordCompleter(["All"] + config.data_types, ignore_case=True) + initial_data_type_selection = ", ".join(config.selected_data_types) + selected_data_types = prompt( + "Select data types (comma-separated or 'All'): ", + default=initial_data_type_selection, + completer=data_type_completer, + ) + if "All" in selected_data_types.capitalize(): + config.selected_data_types = config.data_types + elif selected_data_types.strip(): # Check if input is not just whitespace + config.selected_data_types = [ + dt.strip() + for dt in selected_data_types.split(",") + if dt.strip() in config.data_types + ] + + if not config.selected_data_types: + rprint("[yellow]No data type selected. Please try again.[/yellow]") + return request_total_lexeme() + +def request_total_lexeme_loop(): + """ + Continuously prompts for lexeme requests until exit. + """ while True: choice = questionary.select( "What would you like to do?", choices=[ - Choice("Configure request", "configure"), - Choice("Run configured data request", "run"), + Choice("Request total lexeme", "total"), + Choice("Run for total lexeme", "run"), Choice("Exit", "exit"), ], ).ask() + if choice == "run": + total_wrapper( + language=config.selected_languages, + data_type=config.selected_data_types, + all_bool=False, + ) + config.selected_languages, config.selected_data_types = [], [] + rprint(MESSAGE) + break + elif choice == "exit": + return + else: + # config.selected_languages, config.selected_data_types = [], [] + request_total_lexeme() + + +def see_list_languages(): + """ + See list of languages. + """ + + choice = questionary.select( + "What would you like to list?", + choices=[ + Choice("All languages", "all_languages"), + Choice("Languages for a specific data type", "languages_for_data_type"), + Choice("Data types for a specific language", "data_types_for_language"), + ], + ).ask() + + if choice == "all_languages": + list_wrapper(all_bool=True) + elif choice == "languages_for_data_type": + list_wrapper(data_type=True) + elif choice == "data_types_for_language": + list_wrapper(language=True) + + +def start_interactive_mode(): + """ + Entry point for interactive mode. + """ + rprint( + f"[bold cyan]Welcome to {get_version_message()} interactive mode![/bold cyan]" + ) + while True: + # Check if both selected_languages and selected_data_types are empty + if not config.selected_languages and not config.selected_data_types: + choices = [ + Choice("Request get data", "configure"), + Choice("Request total lexeme", "total"), + Choice("See list of languages", "languages"), + Choice("Exit", "exit"), + ] + else: + choices = [ + Choice("Request get data", "configure"), + Choice("Exit", "exit"), + ] + if config.configured: + choices.insert(1, Choice("Run configured data request", "run")) + else: + choices.insert(1, Choice("Request total lexeme", "total")) + + choice = questionary.select("What would you like to do?", choices=choices).ask() + if choice == "configure": configure_settings() - + elif choice == "total": + request_total_lexeme() + request_total_lexeme_loop() + break + elif choice == "languages": + see_list_languages() + break elif choice == "run": run_request() - rprint("[bold cyan]Thank you for using Scribe-Data![/bold cyan]") + rprint(MESSAGE) break - else: - rprint("[bold cyan]Thank you for using Scribe-Data![/bold cyan]") + rprint(MESSAGE) break diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py index 466f0c731..3e3809486 100644 --- a/src/scribe_data/cli/total.py +++ b/src/scribe_data/cli/total.py @@ -25,12 +25,11 @@ import requests from SPARQLWrapper import JSON - +from typing import List, Union from scribe_data.utils import ( LANGUAGE_DATA_EXTRACTION_DIR, data_type_metadata, format_sublanguage_name, - language_map, language_metadata, language_to_qid, list_all_languages, @@ -65,7 +64,7 @@ def get_qid_by_input(input_str): def get_datatype_list(language): """ - Get the data types for a given language based on the project directory structure. + Get the data types for a given language based on the project directory structure, including handling sub-languages. Parameters ---------- @@ -77,29 +76,42 @@ def get_datatype_list(language): data_types : list[str] or None A list of the corresponding data types. """ + language_key = language.strip().lower() # Normalize input languages = list_all_languages(language_metadata) - if language.lower() in languages: - language_data = language_map.get(language.lower()) - languages = format_sublanguage_name(language, language_metadata) - language_dir = LANGUAGE_DATA_EXTRACTION_DIR / language - - if not language_data: - raise ValueError(f"Language '{language}' is not recognized.") - - data_types = [f.name for f in language_dir.iterdir() if f.is_dir()] - if not data_types: - raise ValueError( - f"No data types available for language '{language.capitalize()}'." - ) - - data_types = sorted(data_types) - - for t in ["autosuggestions", "emoji_keywords"]: - if t in data_types: - data_types.remove(t) - - return data_types + # Adjust language_key for sub-languages using the format_sublanguage_name function + formatted_language = format_sublanguage_name(language_key, language_metadata) + language_key = formatted_language.split("/")[ + 0 + ].lower() # Use the main language part if formatted + + if language_key in languages: + if "sub_languages" in language_metadata[language_key]: + sub_languages = language_metadata[language_key]["sub_languages"] + data_types = [] + for sub_lang_key in sub_languages: + sub_lang_dir = ( + LANGUAGE_DATA_EXTRACTION_DIR / sub_languages[sub_lang_key]["iso"] + ) + if sub_lang_dir.exists(): + data_types.extend( + [f.name for f in sub_lang_dir.iterdir() if f.is_dir()] + ) + if not data_types: + raise ValueError( + f"No data types available for sub-languages of '{formatted_language}'." + ) + return sorted(set(data_types)) # Remove duplicates and sort + else: + language_dir = LANGUAGE_DATA_EXTRACTION_DIR / language_key + if not language_dir.exists(): + raise ValueError(f"Directory '{language_dir}' does not exist.") + data_types = [f.name for f in language_dir.iterdir() if f.is_dir()] + if not data_types: + raise ValueError( + f"No data types available for language '{formatted_language}'." + ) + return sorted(data_types) else: # return all data types return data_type_metadata @@ -171,15 +183,16 @@ def print_total_lexemes(language: str = None): else: print(f"Returning total counts for {language} data types...\n") - def print_total_header(): + def print_total_header(language, dt, total_lexemes): """ Prints the header of the total command output. """ + language_display = ( + "All Languages" if language is None else language.capitalize() + ) print(f"{'Language':<20} {'Data Type':<25} {'Total Wikidata Lexemes':<25}") print("=" * 70) - print( - f"{language.capitalize():<20} {dt.replace('_', '-'): <25} {total_lexemes:<25}" - ) + print(f"{language_display:<20} {dt.replace('_', '-'): <25} {total_lexemes:<25}") if language is None: # all languages languages = list_all_languages(language_metadata) @@ -192,7 +205,7 @@ def print_total_header(): total_lexemes = get_total_lexemes(lang, dt, False) total_lexemes = f"{total_lexemes:,}" if first_row: - print_total_header() + print_total_header(lang, dt, total_lexemes) first_row = False else: @@ -215,7 +228,7 @@ def print_total_header(): total_lexemes = get_total_lexemes(language, dt, False) total_lexemes = f"{total_lexemes:,}" if first_row: - print_total_header() + print_total_header(language, dt, total_lexemes) first_row = False else: @@ -343,19 +356,20 @@ def get_total_lexemes(language, data_type, doPrint=True): def total_wrapper( - language: str = None, data_type: str = None, all_bool: bool = False + language: Union[str, List[str]] = None, + data_type: Union[str, List[str]] = None, + all_bool: bool = False, ) -> None: """ Conditionally provides the full functionality of the total command. + Now accepts lists for language and data type to output a table of total lexemes. Parameters ---------- - language : str - The language to potentially total data types for. - - data_type : str - The data type to check for. - + language : Union[str, List[str]] + The language(s) to potentially total data types for. + data_type : Union[str, List[str]] + The data type(s) to check for. all_bool : boolean Whether all languages and data types should be listed. """ @@ -363,6 +377,31 @@ def total_wrapper( if (not language and not data_type) and all_bool: print_total_lexemes() + elif isinstance(language, list) or isinstance(data_type, list): + languages = language if isinstance(language, list) else [language] + data_types = data_type if isinstance(data_type, list) else [data_type] + + print(f"{'Language':<20} {'Data Type':<25} {'Total Lexemes':<25}") + print("=" * 70) + + for lang in languages: + first_row = ( + True # Flag to check if it's the first data type for the language + ) + for dt in data_types: + total_lexemes = get_total_lexemes(lang, dt, False) + total_lexemes = ( + f"{total_lexemes:,}" if total_lexemes is not None else "N/A" + ) + if first_row: + print(f"{lang:<20} {dt:<25} {total_lexemes:<25}") + first_row = False + else: + print( + f"{'':<20} {dt:<25} {total_lexemes:<25}" + ) # Print empty space for language + print() + elif language is not None and data_type is None: print_total_lexemes(language) diff --git a/tests/cli/test_interactive.py b/tests/cli/test_interactive.py new file mode 100644 index 000000000..fdce95546 --- /dev/null +++ b/tests/cli/test_interactive.py @@ -0,0 +1,178 @@ +""" +Interactive for the list file functions. + +.. raw:: html + +""" + +import unittest +from unittest.mock import patch, MagicMock, call +from pathlib import Path +from scribe_data.cli.interactive import ( + ScribeDataConfig, + configure_settings, + display_summary, + run_request, + request_total_lexeme, +) + + +class TestScribeDataInteractive(unittest.TestCase): + def setUp(self): + """Set up test fixtures before each test method.""" + self.config = ScribeDataConfig() + # Mock the language_metadata and data_type_metadata + self.config.languages = ["english", "spanish", "french"] + self.config.data_types = ["nouns", "verbs"] + + def test_scribe_data_config_initialization(self): + """Test ScribeDataConfig initialization.""" + self.assertEqual(self.config.selected_languages, []) + self.assertEqual(self.config.selected_data_types, []) + self.assertEqual(self.config.output_type, "json") + self.assertIsInstance(self.config.output_dir, Path) + self.assertFalse(self.config.overwrite) + self.assertFalse(self.config.configured) + + @patch("scribe_data.cli.interactive.prompt") + @patch("scribe_data.cli.interactive.rprint") + def test_configure_settings_all_languages(self, mock_rprint, mock_prompt): + """Test configure_settings with 'All' languages selection.""" + # Set up mock responses + responses = iter( + [ + "All", # languages + "nouns", # data types + "json", # output type + "", # output directory (default) + "y", # overwrite + ] + ) + mock_prompt.side_effect = lambda *args, **kwargs: next(responses) + + with patch("scribe_data.cli.interactive.config", self.config): + with patch("scribe_data.cli.interactive.display_summary"): + configure_settings() + + self.assertEqual(self.config.selected_languages, self.config.languages) + self.assertEqual(self.config.selected_data_types, ["nouns"]) + self.assertEqual(self.config.output_type, "json") + self.assertTrue(self.config.configured) + + @patch("scribe_data.cli.interactive.prompt") + @patch("scribe_data.cli.interactive.rprint") + def test_configure_settings_specific_languages(self, mock_rprint, mock_prompt): + """Test configure_settings with specific language selection.""" + # Set up mock responses + responses = iter( + [ + "english, spanish", # languages + "nouns, verbs", # data types + "csv", # output type + "/custom/path", # output directory + "n", # overwrite + ] + ) + mock_prompt.side_effect = lambda *args, **kwargs: next(responses) + + with patch("scribe_data.cli.interactive.config", self.config): + with patch("scribe_data.cli.interactive.display_summary"): + configure_settings() + + self.assertEqual(self.config.selected_languages, ["english", "spanish"]) + self.assertEqual(self.config.selected_data_types, ["nouns", "verbs"]) + self.assertEqual(self.config.output_type, "csv") + self.assertEqual(str(self.config.output_dir), "/custom/path") + self.assertFalse(self.config.overwrite) + + @patch("scribe_data.cli.interactive.get_data") + @patch("scribe_data.cli.interactive.tqdm") + @patch("scribe_data.cli.interactive.logger") + def test_run_request(self, mock_logger, mock_tqdm, mock_get_data): + """Test run_request functionality.""" + # Setup + self.config.selected_languages = ["english"] + self.config.selected_data_types = ["nouns"] + self.config.configured = True + + mock_get_data.return_value = True + mock_progress = MagicMock() + mock_tqdm.return_value.__enter__.return_value = mock_progress + + with patch("scribe_data.cli.interactive.config", self.config): + run_request() + + mock_get_data.assert_called_once_with( + language="english", + data_type="nouns", + output_type=self.config.output_type, + output_dir=str(self.config.output_dir), + overwrite=self.config.overwrite, + interactive=True, + ) + + @patch("scribe_data.cli.interactive.prompt") + @patch("scribe_data.cli.interactive.rprint") + def test_request_total_lexeme(self, mock_rprint, mock_prompt): + """Test request_total_lexeme functionality.""" + # Set up mock responses + mock_prompt.side_effect = [ + "english, french", # First call for languages + "nouns", # First call for data types + ] + + with patch("scribe_data.cli.interactive.config", self.config): + with patch( + "scribe_data.cli.interactive.list_all_languages", + return_value=["english", "french"], + ): + request_total_lexeme() + + # Verify the config was updated correctly + self.assertEqual(self.config.selected_languages, ["english", "french"]) + self.assertEqual(self.config.selected_data_types, ["nouns"]) + + # Verify prompt was called with correct arguments + expected_calls = [ + call( + "Select languages (comma-separated or 'All'): ", + completer=unittest.mock.ANY, + default="", + ), + call( + "Select data types (comma-separated or 'All'): ", + completer=unittest.mock.ANY, + default="", + ), + ] + mock_prompt.assert_has_calls(expected_calls, any_order=False) + + @patch("rich.console.Console.print") + def test_display_summary(self, mock_print): + """Test display_summary functionality.""" + self.config.selected_languages = ["english"] + self.config.selected_data_types = ["nouns"] + self.config.output_type = "json" + + with patch("scribe_data.cli.interactive.config", self.config): + display_summary() + mock_print.assert_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cli/test_total.py b/tests/cli/test_total.py index f601c26db..88ec28387 100644 --- a/tests/cli/test_total.py +++ b/tests/cli/test_total.py @@ -26,6 +26,9 @@ from scribe_data.cli.total import ( get_qid_by_input, get_total_lexemes, + get_datatype_list, + check_qid_is_language, + total_wrapper, ) @@ -133,6 +136,42 @@ def test_get_total_lexemes_various_data_types(self, mock_query, mock_get_qid): ] mock_print.assert_has_calls(expected_calls) + @patch("scribe_data.cli.total.get_qid_by_input") + @patch("scribe_data.cli.total.sparql.query") + @patch("scribe_data.cli.total.LANGUAGE_DATA_EXTRACTION_DIR") + def test_get_total_lexemes_sub_languages(self, mock_dir, mock_query, mock_get_qid): + # Setup for sub-languages + mock_get_qid.side_effect = lambda x: { + "bokmål": "Q25167", + "nynorsk": "Q25164", + }.get(x.lower()) + mock_results = MagicMock() + mock_results.convert.return_value = { + "results": {"bindings": [{"total": {"value": "30"}}]} + } + mock_query.return_value = mock_results + + # Mocking directory paths and contents + mock_dir.__truediv__.return_value.exists.return_value = True + mock_dir.__truediv__.return_value.iterdir.return_value = [ + MagicMock(name="verbs", is_dir=lambda: True), + MagicMock(name="nouns", is_dir=lambda: True), + ] + + with patch("builtins.print") as mock_print: + get_total_lexemes("Norwegian", "verbs") + get_total_lexemes("Norwegian", "nouns") + + expected_calls = [ + call( + "\nLanguage: Norwegian\nData type: verbs\nTotal number of lexemes: 30\n" + ), + call( + "\nLanguage: Norwegian\nData type: nouns\nTotal number of lexemes: 30\n" + ), + ] + mock_print.assert_has_calls(expected_calls) + class TestGetQidByInput(unittest.TestCase): def setUp(self): @@ -154,3 +193,70 @@ def test_get_qid_by_input_invalid(self, mock_data_type_metadata): mock_data_type_metadata.update(self.valid_data_types) self.assertIsNone(get_qid_by_input("invalid_data_type")) + + +class TestGetDatatypeList(unittest.TestCase): + @patch("scribe_data.cli.total.LANGUAGE_DATA_EXTRACTION_DIR") + def test_get_datatype_list_invalid_language(self, mock_dir): + mock_dir.__truediv__.return_value.exists.return_value = False + + with self.assertRaises(ValueError): + get_datatype_list("InvalidLanguage") + + @patch("scribe_data.cli.total.LANGUAGE_DATA_EXTRACTION_DIR") + def test_get_datatype_list_no_data_types(self, mock_dir): + mock_dir.__truediv__.return_value.exists.return_value = True + mock_dir.__truediv__.return_value.iterdir.return_value = [] + + with self.assertRaises(ValueError): + get_datatype_list("English") + + +class TestCheckQidIsLanguage(unittest.TestCase): + @patch("scribe_data.cli.total.requests.get") + def test_check_qid_is_language_valid(self, mock_get): + mock_response = MagicMock() + mock_response.json.return_value = { + "statements": {"P31": [{"value": {"content": "Q34770"}}]}, + "labels": {"en": "English"}, + } + mock_get.return_value = mock_response + + with patch("builtins.print") as mock_print: + result = check_qid_is_language("Q1860") + + self.assertEqual(result, "English") + mock_print.assert_called_once_with("English (Q1860) is a language.\n") + + @patch("scribe_data.cli.total.requests.get") + def test_check_qid_is_language_invalid(self, mock_get): + mock_response = MagicMock() + mock_response.json.return_value = { + "statements": {"P31": [{"value": {"content": "Q5"}}]}, + "labels": {"en": "Human"}, + } + mock_get.return_value = mock_response + + with self.assertRaises(ValueError): + check_qid_is_language("Q5") + + +class TestTotalWrapper(unittest.TestCase): + @patch("scribe_data.cli.total.print_total_lexemes") + def test_total_wrapper_all_bool(self, mock_print_total_lexemes): + total_wrapper(all_bool=True) + mock_print_total_lexemes.assert_called_once_with() + + @patch("scribe_data.cli.total.print_total_lexemes") + def test_total_wrapper_language_only(self, mock_print_total_lexemes): + total_wrapper(language="English") + mock_print_total_lexemes.assert_called_once_with("English") + + @patch("scribe_data.cli.total.get_total_lexemes") + def test_total_wrapper_language_and_data_type(self, mock_get_total_lexemes): + total_wrapper(language="English", data_type="nouns") + mock_get_total_lexemes.assert_called_once_with("English", "nouns") + + def test_total_wrapper_invalid_input(self): + with self.assertRaises(ValueError): + total_wrapper()