diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py index 6a0e0426..1d78c3b9 100644 --- a/src/scribe_data/cli/get.py +++ b/src/scribe_data/cli/get.py @@ -22,6 +22,7 @@ import os # for removing original JSON files import subprocess +import sys from pathlib import Path from typing import List, Union @@ -35,6 +36,11 @@ ) from scribe_data.wikidata.query_data import query_data +from scribe_data.check.check_pyicu import ( + check_and_install_pyicu, + check_if_pyicu_installed, +) + def get_data( language: str = None, @@ -136,7 +142,14 @@ def get_data( # MARK: Emojis elif data_type in {"emoji-keywords", "emoji_keywords"}: - generate_emoji(language=language, output_dir=output_dir) + if not check_if_pyicu_installed(): + check_and_install_pyicu() + # Try to dynamically import the PyICU module + try: + generate_emoji(language=language, output_dir=output_dir) + except ImportError: + os.execv(sys.executable, ["python"] + sys.argv) + generate_emoji(language=language, output_dir=output_dir) # MARK: Query Data diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py index 1aa51e5e..d79101a9 100644 --- a/src/scribe_data/cli/interactive.py +++ b/src/scribe_data/cli/interactive.py @@ -34,7 +34,8 @@ from rich.table import Table from tqdm import tqdm from scribe_data.cli.total import total_wrapper -from scribe_data.cli.list import list_wrapper + +# from scribe_data.cli.list import list_wrapper from scribe_data.cli.get import get_data from scribe_data.cli.version import get_version_message from scribe_data.utils import ( @@ -54,7 +55,7 @@ ) console = Console() logger = logging.getLogger("rich") -MESSAGE = "[bold cyan]Thank you for using Scribe-Data![/bold cyan]" +THANK_YOU_MESSAGE = "[bold cyan]Thank you for using Scribe-Data![/bold cyan]" class ScribeDataConfig: @@ -97,32 +98,31 @@ def display_summary(): console.print("\n") -def configure_settings(): - """ - Configures the settings of the interactive mode request. +# Helper function to create a WordCompleter +def create_word_completer( + options: List[str], include_all: bool = False +) -> WordCompleter: + if include_all: + options = ["All"] + options + return WordCompleter(options, ignore_case=True) - Asks for: - - Languages - - Data types - - Output type - - Output directory - - Whether to overwrite + +# MARK: Language Selection +def prompt_for_languages(): """ - rprint( - "[cyan]Follow the prompts below. Press tab for completions and enter to select.[/cyan]" - ) - # MARK: Languages - language_completer = WordCompleter(["All"] + config.languages, ignore_case=True) + Requests language and data type for lexeme totals. + """ + # MARK: Language Selection + language_completer = create_word_completer(config.languages, include_all=True) initial_language_selection = ", ".join(config.selected_languages) selected_languages = prompt( - "Select languages (comma-separated or type 'All'): ", + "Select languages (comma-separated or 'All'): ", default=initial_language_selection, completer=language_completer, ) - if "All" in selected_languages: config.selected_languages = config.languages - else: + elif selected_languages.strip(): # Check if input is not just whitespace config.selected_languages = [ lang.strip() for lang in selected_languages.split(",") @@ -131,32 +131,55 @@ def configure_settings(): if not config.selected_languages: rprint("[yellow]No language selected. Please try again.[/yellow]") - return configure_settings() + return prompt_for_languages() - # MARK: Data Types - data_type_completer = WordCompleter(["All"] + config.data_types, ignore_case=True) - initial_data_type_selection = ", ".join(config.selected_data_types) - selected_data_types = prompt( - "Select data types (comma-separated or type 'All'): ", - default=initial_data_type_selection, - completer=data_type_completer, - ) - if "All" in selected_data_types.capitalize(): - config.selected_data_types = config.data_types - else: - config.selected_data_types = [ - dt.strip() - for dt in selected_data_types.split(",") - if dt.strip() in config.data_types - ] +# MARK: Data Type Selection +def prompt_for_data_types(): + data_type_completer = create_word_completer(config.data_types, include_all=True) + initial_data_type_selection = ", ".join(config.selected_data_types) + while True: + selected_data_types = prompt( + "Select data types (comma-separated or 'All'): ", + default=initial_data_type_selection, + completer=data_type_completer, + ) + if "All" in selected_data_types.capitalize(): + config.selected_data_types = config.data_types + break + elif selected_data_types.strip(): # Check if input is not just whitespace + config.selected_data_types = [ + dt.strip() + for dt in selected_data_types.split(",") + if dt.strip() in config.data_types + ] + if config.selected_data_types: + break # Exit loop if valid data types are selected - if not config.selected_data_types: rprint("[yellow]No data type selected. Please try again.[/yellow]") - return configure_settings() + + +def configure_settings(): + """ + Configures the settings of the interactive mode request. + + Asks for: + - Languages + - Data types + - Output type + - Output directory + - Whether to overwrite + """ + rprint( + "[cyan]Follow the prompts below. Press tab for completions and enter to select.[/cyan]" + ) + # MARK: Languages + prompt_for_languages() + # MARK: Data Types + prompt_for_data_types() # MARK: Output Type - output_type_completer = WordCompleter(["json", "csv", "tsv"], ignore_case=True) + output_type_completer = create_word_completer(["json", "csv", "tsv"]) config.output_type = prompt( "Select output type (json/csv/tsv): ", completer=output_type_completer ) @@ -171,7 +194,7 @@ def configure_settings(): config.output_dir = Path(output_dir) # MARK: Overwrite Confirmation - overwrite_completer = WordCompleter(["Y", "n"], ignore_case=True) + overwrite_completer = create_word_completer(["Y", "n"]) overwrite = ( prompt("Overwrite existing files? (Y/n): ", completer=overwrite_completer) or "y" @@ -226,56 +249,6 @@ def run_request(): rprint("[bold green]Data request completed successfully![/bold green]") -# MARK: Start - - -def request_total_lexeme(): - """ - Requests language and data type for lexeme totals. - """ - # MARK: Language Selection - language_completer = WordCompleter(["All"] + config.languages, ignore_case=True) - initial_language_selection = ", ".join(config.selected_languages) - selected_languages = prompt( - "Select languages (comma-separated or 'All'): ", - default=initial_language_selection, - completer=language_completer, - ) - if "All" in selected_languages: - config.selected_languages = config.languages - elif selected_languages.strip(): # Check if input is not just whitespace - config.selected_languages = [ - lang.strip() - for lang in selected_languages.split(",") - if lang.strip() in config.languages - ] - - if not config.selected_languages: - rprint("[yellow]No language selected. Please try again.[/yellow]") - return request_total_lexeme() - - # MARK: Data Type Selection - data_type_completer = WordCompleter(["All"] + config.data_types, ignore_case=True) - initial_data_type_selection = ", ".join(config.selected_data_types) - selected_data_types = prompt( - "Select data types (comma-separated or 'All'): ", - default=initial_data_type_selection, - completer=data_type_completer, - ) - if "All" in selected_data_types.capitalize(): - config.selected_data_types = config.data_types - elif selected_data_types.strip(): # Check if input is not just whitespace - config.selected_data_types = [ - dt.strip() - for dt in selected_data_types.split(",") - if dt.strip() in config.data_types - ] - - if not config.selected_data_types: - rprint("[yellow]No data type selected. Please try again.[/yellow]") - return request_total_lexeme() - - def request_total_lexeme_loop(): """ Continuously prompts for lexeme requests until exit. @@ -297,38 +270,41 @@ def request_total_lexeme_loop(): all_bool=False, ) config.selected_languages, config.selected_data_types = [], [] - rprint(MESSAGE) + rprint(THANK_YOU_MESSAGE) break elif choice == "exit": return else: - # config.selected_languages, config.selected_data_types = [], [] - request_total_lexeme() + prompt_for_languages() + prompt_for_data_types() -def see_list_languages(): - """ - See list of languages. - """ +# MARK: List + +# def see_list_languages(): +# """ +# See list of languages. +# """ - choice = questionary.select( - "What would you like to list?", - choices=[ - Choice("All languages", "all_languages"), - Choice("Languages for a specific data type", "languages_for_data_type"), - Choice("Data types for a specific language", "data_types_for_language"), - ], - ).ask() +# choice = questionary.select( +# "What would you like to list?", +# choices=[ +# Choice("All languages", "all_languages"), +# Choice("Languages for a specific data type", "languages_for_data_type"), +# Choice("Data types for a specific language", "data_types_for_language"), +# ], +# ).ask() - if choice == "all_languages": - list_wrapper(all_bool=True) - elif choice == "languages_for_data_type": - list_wrapper(data_type=True) - elif choice == "data_types_for_language": - list_wrapper(language=True) +# if choice == "all_languages": +# list_wrapper(all_bool=True) +# elif choice == "languages_for_data_type": +# list_wrapper(data_type=True) +# elif choice == "data_types_for_language": +# list_wrapper(language=True) -def start_interactive_mode(): +# MARK: Start +def start_interactive_mode(selectMode: str = None): """ Entry point for interactive mode. """ @@ -338,39 +314,47 @@ def start_interactive_mode(): while True: # Check if both selected_languages and selected_data_types are empty if not config.selected_languages and not config.selected_data_types: - choices = [ - Choice("Request get data", "configure"), - Choice("Request total lexeme", "total"), - Choice("See list of languages", "languages"), - Choice("Exit", "exit"), - ] + if selectMode == "Get": + choices = [ + Choice("Request get data", "configure"), + # Choice("See list of languages", "languages"), + Choice("Exit", "exit"), + ] + elif selectMode == "Total": + choices = [ + Choice("Request total lexeme", "total"), + # Choice("See list of languages", "languages"), + Choice("Exit", "exit"), + ] + else: choices = [ Choice("Request get data", "configure"), Choice("Exit", "exit"), ] if config.configured: - choices.insert(1, Choice("Run configured data request", "run")) + choices.insert(1, Choice("Request for get data", "run")) else: - choices.insert(1, Choice("Request total lexeme", "total")) + choices.insert(1, Choice("Request for total lexeme", "total")) choice = questionary.select("What would you like to do?", choices=choices).ask() if choice == "configure": configure_settings() elif choice == "total": - request_total_lexeme() + prompt_for_languages() + prompt_for_data_types() request_total_lexeme_loop() break - elif choice == "languages": - see_list_languages() - break + # elif choice == "languages": + # see_list_languages() + # break elif choice == "run": run_request() - rprint(MESSAGE) + rprint(THANK_YOU_MESSAGE) break else: - rprint(MESSAGE) + rprint(THANK_YOU_MESSAGE) break diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index 313ab74d..cea99f88 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -178,6 +178,9 @@ def main() -> None: action=argparse.BooleanOptionalAction, help="Check for all languages and data types.", ) + total_parser.add_argument( + "-i", "--interactive", action="store_true", help="Run in interactive mode" + ) # MARK: Convert @@ -273,7 +276,7 @@ def main() -> None: elif args.command in ["get", "g"]: if args.interactive: - start_interactive_mode() + start_interactive_mode(selectMode="Get") else: get_data( @@ -287,9 +290,12 @@ def main() -> None: ) elif args.command in ["total", "t"]: - total_wrapper( - language=args.language, data_type=args.data_type, all_bool=args.all - ) + if args.interactive: + start_interactive_mode(selectMode="Total") + else: + total_wrapper( + language=args.language, data_type=args.data_type, all_bool=args.all + ) elif args.command in ["convert", "c"]: convert_wrapper( diff --git a/src/scribe_data/unicode/generate_emoji_keywords.py b/src/scribe_data/unicode/generate_emoji_keywords.py index beb34257..3a1eb8ab 100644 --- a/src/scribe_data/unicode/generate_emoji_keywords.py +++ b/src/scribe_data/unicode/generate_emoji_keywords.py @@ -23,10 +23,6 @@ import os from pathlib import Path -from scribe_data.check.check_pyicu import ( - check_and_install_pyicu, - check_if_pyicu_installed, -) from scribe_data.unicode.process_unicode import gen_emoji_lexicon from scribe_data.utils import export_formatted_data, get_language_iso @@ -55,33 +51,30 @@ def generate_emoji(language, output_dir: str = None): ------- None: The function does not return any value but outputs data to the specified directory. """ - if check_and_install_pyicu() and check_if_pyicu_installed() is False: - print("Thank you.") - if check_if_pyicu_installed(): - iso = get_language_iso(language=language) - path_to_cldr_annotations = ( - Path(__file__).parent / "cldr-annotations-full" / "annotations" - ) - if iso in os.listdir(path_to_cldr_annotations): - print(f"Emoji Generation for language {language} is supported") + iso = get_language_iso(language=language) + path_to_cldr_annotations = ( + Path(__file__).parent / "cldr-annotations-full" / "annotations" + ) + if iso in os.listdir(path_to_cldr_annotations): + print(f"Emoji Generation for language {language} is supported") - else: - print(f"Emoji Generation for language {language} is not supported") - return + else: + print(f"Emoji Generation for language {language} is not supported") + return - updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir - export_dir = Path(updated_path) / language.capitalize() - export_dir.mkdir(parents=True, exist_ok=True) + updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir + export_dir = Path(updated_path) / language.capitalize() + export_dir.mkdir(parents=True, exist_ok=True) - if emoji_keywords_dict := gen_emoji_lexicon( - language=language, - emojis_per_keyword=EMOJI_KEYWORDS_DICT, - ): - export_formatted_data( - file_path=output_dir, - formatted_data=emoji_keywords_dict, - query_data_in_use=True, - language=language.capitalize(), - data_type=DATA_TYPE, - ) + if emoji_keywords_dict := gen_emoji_lexicon( + language=language, + emojis_per_keyword=EMOJI_KEYWORDS_DICT, + ): + export_formatted_data( + file_path=output_dir, + formatted_data=emoji_keywords_dict, + query_data_in_use=True, + language=language.capitalize(), + data_type=DATA_TYPE, + )