diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py index 17c20b9b..cc4ff34c 100644 --- a/src/scribe_data/cli/get.py +++ b/src/scribe_data/cli/get.py @@ -103,27 +103,21 @@ def get_data( # MARK: Get All if all: + # Using wikimedia lexeme based dump + if wiki_dump: print("wiki_dump", wiki_dump) download_wrapper(None, wiki_dump) else: - # user_response = input( - # "We'll using lexeme dump from dumps.wikimedia.org/wikidatawiki/entities." - # "Do you want to Use it? (Yes/Cancel): " - # ).strip().lower() - # if user_response == "yes" or user_response=="": print("Using wikimedia lexeme dump...") file_path = download_wrapper() - if file_path: + if isinstance(file_path, str) and file_path: rprint("[bold green]we'll use this lexeme dump[/bold green]", file_path) rprint( "[bold red]Parsing lexeme dump feature will be available soon...[/bold red]" ) - else: - print("Error occurred! Please check the dump file") - # else: - # print("canceled...") - # return + + # Using sparql based data extract # if language: # language_or_sub_language = language.split(" ")[0] diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index 523088d3..a5b6f6ac 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -38,7 +38,7 @@ DEFAULT_CSV_EXPORT_DIR = "scribe_data_csv_export" DEFAULT_TSV_EXPORT_DIR = "scribe_data_tsv_export" DEFAULT_SQLITE_EXPORT_DIR = "scribe_data_sqlite_export" -DEFAULT_DUMP_EXPORT_DIR = "scribe_data_wiki-dumps_export" +DEFAULT_DUMP_EXPORT_DIR = "scribe_data_wiki_dumps_export" LANGUAGE_DATA_EXTRACTION_DIR = ( Path(__file__).parent / "wikidata" / "language_data_extraction" @@ -635,7 +635,7 @@ def check_lexeme_dump_prompt_download(output_dir): rprint(f" - {Path(output_dir)}/{dump.name}") user_input = input( - "\nDo you want to\n (d)elete existing dumps,\n (s)kip download,\n (u)se existing latest dump\n or download (n)ew version? [d/s/u/n]: " + "\nDo you want to\n - Delete existing dumps,\n - Skip download,\n - Use existing latest dump\n -Download (n)ew version?\n [d/s/u/n]: " ).lower() if user_input == "d": for dump in existing_dumps: