Merge branch 'scribe-org:main' into AK_docstrings

scribe-org · Oct 27, 2024 · 9592a26 · 9592a26
2 parents b9bf39a + 40c41ab
commit 9592a26
Show file tree

Hide file tree

Showing 61 changed files with 211 additions and 121 deletions.
diff --git a/src/scribe_data/check/check_query_forms.py b/src/scribe_data/check/check_query_forms.py
@@ -493,6 +493,48 @@ def check_forms_order(query_text: str) -> bool:
     return select_vars == where_vars
 
 
+# MARK: Docstring Format
+
+
+def check_docstring(query_text: str) -> bool:
+    """
+    Checks that the first three lines of a SPARQL query text follow the standard header format.
+
+    Each of the three lines is matched against its own regex. The check stops at the
+    first line that does not match and reports that line.
+
+    Parameters
+    ----------
+        query_text : str
+            The SPARQL query's text to be checked.
+
+    Returns
+    -------
+        bool or tuple
+            True if all three lines match their patterns. Otherwise a tuple
+            ``(False, message)`` where the message is the "Error in line N:" prefix
+            followed by the stripped text of the first line that did not match.
+
+    Notes
+    -----
+        NOTE(review): the signature is annotated ``-> bool`` but the failure path
+        returns a tuple; confirm which contract callers should rely on.
+
+        NOTE(review): ``query_lines[i]`` is indexed for i in 0..2, so a query text with
+        fewer than three lines raises IndexError instead of returning an error tuple.
+    """
+    # Split the text into lines, keeping the line endings because every pattern
+    # below ends with an explicit "\n".
+    query_lines = query_text.splitlines(keepends=True)
+
+    # One (regex, error prefix) pair per header line, in the order the lines must appear.
+    patterns = [
+        (r"^# tool: scribe-data\n", "Error in line 1:"),
+        (
+            r"^# All (.+?) \(Q\d+\) .+ \(Q\d+\) and the given forms\.\n",
+            "Error in line 2:",
+        ),
+        (
+            r"^# Enter this query at https://query\.wikidata\.org/\.\n",
+            "Error in line 3:",
+        ),
+    ]
+    # next() yields the error tuple for the first non-matching line, or the default
+    # True when the generator is empty (i.e. every line matched).
+    return next(
+        (
+            (False, f"{error_line_number} {query_lines[i].strip()}")
+            for i, (pattern, error_line_number) in enumerate(patterns)
+            if not re.match(pattern, query_lines[i])
+        ),
+        True,
+    )
+
+
 # MARK: Main Query Forms Validation
 
 
@@ -508,6 +550,14 @@ def check_query_forms() -> None:
         with open(query_file, "r", encoding="utf-8") as file:
             query_text = file.read()
 
+        # Check the docstring format.
+        docstring_check_result = check_docstring(query_text)
+        if docstring_check_result is not True:
+            error_output += (
+                f"\n{index}. {query_file_str}:\n  - {docstring_check_result}\n"
+            )
+            index += 1
+
         # Check for unique return forms and handle the error message.
         unique_check_result = check_unique_return_forms(query_text)
         if unique_check_result is not True:

diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py
@@ -25,6 +25,8 @@
 from typing import List
 
 import questionary
+from prompt_toolkit import prompt
+from prompt_toolkit.completion import WordCompleter
 from questionary import Choice
 from rich import print as rprint
 from rich.console import Console
@@ -103,77 +105,78 @@ def configure_settings():
         - Output directory
         - Whether to overwrite
     """
+    rprint(
+        "[cyan]Follow the prompts below. Press tab for completions and enter to select.[/cyan]"
+    )
     # MARK: Languages
-
+    language_completer = WordCompleter(["All"] + config.languages, ignore_case=True)
     if not config.selected_languages:
-        language_selected = False
-        language_choices = ["All"] + config.languages
-        selected_languages = questionary.checkbox(
-            message="Select languages and press enter:",
-            choices=language_choices,
-        ).ask()
+        selected_languages = prompt(
+            "Select languages (comma-separated or type 'All'): ",
+            completer=language_completer,
+        )
 
         if "All" in selected_languages:
             config.selected_languages = config.languages
-            language_selected = True
-
-        elif selected_languages:
-            config.selected_languages = selected_languages
-            language_selected = True
-
         else:
-            rprint(
-                "[yellow]No language selected. Please select at least one option with space followed by enter.[/yellow]"
-            )
-            if questionary.confirm("Continue?", default=True).ask():
-                return configure_settings()
+            config.selected_languages = [
+                lang.strip()
+                for lang in selected_languages.split(",")
+                if lang.strip() in config.languages
+            ]
 
-    else:
-        language_selected = True
+    if not config.selected_languages:
+        rprint("[yellow]No language selected. Please try again.[/yellow]")
+        return configure_settings()
 
-    if language_selected:
-        # MARK: Data Types
+    # MARK: Data Types
 
-        data_type_selected = False
-        data_type_choices = ["All"] + config.data_types
-        selected_data_types = questionary.checkbox(
-            "Select data types and press enter:",
-            choices=data_type_choices,
-        ).ask()
+    data_type_completer = WordCompleter(["All"] + config.data_types, ignore_case=True)
+    selected_data_types = prompt(
+        "Select data types (comma-separated or type 'All'): ",
+        completer=data_type_completer,
+    )
 
-        if "All" in selected_data_types:
-            config.selected_data_types = config.data_types
-            data_type_selected = True
+    if "All" in selected_data_types.capitalize():
+        config.selected_data_types = config.data_types
+    else:
+        config.selected_data_types = [
+            dt.strip()
+            for dt in selected_data_types.split(",")
+            if dt.strip() in config.data_types
+        ]
 
-        elif selected_data_types:
-            config.selected_data_types = selected_data_types
-            data_type_selected = True
+    if not config.selected_data_types:
+        rprint("[yellow]No data type selected. Please try again.[/yellow]")
+        return configure_settings()
 
-        else:
-            rprint(
-                "[yellow]No data type selected. Please select at least one option with space followed by enter.[/yellow]"
-            )
-            if questionary.confirm("Continue?", default=True).ask():
-                return configure_settings()
+    # MARK: Output Type
 
-        if data_type_selected:
-            # MARK: Output Type
+    output_type_completer = WordCompleter(["json", "csv", "tsv"], ignore_case=True)
+    config.output_type = prompt(
+        "Select output type (json/csv/tsv): ", completer=output_type_completer
+    )
+    while config.output_type not in ["json", "csv", "tsv"]:
+        rprint("[yellow]Invalid output type selected. Please try again.[/yellow]")
+        config.output_type = prompt(
+            "Select output type (json/csv/tsv): ", completer=output_type_completer
+        )
+
+    # MARK: Output Directory
 
-            config.output_type = questionary.select(
-                "Select output type:", choices=["json", "csv", "tsv"]
-            ).ask()
+    if output_dir := prompt(f"Enter output directory (default: {config.output_dir}): "):
+        config.output_dir = Path(output_dir)
 
-            config.output_dir = Path(
-                questionary.text(
-                    "Enter output directory:", default=str(config.output_dir)
-                ).ask()
-            )
+    # MARK: Overwrite Confirmation
 
-            config.overwrite = questionary.confirm(
-                "Overwrite existing files?", default=config.overwrite
-            ).ask()
+    overwrite_completer = WordCompleter(["Y", "n"], ignore_case=True)
+    overwrite = (
+        prompt("Overwrite existing files? (Y/n): ", completer=overwrite_completer)
+        or "y"
+    )
+    config.overwrite = overwrite.lower() == "y"
 
-            display_summary()
+    display_summary()
 
 
 def run_request():
@@ -228,7 +231,7 @@ def start_interactive_mode():
     Provides base options and forwarding to other interactive mode functionality.
     """
     rprint(
-        f"[bold green]Welcome to {get_version_message()} interactive mode![/bold green]"
+        f"[bold cyan]Welcome to {get_version_message()} interactive mode![/bold cyan]"
     )
 
     while True:

diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
@@ -20,6 +20,9 @@
     -->
 """
 
+from http.client import IncompleteRead
+from urllib.error import HTTPError
+
 from SPARQLWrapper import JSON
 
 from scribe_data.utils import (
@@ -244,7 +247,28 @@ def get_total_lexemes(language, data_type, doPrint=True):
 
     sparql.setQuery(query)
     sparql.setReturnFormat(JSON)
-    results = sparql.query().convert()
+    try_count = 0
+    max_retries = 2
+    results = None
+
+    while try_count <= max_retries and results is None:
+        try:
+            results = sparql.query().convert()
+
+        except HTTPError as http_err:
+            print(f"HTTPError occurred: {http_err}")
+
+        except IncompleteRead as read_err:
+            print(f"Incomplete read error occurred: {read_err}")
+
+        try_count += 1
+
+        if results is None:
+            if try_count <= max_retries:
+                print("The query will be retried ...")
+
+            else:
+                print("Query failed after retries.")
 
     # Check if the query returned any results.
     if (

diff --git a/src/scribe_data/wikidata/language_data_extraction/bengali/adjectives/query_adjectives.sparql b/src/scribe_data/wikidata/language_data_extraction/bengali/adjectives/query_adjectives.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) adjectives (Q34698) and the given forms.
+# All Bengali (Q9610) adjectives (Q34698) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/src/scribe_data/wikidata/language_data_extraction/bengali/adverbs/query_adverbs.sparql b/src/scribe_data/wikidata/language_data_extraction/bengali/adverbs/query_adverbs.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) adverbs (Q380057) and the given forms.
+# All Bengali (Q9610) adverbs (Q380057) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/src/scribe_data/wikidata/language_data_extraction/bengali/nouns/query_nouns.sparql b/src/scribe_data/wikidata/language_data_extraction/bengali/nouns/query_nouns.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) nouns (Q1084) and the given forms.
+# All Bengali (Q9610) nouns (Q1084) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/...e_data/wikidata/language_data_extraction/bengali/postpositions/query_postpositions.sparql b/...e_data/wikidata/language_data_extraction/bengali/postpositions/query_postpositions.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) postpositions (Q161873) and the given forms.
+# All Bengali (Q9610) postpositions (Q161873) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 

diff --git a/...ibe_data/wikidata/language_data_extraction/bengali/prepositions/query_prepositions.sparql b/...ibe_data/wikidata/language_data_extraction/bengali/prepositions/query_prepositions.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) prepositions (Q4833830) and the given forms.
+# All Bengali (Q9610) prepositions (Q4833830) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/...ibe_data/wikidata/language_data_extraction/bengali/proper_nouns/query_proper_nouns.sparql b/...ibe_data/wikidata/language_data_extraction/bengali/proper_nouns/query_proper_nouns.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) proper nouns (Q147276) and the given forms.
+# All Bengali (Q9610) proper nouns (Q147276) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/src/scribe_data/wikidata/language_data_extraction/bengali/verbs/query_verbs.sparql b/src/scribe_data/wikidata/language_data_extraction/bengali/verbs/query_verbs.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Bengali (Bangla Q9610) verbs (Q24905) and the given forms.
+# All Bengali (Q9610) verbs (Q24905) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/src/scribe_data/wikidata/language_data_extraction/dagbani/adjectives/query_adjectives.sparql b/src/scribe_data/wikidata/language_data_extraction/dagbani/adjectives/query_adjectives.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# Dagbani (Q32238) adjectives (Q34698) and the given forms.
+# All Dagbani (Q32238) adjectives (Q34698) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/...ibe_data/wikidata/language_data_extraction/dagbani/prepositions/query_prepositions.sparql b/...ibe_data/wikidata/language_data_extraction/dagbani/prepositions/query_prepositions.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Dagbani (Q32238) prepositions and the given forms.
+# All Dagbani (Q32238) prepositions (Q4833830) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/src/scribe_data/wikidata/language_data_extraction/dagbani/verbs/query_verbs.sparql b/src/scribe_data/wikidata/language_data_extraction/dagbani/verbs/query_verbs.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# Dagbani (Q32238) verbs and the given forms.
+# All Dagbani (Q32238) verbs (Q24905) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 SELECT

diff --git a/...ata/wikidata/language_data_extraction/hindustani/hindi/adjectives/query_adjectives.sparql b/...ata/wikidata/language_data_extraction/hindustani/hindi/adjectives/query_adjectives.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) adjectives (Q34698) and the given forms..
+# All Hindi Hindustani (Q11051) adjectives (Q34698) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/...ribe_data/wikidata/language_data_extraction/hindustani/hindi/adverbs/query_adverbs.sparql b/...ribe_data/wikidata/language_data_extraction/hindustani/hindi/adverbs/query_adverbs.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) adverbs (Q380057) and the given forms.
+# All Hindi Hindustani (Q11051) adverbs (Q380057) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/src/scribe_data/wikidata/language_data_extraction/hindustani/hindi/nouns/query_nouns.sparql b/src/scribe_data/wikidata/language_data_extraction/hindustani/hindi/nouns/query_nouns.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) nouns (Q1084) and the given forms.
+# All Hindi Hindustani (Q11051) nouns (Q1084) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/...kidata/language_data_extraction/hindustani/hindi/postpositions/query_postpositions.sparql b/...kidata/language_data_extraction/hindustani/hindi/postpositions/query_postpositions.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) postpositions (Q161873) and the given forms.
+# All Hindi Hindustani (Q11051) postpositions (Q161873) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/...wikidata/language_data_extraction/hindustani/hindi/prepositions/query_prepositions.sparql b/...wikidata/language_data_extraction/hindustani/hindi/prepositions/query_prepositions.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) prepositions (Q4833830) and the given forms.
+# All Hindi Hindustani (Q11051) prepositions (Q4833830) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/...wikidata/language_data_extraction/hindustani/hindi/proper_nouns/query_proper_nouns.sparql b/...wikidata/language_data_extraction/hindustani/hindi/proper_nouns/query_proper_nouns.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) proper nouns (Q147276) and the given forms.
+# All Hindi Hindustani (Q11051) proper nouns (Q147276) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/src/scribe_data/wikidata/language_data_extraction/hindustani/hindi/verbs/query_verbs.sparql b/src/scribe_data/wikidata/language_data_extraction/hindustani/hindi/verbs/query_verbs.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Hindi (from Hindustani Q11051) verbs (Q24905) and the given forms.
+# All Hindi Hindustani (Q11051) verbs (Q24905) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "hi" to remove Urdu (ur) words.

diff --git a/...data/wikidata/language_data_extraction/hindustani/urdu/adjectives/query_adjectives.sparql b/...data/wikidata/language_data_extraction/hindustani/urdu/adjectives/query_adjectives.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Urdu (from Hindustani Q11051) adjectives (Q34698) and the given forms..
+# All Urdu Hindustani (Q11051) adjectives (Q34698) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.

diff --git a/...cribe_data/wikidata/language_data_extraction/hindustani/urdu/adverbs/query_adverbs.sparql b/...cribe_data/wikidata/language_data_extraction/hindustani/urdu/adverbs/query_adverbs.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Urdu (from Hindustani Q11051) adverbs (Q380057) and the given forms.
+# All Urdu Hindustani (Q11051) adverbs (Q380057) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.

diff --git a/src/scribe_data/wikidata/language_data_extraction/hindustani/urdu/nouns/query_nouns.sparql b/src/scribe_data/wikidata/language_data_extraction/hindustani/urdu/nouns/query_nouns.sparql
@@ -1,5 +1,5 @@
 # tool: scribe-data
-# All Urdu (from Hindustani Q11051) nouns (Q1084) and the given forms.
+# All Urdu Hindustani (Q11051) nouns (Q1084) and the given forms.
 # Enter this query at https://query.wikidata.org/.
 
 # Note: We need to filter for "ur" to remove Hindi (hi) words.