Skip to content

Commit

Permalink
Merge branch 'scribe-org:main' into AK_docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
KesharwaniArpita authored Oct 27, 2024
2 parents b9bf39a + 40c41ab commit 9592a26
Show file tree
Hide file tree
Showing 61 changed files with 211 additions and 121 deletions.
50 changes: 50 additions & 0 deletions src/scribe_data/check/check_query_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,48 @@ def check_forms_order(query_text: str) -> bool:
return select_vars == where_vars


# MARK: Docstring Format


def check_docstring(query_text: str) -> "bool | tuple[bool, str]":
    """
    Checks the docstring of a SPARQL query text to ensure it follows the standard format.

    The first three lines of the query are each matched against a fixed
    pattern; checking stops at the first line that does not match.

    Parameters
    ----------
        query_text : str
            The SPARQL query's text to be checked.

    Returns
    -------
        bool or tuple of (bool, str)
            True if the docstring is correctly formatted. Otherwise a tuple
            ``(False, message)`` where ``message`` names the first offending
            line followed by that line's stripped content (empty when the
            query text has no such line).
    """
    # Keep the line endings: every pattern requires its line to end in "\n".
    query_lines = query_text.splitlines(keepends=True)

    # Regex patterns for each line in the docstring and corresponding error messages.
    patterns = [
        (r"^# tool: scribe-data\n", "Error in line 1:"),
        (
            r"^# All (.+?) \(Q\d+\) .+ \(Q\d+\) and the given forms\.\n",
            "Error in line 2:",
        ),
        (
            r"^# Enter this query at https://query\.wikidata\.org/\.\n",
            "Error in line 3:",
        ),
    ]

    for i, (pattern, error_line_number) in enumerate(patterns):
        # A query shorter than the expected header is a format error, not a
        # crash: treat a missing line as empty so it fails the match below.
        line = query_lines[i] if i < len(query_lines) else ""
        if not re.match(pattern, line):
            return False, f"{error_line_number} {line.strip()}"

    return True


# MARK: Main Query Forms Validation


Expand All @@ -508,6 +550,14 @@ def check_query_forms() -> None:
with open(query_file, "r", encoding="utf-8") as file:
query_text = file.read()

# Check the docstring format.
docstring_check_result = check_docstring(query_text)
if docstring_check_result is not True:
error_output += (
f"\n{index}. {query_file_str}:\n - {docstring_check_result}\n"
)
index += 1

# Check for unique return forms and handle the error message.
unique_check_result = check_unique_return_forms(query_text)
if unique_check_result is not True:
Expand Down
113 changes: 58 additions & 55 deletions src/scribe_data/cli/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from typing import List

import questionary
from prompt_toolkit import prompt
from prompt_toolkit.completion import WordCompleter
from questionary import Choice
from rich import print as rprint
from rich.console import Console
Expand Down Expand Up @@ -103,77 +105,78 @@ def configure_settings():
- Output directory
- Whether to overwrite
"""
rprint(
"[cyan]Follow the prompts below. Press tab for completions and enter to select.[/cyan]"
)
# MARK: Languages

language_completer = WordCompleter(["All"] + config.languages, ignore_case=True)
if not config.selected_languages:
language_selected = False
language_choices = ["All"] + config.languages
selected_languages = questionary.checkbox(
message="Select languages and press enter:",
choices=language_choices,
).ask()
selected_languages = prompt(
"Select languages (comma-separated or type 'All'): ",
completer=language_completer,
)

if "All" in selected_languages:
config.selected_languages = config.languages
language_selected = True

elif selected_languages:
config.selected_languages = selected_languages
language_selected = True

else:
rprint(
"[yellow]No language selected. Please select at least one option with space followed by enter.[/yellow]"
)
if questionary.confirm("Continue?", default=True).ask():
return configure_settings()
config.selected_languages = [
lang.strip()
for lang in selected_languages.split(",")
if lang.strip() in config.languages
]

else:
language_selected = True
if not config.selected_languages:
rprint("[yellow]No language selected. Please try again.[/yellow]")
return configure_settings()

if language_selected:
# MARK: Data Types
# MARK: Data Types

data_type_selected = False
data_type_choices = ["All"] + config.data_types
selected_data_types = questionary.checkbox(
"Select data types and press enter:",
choices=data_type_choices,
).ask()
data_type_completer = WordCompleter(["All"] + config.data_types, ignore_case=True)
selected_data_types = prompt(
"Select data types (comma-separated or type 'All'): ",
completer=data_type_completer,
)

if "All" in selected_data_types:
config.selected_data_types = config.data_types
data_type_selected = True
if "All" in selected_data_types.capitalize():
config.selected_data_types = config.data_types
else:
config.selected_data_types = [
dt.strip()
for dt in selected_data_types.split(",")
if dt.strip() in config.data_types
]

elif selected_data_types:
config.selected_data_types = selected_data_types
data_type_selected = True
if not config.selected_data_types:
rprint("[yellow]No data type selected. Please try again.[/yellow]")
return configure_settings()

else:
rprint(
"[yellow]No data type selected. Please select at least one option with space followed by enter.[/yellow]"
)
if questionary.confirm("Continue?", default=True).ask():
return configure_settings()
# MARK: Output Type

if data_type_selected:
# MARK: Output Type
output_type_completer = WordCompleter(["json", "csv", "tsv"], ignore_case=True)
config.output_type = prompt(
"Select output type (json/csv/tsv): ", completer=output_type_completer
)
while config.output_type not in ["json", "csv", "tsv"]:
rprint("[yellow]Invalid output type selected. Please try again.[/yellow]")
config.output_type = prompt(
"Select output type (json/csv/tsv): ", completer=output_type_completer
)

# MARK: Output Directory

config.output_type = questionary.select(
"Select output type:", choices=["json", "csv", "tsv"]
).ask()
if output_dir := prompt(f"Enter output directory (default: {config.output_dir}): "):
config.output_dir = Path(output_dir)

config.output_dir = Path(
questionary.text(
"Enter output directory:", default=str(config.output_dir)
).ask()
)
# MARK: Overwrite Confirmation

config.overwrite = questionary.confirm(
"Overwrite existing files?", default=config.overwrite
).ask()
overwrite_completer = WordCompleter(["Y", "n"], ignore_case=True)
overwrite = (
prompt("Overwrite existing files? (Y/n): ", completer=overwrite_completer)
or "y"
)
config.overwrite = overwrite.lower() == "y"

display_summary()
display_summary()


def run_request():
Expand Down Expand Up @@ -228,7 +231,7 @@ def start_interactive_mode():
Provides base options and forwarding to other interactive mode functionality.
"""
rprint(
f"[bold green]Welcome to {get_version_message()} interactive mode![/bold green]"
f"[bold cyan]Welcome to {get_version_message()} interactive mode![/bold cyan]"
)

while True:
Expand Down
26 changes: 25 additions & 1 deletion src/scribe_data/cli/total.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
-->
"""

from http.client import IncompleteRead
from urllib.error import HTTPError

from SPARQLWrapper import JSON

from scribe_data.utils import (
Expand Down Expand Up @@ -244,7 +247,28 @@ def get_total_lexemes(language, data_type, doPrint=True):

sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
try_count = 0
max_retries = 2
results = None

while try_count <= max_retries and results is None:
try:
results = sparql.query().convert()

except HTTPError as http_err:
print(f"HTTPError occurred: {http_err}")

except IncompleteRead as read_err:
print(f"Incomplete read error occurred: {read_err}")

try_count += 1

if results is None:
if try_count <= max_retries:
print("The query will be retried ...")

else:
print("Query failed after retries.")

# Check if the query returned any results.
if (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) adjectives (Q34698) and the given forms.
# All Bengali (Q9610) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) adverbs (Q380057) and the given forms.
# All Bengali (Q9610) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) nouns (Q1084) and the given forms.
# All Bengali (Q9610) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) postpositions (Q161873) and the given forms.
# All Bengali (Q9610) postpositions (Q161873) and the given forms.
# Enter this query at https://query.wikidata.org/.


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) prepositions (Q4833830) and the given forms.
# All Bengali (Q9610) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) proper nouns (Q147276) and the given forms.
# All Bengali (Q9610) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) verbs (Q24905) and the given forms.
# All Bengali (Q9610) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# Dagbani (Q32238) adjectives (Q34698) and the given forms.
# All Dagbani (Q32238) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Dagbani (Q32238) prepositions and the given forms.
# All Dagbani (Q32238) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# Dagbani (Q32238) verbs and the given forms.
# All Dagbani (Q32238) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) adjectives (Q34698) and the given forms..
# All Hindi Hindustani (Q11051) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) adverbs (Q380057) and the given forms.
# All Hindi Hindustani (Q11051) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) nouns (Q1084) and the given forms.
# All Hindi Hindustani (Q11051) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) postpositions (Q161873) and the given forms.
# All Hindi Hindustani (Q11051) postpositions (Q161873) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) prepositions (Q4833830) and the given forms.
# All Hindi Hindustani (Q11051) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) proper nouns (Q147276) and the given forms.
# All Hindi Hindustani (Q11051) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) verbs (Q24905) and the given forms.
# All Hindi Hindustani (Q11051) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) adjectives (Q34698) and the given forms..
# All Urdu Hindustani (Q11051) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) adverbs (Q380057) and the given forms.
# All Urdu Hindustani (Q11051) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) nouns (Q1084) and the given forms.
# All Urdu Hindustani (Q11051) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Loading

0 comments on commit 9592a26

Please sign in to comment.