Skip to content

Commit

Permalink
Merge pull request #489 from OmarAI2003/check-docstring-format
Browse files Browse the repository at this point in the history
Implement check_docstring function for SPARQL docstring validation
  • Loading branch information
andrewtavis authored Oct 26, 2024
2 parents bfde990 + 60755bd commit 40c41ab
Show file tree
Hide file tree
Showing 58 changed files with 122 additions and 59 deletions.
50 changes: 50 additions & 0 deletions src/scribe_data/check/check_query_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,48 @@ def check_forms_order(query_text: str) -> bool:
return select_vars == where_vars


# MARK: Docstring Format


def check_docstring(query_text: str) -> "bool | tuple[bool, str]":
    """
    Checks the docstring of a SPARQL query text to ensure it follows the standard format.

    The first three lines of the query are each matched against a fixed
    pattern: the tool marker, the "All <language> (QID) <data type> (QID) and
    the given forms." summary, and the Wikidata Query Service pointer.

    Parameters
    ----------
    query_text : str
        The SPARQL query's text to be checked.

    Returns
    -------
    bool or tuple[bool, str]
        True if the docstring is correctly formatted. Otherwise a tuple
        ``(False, message)`` where ``message`` names the first offending
        line and echoes its stripped content (or notes that it is missing).
    """
    # Split the text into lines, keeping line endings: the patterns below end
    # in "\n", so each header line must be newline-terminated to match.
    query_lines = query_text.splitlines(keepends=True)

    # Regex patterns for each line in the docstring and corresponding error messages.
    patterns = [
        (r"^# tool: scribe-data\n", "Error in line 1:"),
        (
            r"^# All (.+?) \(Q\d+\) .+ \(Q\d+\) and the given forms\.\n",
            "Error in line 2:",
        ),
        (
            r"^# Enter this query at https://query\.wikidata\.org/\.\n",
            "Error in line 3:",
        ),
    ]

    for i, (pattern, error_line_number) in enumerate(patterns):
        # A query shorter than the expected header is reported as a format
        # error instead of raising IndexError on the missing line.
        if i >= len(query_lines):
            return False, f"{error_line_number} missing line."

        if not re.match(pattern, query_lines[i]):
            return False, f"{error_line_number} {query_lines[i].strip()}"

    return True


# MARK: Main Query Forms Validation
def check_query_forms() -> None:
"""
Expand All @@ -506,6 +548,14 @@ def check_query_forms() -> None:
with open(query_file, "r", encoding="utf-8") as file:
query_text = file.read()

# Check the docstring format.
docstring_check_result = check_docstring(query_text)
if docstring_check_result is not True:
error_output += (
f"\n{index}. {query_file_str}:\n - {docstring_check_result}\n"
)
index += 1

# Check for unique return forms and handle the error message.
unique_check_result = check_unique_return_forms(query_text)
if unique_check_result is not True:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) adjectives (Q34698) and the given forms.
# All Bengali (Q9610) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) adverbs (Q380057) and the given forms.
# All Bengali (Q9610) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) nouns (Q1084) and the given forms.
# All Bengali (Q9610) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) postpositions (Q161873) and the given forms.
# All Bengali (Q9610) postpositions (Q161873) and the given forms.
# Enter this query at https://query.wikidata.org/.


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) prepositions (Q4833830) and the given forms.
# All Bengali (Q9610) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) proper nouns (Q147276) and the given forms.
# All Bengali (Q9610) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bengali (Bangla Q9610) verbs (Q24905) and the given forms.
# All Bengali (Q9610) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# Dagbani (Q32238) adjectives (Q34698) and the given forms.
# All Dagbani (Q32238) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Dagbani (Q32238) prepositions and the given forms.
# All Dagbani (Q32238) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# Dagbani (Q32238) verbs and the given forms.
# All Dagbani (Q32238) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) adjectives (Q34698) and the given forms..
# All Hindi Hindustani (Q11051) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) adverbs (Q380057) and the given forms.
# All Hindi Hindustani (Q11051) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) nouns (Q1084) and the given forms.
# All Hindi Hindustani (Q11051) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) postpositions (Q161873) and the given forms.
# All Hindi Hindustani (Q11051) postpositions (Q161873) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) prepositions (Q4833830) and the given forms.
# All Hindi Hindustani (Q11051) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) proper nouns (Q147276) and the given forms.
# All Hindi Hindustani (Q11051) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Hindi (from Hindustani Q11051) verbs (Q24905) and the given forms.
# All Hindi Hindustani (Q11051) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "hi" to remove Urdu (ur) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) adjectives (Q34698) and the given forms..
# All Urdu Hindustani (Q11051) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) adverbs (Q380057) and the given forms.
# All Urdu Hindustani (Q11051) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) nouns (Q1084) and the given forms.
# All Urdu Hindustani (Q11051) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) postpositions (Q161873) and the given forms.
# All Urdu Hindustani (Q11051) postpositions (Q161873) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) prepositions (Q4833830) and the given forms.
# All Urdu Hindustani (Q11051) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) proper nouns (Q147276) and the given forms.
# All Urdu Hindustani (Q11051) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindi (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Urdu (from Hindustani Q11051) verbs and the currently implemented conjugations for each.
# All Urdu Hindustani (Q11051) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "ur" to remove Hindustani (hi) words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# tool: scribe-data
# tool: scribe-data
# All Indonesian (Q9240) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

# tool: scribe-data
# All Italian (Q652) verbs and the currently implemented tenses for each.
# All Italian (Q652) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Malayalam (Q36236) nouns (Q1084) and the given forms and the given forms.
# All Malayalam (Q36236) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Malayalam (Q36236) proper nouns (Q147276) and the given forms and the given forms.
# All Malayalam (Q36236) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# tool: scribe-data
# All Bokmål Norwegian (Q25167) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: This query is for Bokmål (Q25167) rather than Nynorsk (Q25164).

SELECT
(REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID)
?adverb

WHERE {
?lexeme dct:language wd:Q25167 ;
wikibase:lexicalCategory wd:Q380057 ;
wikibase:lemma ?adverb .
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bokmål (Q25167) verbs and basic forms.
# All Bokmål (Q25167) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Bokmål (Q25167) verbs and additional forms.
# All Bokmål (Q25167) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Nynorsk Norwegian (Q25164) adverbs.
# All Nynorsk Norwegian (Q25164) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: This query is for Nynorsk (Q25164) rather than Bokmål (Q25167).
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All persian (Q9168) prepositions and the given forms.
# All Persian (Q9168) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Persian (Q9168) verbs (Q24905) and their indicative aorist forms.
# All Persian (Q9168) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Persian (Q9168) verbs (Q24905) and the given forms, including past tense.
# All Persian (Q9168) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Persian (Q9168) verbs and the given present perfect tense forms.
# All Persian (Q9168) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Persian (Q9168) verbs (Q24905) and the given forms, including present subjunctive.
# All Persian (Q9168) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Gurmukhi (from Punjabi Q58635) adjectives (Q34698) and the given forms.
# All Punjabi Gurmukhi (Q58635) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Gurmukhi (from Punjabi Q58635) adverbs (Q380057) and the given forms.
# All Punjabi Gurmukhi (Q58635) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Gurmukhi (from Punjabi Q58635) nouns (Q1084) and the given forms.
# All Punjabi Gurmukhi (Q58635) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "pa" to select Gurmukhi words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Gurmukhi (from Punjabi Q58635) prepositions (Q4833830) and the given forms.
# All Punjabi Gurmukhi (Q58635) prepositions (Q4833830) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Gurmukhi (from Punjabi Q58635) proper nouns (Q147276) and the given forms.
# All Punjabi Gurmukhi (Q58635) proper nouns (Q147276) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "pa" to select Gurmukhi words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Gurmukhi (from Punjabi Q58635) verbs (Q24905) and the given forms.
# All Punjabi Gurmukhi (Q58635) verbs (Q24905) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "pa" to select Gurmukhi words.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Shahmukhi (from Punjabi Q58635) adjectives (Q34698) and the given forms.
# All Punjabi Shahmukhi (Q58635) adjectives (Q34698) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Shahmukhi (from Punjabi Q58635) adverbs (Q380057) and the given forms.
# All Punjabi Shahmukhi (Q58635) adverbs (Q380057) and the given forms.
# Enter this query at https://query.wikidata.org/.

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# tool: scribe-data
# All Shahmukhi (from Punjabi Q58635) nouns (Q1084) and the given forms.
# All Punjabi Shahmukhi (Q58635) nouns (Q1084) and the given forms.
# Enter this query at https://query.wikidata.org/.

# Note: We need to filter for "pnb" to select Shahmukhi words.
Expand Down
Loading

0 comments on commit 40c41ab

Please sign in to comment.