Skip to content

Commit

Permalink
Added check for docstring validation in the queries
Browse files Browse the repository at this point in the history
  • Loading branch information
KesharwaniArpita authored Oct 27, 2024
1 parent a487a18 commit b9bf39a
Showing 1 changed file with 42 additions and 0 deletions.
42 changes: 42 additions & 0 deletions src/scribe_data/check/check_query_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,8 @@ def check_forms_order(query_text: str) -> bool:


# MARK: Main Query Forms Validation


def check_query_forms() -> None:
"""
Validates SPARQL queries in the language data directory to check for correct form QIDs.
Expand Down Expand Up @@ -595,5 +597,45 @@ def check_query_forms() -> None:
print("All query forms are labeled and formatted correctly.")


# MARK: Docstring Validation


def check_sparql_docstring_format(file_path: Path) -> bool:
    """
    Checks if the SPARQL query docstring at the beginning of the file is correct and follows the specified format.

    The first lines of the file (one per expected header line) are read,
    stripped of surrounding whitespace and compared in order against the
    expected patterns. Each line must match its pattern in full, so trailing
    text after an otherwise valid header line is reported as an error.

    Parameters
    ----------
    file_path : Path
        The path to the SPARQL query file to check.

    Returns
    -------
    bool
        True if the docstring format is correct, False otherwise. False is
        also returned (after printing a message) when the file cannot be
        opened or decoded as UTF-8.
    """
    docstring_patterns = (
        r"# tool: scribe-data",
        # NOTE(review): the language name is limited to a single run of ASCII
        # letters; confirm that no query uses a multi-word or accented name.
        r"# All [A-Za-z]+ \(Q\d+\) [A-Za-z\s]+ \(Q\d+\) and the given forms\.",
        # The dots of the host name are escaped so they only match a literal ".".
        r"# Enter this query at https://query\.wikidata\.org/\.",
    )

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            # readline() returns "" past the end of the file, so a file that is
            # too short yields empty lines that fail the comparison below.
            lines = [file.readline().strip() for _ in docstring_patterns]

    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers
        # UnicodeDecodeError raised while decoding the header lines.
        print(f"Error reading {file_path}: {e}")
        return False

    for line, pattern in zip(lines, docstring_patterns):
        # fullmatch (not match) so that the end of the line is anchored as well.
        if not re.fullmatch(pattern, line):
            print(
                f"Docstring format error in {file_path}: '{line}' does not match '{pattern}'"
            )
            return False

    return True


# Script entry point: run the query forms validation when this file is
# executed directly rather than imported.
# NOTE(review): check_sparql_docstring_format is not called from this entry
# point in the visible code; confirm whether it is wired in elsewhere.
if __name__ == "__main__":
    check_query_forms()

0 comments on commit b9bf39a

Please sign in to comment.