# MARK: docstring Format


def check_docstring(query_text: str) -> "bool | tuple[bool, str]":
    """
    Checks the docstring of a SPARQL query text to ensure it follows the standard format.

    The first three lines of the query are matched, in order, against:

        # tool: scribe-data
        # All <text> (Q<digits>) <text> (Q<digits>) and the given forms.
        # Enter this query at https://query.wikidata.org/.

    Each of these lines must be terminated by a newline character.

    Parameters
    ----------
    query_text : str
        The SPARQL query's text to be checked.

    Returns
    -------
    bool or tuple[bool, str]
        True if the docstring is correctly formatted; otherwise, a tuple of
        False and a message that names the first offending line (followed by
        that line's stripped content, or a note that the line is missing).
    """
    # Keep the line endings: the patterns below require each header line to end in "\n".
    lines = query_text.splitlines(keepends=True)

    # Regex patterns for each line in the docstring and corresponding error messages.
    patterns = [
        (r"^# tool: scribe-data\n", "Error in line 1:"),
        (
            r"^# All (.+?) \(Q\d+\) .+ \(Q\d+\) and the given forms\.\n",
            "Error in line 2:",
        ),
        (
            r"^# Enter this query at https://query\.wikidata\.org/\.\n",
            "Error in line 3:",
        ),
    ]

    # Check each line against its corresponding pattern.
    for i, (pattern, error_line_number) in enumerate(patterns):
        # A query shorter than the expected header would otherwise raise IndexError
        # on lines[i]; report the absent line as a formatting error instead.
        if i >= len(lines):
            return (False, f"{error_line_number} line is missing")

        if not re.match(pattern, lines[i]):
            return (False, f"{error_line_number} {lines[i].strip()}")

    return True