diff --git a/src/scribe_data/check/check_query_forms.py b/src/scribe_data/check/check_query_forms.py index 5897c420..b4ab72d0 100644 --- a/src/scribe_data/check/check_query_forms.py +++ b/src/scribe_data/check/check_query_forms.py @@ -162,29 +162,29 @@ def extract_form_qids(form_text: str): return [q.split("wd:")[1].split(" .")[0] for q in match[0].split(", ")] -# MARK: Correct Panctuation +# MARK: Punctuation def check_query_formatting(form_text: str): """ - Checks the formatting of the given SPARQL query text for common issues. + Checks the formatting of the given SPARQL query text for common formatting issues. Parameters ---------- - query_text : str - The SPARQL query text to check. + form_text : str + The SPARQL query text to check. Returns ------- - str - A message indicating formatting issues, if any. + bool + True if the query has no formatting errors, False otherwise. """ - # Check for spaces before commas - if re.search(r"\s+[,]", form_text): + # Check for spaces before commas that should not exist. + if re.search(r"\s,", form_text): return False - # Check for spaces before periods and semicolons - if re.search(r"\S(?=[.;])", form_text): + # Check for non-space characters directly before periods and semicolons (a space is required there). 
+ if re.search(r"\S[.;]", form_text): return False return True @@ -478,7 +478,7 @@ def check_query_forms() -> None: "ontolex:lexicalForm" in form_text and "ontolex:representation" in form_text ): - correct_form_spacing = check_query_formatting(form_text=form_text) + correct_formatting = check_query_formatting(form_text=form_text) form_rep_label = extract_form_rep_label(form_text=form_text) check = check_form_label(form_text=form_text) qids = extract_form_qids(form_text=form_text) @@ -486,30 +486,29 @@ def check_query_forms() -> None: query_form_check_dict[form_rep_label] = { "form_rep_match": check, - "correct_form_spacing": correct_form_spacing, + "correct_formatting": correct_formatting, "qids": qids, "correct_form_rep_label": correct_form_rep_label, } if query_form_check_dict: incorrect_query_labels = [] - for k in query_form_check_dict: - if k != query_form_check_dict[k]["correct_form_spacing"] is False: + for k, v in query_form_check_dict.items(): + if k != v["correct_formatting"] is False: incorrect_query_labels.append( ( k, "Invalid query formatting found - please put spaces before all periods and semicolons and also remove spaces before commas.", ) ) - else: - if k != query_form_check_dict[k]["correct_form_rep_label"]: - incorrect_query_labels.append( - (k, query_form_check_dict[k]["correct_form_rep_label"]) - ) - elif query_form_check_dict[k]["form_rep_match"] is False: - incorrect_query_labels.append( - (k, "Form and representation labels don't match") - ) + elif k != query_form_check_dict[k]["correct_form_rep_label"]: + incorrect_query_labels.append( + (k, query_form_check_dict[k]["correct_form_rep_label"]) + ) + elif query_form_check_dict[k]["form_rep_match"] is False: + incorrect_query_labels.append( + (k, "Form and representation labels don't match") + ) if incorrect_query_labels: current_rep_label_to_correct_label_str = [