Skip to content

Commit

Permalink
Merge pull request #485 from VNW22/Update-Check-Query-Forms
Browse files Browse the repository at this point in the history
Update check query forms
  • Loading branch information
andrewtavis authored Oct 25, 2024
2 parents 328d916 + 9d953c0 commit 86676b0
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 19 deletions.
42 changes: 39 additions & 3 deletions src/scribe_data/check/check_query_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,34 @@ def extract_form_qids(form_text: str):
return [q.split("wd:")[1].split(" .")[0] for q in match[0].split(", ")]


# MARK: Punctuation


def check_query_formatting(form_text: str) -> bool:
    """
    Checks the formatting of the given SPARQL query text for common formatting issues.

    Parameters
    ----------
        form_text : str
            The SPARQL query text to check.

    Returns
    -------
        bool
            True if no formatting issue was found, False if at least one was found.
    """
    # A comma must directly follow the preceding token: any whitespace
    # character (including a newline) right before a comma is an error.
    if re.search(r"\s,", form_text):
        return False

    # A period or semicolon must be separated from the preceding token:
    # any non-whitespace character right before one of them is an error.
    if re.search(r"\S[.;]", form_text):
        return False

    return True


# MARK: Correct Label


Expand Down Expand Up @@ -450,25 +478,33 @@ def check_query_forms() -> None:
"ontolex:lexicalForm" in form_text
and "ontolex:representation" in form_text
):
correct_formatting = check_query_formatting(form_text=form_text)
form_rep_label = extract_form_rep_label(form_text=form_text)
check = check_form_label(form_text=form_text)
qids = extract_form_qids(form_text=form_text)
correct_form_rep_label = return_correct_form_label(qids=qids)

query_form_check_dict[form_rep_label] = {
"form_rep_match": check,
"correct_formatting": correct_formatting,
"qids": qids,
"correct_form_rep_label": correct_form_rep_label,
}

if query_form_check_dict:
incorrect_query_labels = []
for k in query_form_check_dict:
if k != query_form_check_dict[k]["correct_form_rep_label"]:
for k, v in query_form_check_dict.items():
if k != v["correct_formatting"] is False:
incorrect_query_labels.append(
(
k,
"Invalid query formatting found - please put spaces before all periods and semicolons and also remove spaces before commas.",
)
)
elif k != query_form_check_dict[k]["correct_form_rep_label"]:
incorrect_query_labels.append(
(k, query_form_check_dict[k]["correct_form_rep_label"])
)

elif query_form_check_dict[k]["form_rep_match"] is False:
incorrect_query_labels.append(
(k, "Form and representation labels don't match")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@ SELECT
?pastWordStem

WHERE {
?lexeme dct:language wd:Q9168;
wikibase:lexicalCategory wd:Q24905;
wikibase:lemma ?infinitive.
?lexeme dct:language wd:Q9168 ;
wikibase:lexicalCategory wd:Q24905 ;
wikibase:lemma ?infinitive .

#MARK: Past Participle

OPTIONAL {
?lexeme ontolex:lexicalForm ?presentParticipleForm .
?presentParticipleForm ontolex:representation ?presentParticiple ;
wikibase:grammaticalFeature wd:Q192613, wd:Q814722 .
FILTER(lang(?presentParticiple) = "fa").
FILTER(lang(?presentParticiple) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?pastParticipleForm .
?pastParticipleForm ontolex:representation ?pastParticiple ;
wikibase:grammaticalFeature wd:Q814722, wd:Q1994301 .
FILTER(lang(?pastParticiple) = "fa").
FILTER(lang(?pastParticiple) = "fa") .
}

#MARK: Word Stem
Expand All @@ -37,13 +37,13 @@ WHERE {
?lexeme ontolex:lexicalForm ?presentWordStemForm .
?presentWordStemForm ontolex:representation ?presentWordStem ;
wikibase:grammaticalFeature wd:Q192613, wd:Q210523 .
FILTER(lang(?presentWordStem) = "fa").
FILTER(lang(?presentWordStem) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?pastWordStemForm .
?pastWordStemForm ontolex:representation ?pastWordStem ;
wikibase:grammaticalFeature wd:Q1994301, wd:Q210523 .
FILTER(lang(?pastWordStem) = "fa").
FILTER(lang(?pastWordStem) = "fa") .
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,51 +13,51 @@ SELECT
?indicativeThirdPersonAoristPlural

WHERE {
?lexeme dct:language wd:Q9168;
wikibase:lexicalCategory wd:Q24905;
wikibase:lemma ?infinitive.
?lexeme dct:language wd:Q9168 ;
wikibase:lexicalCategory wd:Q24905 ;
wikibase:lemma ?infinitive .

#MARK: Indicative Aorist

OPTIONAL {
?lexeme ontolex:lexicalForm ?indicativeFirstPersonAoristSingularForm .
?indicativeFirstPersonAoristSingularForm ontolex:representation ?indicativeFirstPersonAoristSingular ;
wikibase:grammaticalFeature wd:Q21714344, wd:Q110786, wd:Q682111, wd:Q216497 .
FILTER(lang(?indicativeFirstPersonAoristSingular) = "fa").
FILTER(lang(?indicativeFirstPersonAoristSingular) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?indicativeSecondPersonAoristSingularForm .
?indicativeSecondPersonAoristSingularForm ontolex:representation ?indicativeSecondPersonAoristSingular ;
wikibase:grammaticalFeature wd:Q51929049, wd:Q110786, wd:Q682111, wd:Q216497 .
FILTER(lang(?indicativeSecondPersonAoristSingular) = "fa").
FILTER(lang(?indicativeSecondPersonAoristSingular) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?indicativeThirdPersonAoristSingularForm .
?indicativeThirdPersonAoristSingularForm ontolex:representation ?indicativeThirdPersonAoristSingular ;
wikibase:grammaticalFeature wd:Q51929074, wd:Q110786, wd:Q682111, wd:Q216497 .
FILTER(lang(?indicativeThirdPersonAoristSingular) = "fa").
FILTER(lang(?indicativeThirdPersonAoristSingular) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?indicativeFirstPersonAoristPluralForm .
?indicativeFirstPersonAoristPluralForm ontolex:representation ?indicativeFirstPersonAoristPlural ;
wikibase:grammaticalFeature wd:Q21714344, wd:Q146786, wd:Q682111, wd:Q216497 .
FILTER(lang(?indicativeFirstPersonAoristPlural) = "fa").
FILTER(lang(?indicativeFirstPersonAoristPlural) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?indicativeSecondPersonAoristPluralForm .
?indicativeSecondPersonAoristPluralForm ontolex:representation ?indicativeSecondPersonAoristPlural ;
wikibase:grammaticalFeature wd:Q51929049, wd:Q146786, wd:Q682111, wd:Q216497 .
FILTER(lang(?indicativeSecondPersonAoristPlural) = "fa").
FILTER(lang(?indicativeSecondPersonAoristPlural) = "fa") .
}

OPTIONAL {
?lexeme ontolex:lexicalForm ?indicativeThirdPersonAoristPluralForm .
?indicativeThirdPersonAoristPluralForm ontolex:representation ?indicativeThirdPersonAoristPlural ;
wikibase:grammaticalFeature wd:Q51929074, wd:Q146786, wd:Q682111, wd:Q216497 .
FILTER(lang(?indicativeThirdPersonAoristPlural) = "fa").
FILTER(lang(?indicativeThirdPersonAoristPlural) = "fa") .
}
}

0 comments on commit 86676b0

Please sign in to comment.