Skip to content

Commit

Permalink
Merge branch 'ru2' into ru
Browse files Browse the repository at this point in the history
  • Loading branch information
De7vID committed Oct 11, 2024
2 parents ba3a3a8 + a06115b commit 97e04a1
Show file tree
Hide file tree
Showing 33 changed files with 5,808 additions and 1,075 deletions.
2 changes: 1 addition & 1 deletion EXTRA
Original file line number Diff line number Diff line change
@@ -1 +1 @@
15350
15444
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2024.07.14a
2024.08.20a
28 changes: 23 additions & 5 deletions book/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

derived_index = DefaultDict[str, List[BoqwizEntry]](list)


def make_derived_index():
global derived_index
for entry in dictionary.entries.values():
Expand All @@ -45,17 +46,22 @@ def make_derived_index():
"see": lambda entry, arg: re.search(fix_xifan(arg), entry.see_also or ""),
}


def add_operators(language: str):
QUERY_OPERATORS[language] = lambda entry, arg: (re.search(arg, entry.definition[language]) or arg in entry.search_tags.get(language, []))
QUERY_OPERATORS[language] = lambda entry, arg: (re.search(arg, entry.definition[language]) or
arg in entry.search_tags.get(language, []))
QUERY_OPERATORS[language+"notes"] = lambda entry, arg: re.search(arg, entry.notes.get(language, ""))
QUERY_OPERATORS[language+"ex"] = lambda entry, arg: re.search(arg, entry.examples.get(language, ""))


def init_operators():
    """Register the per-language query operators for every dictionary locale.

    Calls ``add_operators`` once for each item yielded by iterating
    ``dictionary.locales``.
    """
    for locale in dictionary.locales:
        add_operators(locale)


init_operators()


def get_wiki_name(name: str) -> str:
name = name.replace(" ", "")
ans = ""
Expand All @@ -71,6 +77,7 @@ def get_wiki_name(name: str) -> str:

return ans.capitalize()


class DictionaryQuery:
def __init__(self, query: str, language: str, link_format: Literal["html", "latex"] = "html"):
self.query = query
Expand Down Expand Up @@ -157,7 +164,7 @@ def dsl_query(self, query: str, included: Set[str]):
try:
f = query_function(entry)

except:
except Exception:
logger.exception("Error during executing query", exc_info=sys.exc_info())
f = False

Expand Down Expand Up @@ -199,7 +206,8 @@ def parse_term(self, parts: List[str]):
part = parts.pop(0)
if part == "(":
r = self.parse_or(parts)
if parts: parts.pop(0) # )
if parts:
parts.pop(0) # )
return r

if part in {"NOT", "EI"}:
Expand Down Expand Up @@ -240,7 +248,8 @@ def render_entry(self, entry: BoqwizEntry, include_derivs: bool = True) -> dict:
"syllables": yajwiz.split_to_syllables(entry.name),
"morphemes": list(map(list, yajwiz.split_to_morphemes(entry.name))),
"pos": self.locale_strings["unknown"],
"simple_pos": "affix" if entry.name.startswith("-") or entry.name.endswith("-") or entry.name == "0" else entry.simple_pos,
"simple_pos": "affix" if entry.name.startswith("-") or entry.name.endswith("-") or entry.name == "0"
else entry.simple_pos,
"boqwi_tags": list(entry.tags),
"tags": [],
"rendered_link": self.link_renderer.render_link(entry.name, entry.simple_pos, entry.tags),
Expand Down Expand Up @@ -379,6 +388,7 @@ def fix_links(self, text: str) -> str:
ans += text
return ans.replace("\n", "<br>")


class LinkRenderer:
def __init__(self, query: DictionaryQuery):
self.query = query
Expand Down Expand Up @@ -429,6 +439,7 @@ def render_link(self, link_text: str, link_type: str, tags: Collection[str]):

return f"<a href=\"?q=tlh:&quot;^{link_text.replace(' ', '+')}$&quot;{pos}{hom_pos}\" class=\"pos-{style}\"{defn}>{hyp}<span okrand>{link_text}</span>{hom}</a>"


class LinkRendererLatex(LinkRenderer):
def fix_link(self, link: str) -> str:
link_text, link_type, tags, parts1, parts2 = parse_link(link)
Expand All @@ -444,7 +455,7 @@ def fix_link(self, link: str) -> str:
return "\\klingonref[src]{" + link_text + "}"

elif link_type == "url":
addr = parts2[2]
# addr = parts2[2]
return "\\klingonref[url]{" + link_text + "}"

elif len(parts1) == 2:
Expand Down Expand Up @@ -474,16 +485,20 @@ def _render_link(self, link_text: str, link_type: str, tags: Collection[str]):

return "\\klingonref[%s]{%s\\klingontext{%s}%s}" % (style, hyp, link_text, hom)


def dictionary_query(query: str, lang: str, link_format: Literal["html", "latex"]):
    """Run *query* against the dictionary and return the execution result.

    Convenience wrapper: builds a ``DictionaryQuery`` from the query string,
    the language (passed on as ``language``) and the link format, then returns
    whatever its ``execute_query()`` returns.
    """
    dict_query = DictionaryQuery(query=query, language=lang, link_format=link_format)
    return dict_query.execute_query()


def any_word_starts_with(word: str, words: List[str]) -> bool:
    """Return True if any string in *words* starts with *word*, ignoring case.

    Both sides are compared after ``str.lower()``. An empty *words* yields
    False; an empty *word* matches any element.
    """
    # Lower-case the prefix once instead of once per candidate.
    prefix = word.lower()
    # A generator (not a list) lets any() stop at the first match.
    return any(part.lower().startswith(prefix) for part in words)


def get_id(link_text: str, link_type: str, tags: Collection[str]) -> str:
    """Build a colon-separated identifier from link text, link type and tags.

    Only tags that consist of digits optionally followed by ``h`` contribute;
    for those, the ``h`` is stripped and the remaining digits are appended
    after the link type, in the order the tags are given.
    """
    id_parts = [link_type]
    for tag in tags:
        if re.fullmatch(r"\d+h?", tag):
            id_parts.append(tag.strip("h"))
    suffix = ":".join(id_parts)
    return link_text + ":" + suffix


def get_links(text: str) -> List[str]:
ids = []
while "{" in text:
Expand All @@ -496,6 +511,7 @@ def get_links(text: str) -> List[str]:

return ids


def parse_link(link: str):
parts1 = link.split("@@")
parts2 = parts1[0].split(":")
Expand All @@ -504,6 +520,7 @@ def parse_link(link: str):
tags = parts2[2].split(",") if len(parts2) > 2 else []
return link_text, link_type, tags, parts1, parts2


def fix_xifan(query: str) -> str:
query = re.sub(r"i", "I", query)
query = re.sub(r"d", "D", query)
Expand All @@ -515,4 +532,5 @@ def fix_xifan(query: str) -> str:
query = re.sub(r"(?<!n)g(?!h)", "gh", query)
return query


make_derived_index()
1 change: 1 addition & 0 deletions expected_two_letter_verbs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
<column name="entry_name">Qu</column>
<column name="entry_name">Sa</column>
<column name="entry_name">tu</column>
<column name="entry_name">ye</column>
<column name="entry_name">yo</column>
9 changes: 9 additions & 0 deletions generate_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ then
echo
fi

# Print any field beginning with a space, or ending in a space or comma.
# The path is quoted so a TMP_DIR containing spaces or glob characters is
# passed to grep as a single argument.
MISPLACED_SPACE_OR_COMMA=$(grep "> \|>.* \|[^ >]\+ .*<\|>.*[ ,]<" "$TMP_DIR/mem.xml")
if [[ -n "$MISPLACED_SPACE_OR_COMMA" ]]
then
    echo "Misplaced space or comma:"
    echo "$MISPLACED_SPACE_OR_COMMA"
    echo
fi

# Print any junk that was accidentally added to the XML file at the beginning of a line.
BOL_JUNK=$(grep "^\s*[^ ]\+\s*<\(table\|column\)" $TMP_DIR/mem.xml)
if [[ ! -z "$BOL_JUNK" ]]
Expand Down
Loading

0 comments on commit 97e04a1

Please sign in to comment.