-
Notifications
You must be signed in to change notification settings - Fork 17
/
kli_new_words.py
executable file
·81 lines (76 loc) · 3.67 KB
/
kli_new_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# Script to extract new words from, e.g., https://www.kli.org/activities/qepmey/qepa-chamah-sochdich/new-words/.
import pandas as pd
df_list = pd.read_html('new_words.html')
df = df_list[1]
def print_entry(entry_name, part_of_speech, definition, notes):
print(" <table name=\"mem\">")
print(" <column name=\"_id\"></column>")
print(" <column name=\"entry_name\">{}</column>".format(entry_name))
print(" <column name=\"part_of_speech\">{}</column>".format(part_of_speech))
print(" <column name=\"definition\">{}</column>".format(definition))
print(" <column name=\"definition_de\">TRANSLATE</column>")
print(" <column name=\"definition_fa\">TRANSLATE</column>")
print(" <column name=\"definition_sv\">TRANSLATE</column>")
print(" <column name=\"definition_ru\">TRANSLATE</column>")
print(" <column name=\"definition_zh_HK\">TRANSLATE</column>")
print(" <column name=\"definition_pt\">TRANSLATE</column>")
print(" <column name=\"definition_fi\">TRANSLATE</column>")
print(" <column name=\"definition_fr\">TRANSLATE</column>")
print(" <column name=\"synonyms\"></column>")
print(" <column name=\"antonyms\"></column>")
print(" <column name=\"see_also\"></column>")
print(" <column name=\"notes\">{}</column>".format(notes))
print(" <column name=\"notes_de\"></column>")
print(" <column name=\"notes_fa\"></column>")
print(" <column name=\"notes_sv\"></column>")
print(" <column name=\"notes_ru\"></column>")
print(" <column name=\"notes_zh_HK\"></column>")
print(" <column name=\"notes_pt\"></column>")
print(" <column name=\"notes_fi\"></column>")
print(" <column name=\"notes_fr\"></column>")
print(" <column name=\"hidden_notes\"></column>")
print(" <column name=\"components\"></column>")
print(" <column name=\"examples\"></column>")
print(" <column name=\"examples_de\"></column>")
print(" <column name=\"examples_fa\"></column>")
print(" <column name=\"examples_sv\"></column>")
print(" <column name=\"examples_ru\"></column>")
print(" <column name=\"examples_zh_HK\"></column>")
print(" <column name=\"examples_pt\"></column>")
print(" <column name=\"examples_fi\"></column>")
print(" <column name=\"examples_fr\"></column>")
print(" <column name=\"search_tags\"></column>")
print(" <column name=\"search_tags_de\"></column>")
print(" <column name=\"search_tags_fa\"></column>")
print(" <column name=\"search_tags_sv\"></column>")
print(" <column name=\"search_tags_ru\"></column>")
print(" <column name=\"search_tags_zh_HK\"></column>")
print(" <column name=\"search_tags_pt\"></column>")
print(" <column name=\"search_tags_fi\"></column>")
print(" <column name=\"search_tags_fr\"></column>")
print(" <column name=\"source\">[1] {qep'a' 30 (2023):src}</column>")
print(" </table>")
print("")
for index, row in df.iterrows():
entry_name = str(row[0])
part_of_speech = str(row[1])
if part_of_speech == "Number":
part_of_speech = "n:num"
elif part_of_speech == "Noun":
part_of_speech = "n"
elif part_of_speech == "Verb":
part_of_speech = "v"
elif part_of_speech == "Body Part":
part_of_speech = "n:body"
elif part_of_speech == "Language User":
part_of_speech = "n:being"
elif part_of_speech == "Adverb":
part_of_speech = "adv"
definition = str(row[2])
if definition.startswith("be ") and part_of_speech == "v":
part_of_speech = "v:is"
notes = str(row[3])
if notes == "nan":
notes = ""
print_entry(entry_name, part_of_speech, definition, notes)