-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathextract_quests.py
48 lines (41 loc) · 1.46 KB
/
extract_quests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import argparse
import glob
import json
import os
import re
def get_quest_dialogue(repo, lang):
    """Export codex quest records with their text ids resolved to strings.

    Loads ``TextMap/TextMap{lang}.json`` under *repo* as a hash -> text
    mapping, then reads every ``BinOutput/CodexQuest/*.json`` file and
    replaces the integer that follows each ``"textId": `` with the mapped
    text encoded as a JSON string (or ``null`` when the hash is missing
    from the map or maps to an empty value). Each file is then parsed as
    JSON, and every non-empty result is written as one line of
    ``extracted_quest/quest_{lang}.jsonl``, relative to the current
    working directory. The output directory is created if needed.

    Args:
        repo: Path to the data repository root.
        lang: Language suffix of the text map file name, e.g. ``"EN"``.

    Raises:
        FileNotFoundError: If the text map file does not exist.
        json.JSONDecodeError: If the text map or a quest file is not valid
            JSON after substitution.
    """
    text_map_path = os.path.join(repo, f"TextMap/TextMap{lang}.json")
    with open(text_map_path, "r", encoding="utf-8") as f:
        map_hash_to_txt = json.load(f)

    # Capture the key prefix separately so only the number that directly
    # follows "textId" is rewritten; other numbers that happen to share the
    # same digits are left alone.
    text_id_pattern = re.compile(r"(?P<prefix>\"textId\": )(?P<hash>\d+)")

    def _resolve(match):
        """Return the matched ``"textId": <hash>`` with the hash resolved."""
        text = map_hash_to_txt.get(match["hash"], "")
        value = json.dumps(text, ensure_ascii=False) if text else "null"
        return match["prefix"] + value

    samples = []
    quest_glob = os.path.join(repo, "BinOutput/CodexQuest/*.json")
    # Sorted so the output order does not depend on filesystem enumeration.
    for file in sorted(glob.glob(quest_glob)):
        with open(file, "r", encoding="utf-8") as f:
            raw = f.read()
        # A callable replacement is inserted literally, so backslashes in
        # the JSON-escaped text are not reinterpreted by ``re``.
        sample = json.loads(text_id_pattern.sub(_resolve, raw))
        if sample:
            samples.append(sample)

    # Collect everything first, then write: a parse failure above leaves
    # no partially written output file behind.
    os.makedirs("extracted_quest", exist_ok=True)
    with open(f"extracted_quest/quest_{lang}.jsonl", "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(sample, ensure_ascii=False) + "\n" for sample in samples
        )
if __name__ == "__main__":
    # Command-line entry point: parse the data directory and language
    # suffix, then run the extraction.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--repo",
        # Not marked required: argparse never applies ``default`` to a
        # required option, which made this default unreachable.
        default="../AnimeGameData",
        type=str,
        help="data dir",
    )
    parser.add_argument("--lang", default="EN", type=str, help="language type")
    args = parser.parse_args()
    get_quest_dialogue(args.repo, args.lang)