forked from VOICEVOX/voicevox_core
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-asyncio.py
120 lines (101 loc) · 3.44 KB
/
run-asyncio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""asyncio版のサンプルコードです。"""
import asyncio
import dataclasses
import json
import logging
from argparse import ArgumentParser
from pathlib import Path
from typing import Tuple
from voicevox_core import AccelerationMode, AudioQuery
from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile
async def main() -> None:
    """Asynchronously load a VVM voice model and write synthesized speech to a wav file."""
    logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s")
    logger = logging.getLogger(__name__)
    logger.setLevel("DEBUG")
    # Also surface debug output from the core library's own loggers.
    for core_logger_name in ("voicevox_core_python_api", "voicevox_core"):
        logging.getLogger(core_logger_name).setLevel("DEBUG")

    (
        acceleration_mode,
        vvm_path,
        onnxruntime_filename,
        open_jtalk_dict_dir,
        text,
        out,
        style_id,
    ) = parse_args()

    logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})")
    onnxruntime = await Onnxruntime.load_once(filename=onnxruntime_filename)
    logger.debug("%s", f"{onnxruntime.supported_devices()=}")

    logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
    open_jtalk = await OpenJtalk.new(open_jtalk_dict_dir)
    synthesizer = Synthesizer(onnxruntime, open_jtalk, acceleration_mode=acceleration_mode)
    logger.debug("%s", f"{synthesizer.metas=}")
    logger.debug("%s", f"{synthesizer.is_gpu_mode=}")

    logger.info("%s", f"Loading `{vvm_path}`")
    # The model file handle is only needed while loading; the async context
    # manager closes it as soon as the model is registered.
    async with await VoiceModelFile.open(vvm_path) as model:
        await synthesizer.load_voice_model(model)

    logger.info("%s", f"Creating an AudioQuery from {text!r}")
    audio_query = await synthesizer.audio_query(text, style_id)

    logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
    wav = await synthesizer.synthesis(audio_query, style_id)
    out.write_bytes(wav)
    logger.info("%s", f"Wrote `{out}`")
def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]:
    """Parse the CLI options and return them as a flat tuple.

    The tuple order matches the unpacking in ``main``:
    (mode, vvm, onnxruntime, dict_dir, text, out, style_id).
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--mode",
        default="AUTO",
        type=AccelerationMode,
        help='モード ("AUTO", "CPU", "GPU")',
    )
    parser.add_argument(
        "vvm",
        type=Path,
        help="vvmファイルへのパス",
    )
    parser.add_argument(
        "--onnxruntime",
        default=Onnxruntime.LIB_VERSIONED_FILENAME,
        help="ONNX Runtimeのライブラリのfilename",
    )
    parser.add_argument(
        "--dict-dir",
        default="./open_jtalk_dic_utf_8-1.11",
        type=Path,
        help="Open JTalkの辞書ディレクトリ",
    )
    parser.add_argument(
        "--text",
        default="この音声は、ボイスボックスを使用して、出力されています。",
        help="読み上げさせたい文章",
    )
    parser.add_argument(
        "--out",
        default="./output.wav",
        type=Path,
        help="出力wavファイルのパス",
    )
    parser.add_argument(
        "--style-id",
        default=0,
        type=int,
        help="話者IDを指定",
    )
    ns = parser.parse_args()
    # FIXME: the option count has grown; bundle these into a dataclass.
    return (ns.mode, ns.vvm, ns.onnxruntime, ns.dict_dir, ns.text, ns.out, ns.style_id)
def display_as_json(audio_query: AudioQuery) -> str:
return json.dumps(dataclasses.asdict(audio_query), ensure_ascii=False)
if __name__ == "__main__":
asyncio.run(main())