From c61d5db7f6132bc241d760993ce0f63a450eeb40 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sun, 1 Dec 2024 21:06:49 +0900 Subject: [PATCH] =?UTF-8?q?refactor:=20Python=20API=E3=81=AEexample?= =?UTF-8?q?=E3=81=AECLI=E5=BC=95=E6=95=B0=E3=82=92dataclass=E5=8C=96=20(#8?= =?UTF-8?q?81)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 次のFIXMEを解消する。 > ```py > # FIXME: 流石に多くなってきたので、`dataclass`化する > ``` --- example/python/run-asyncio.py | 153 +++++++++++++++--------------- example/python/run.py | 171 +++++++++++++++++----------------- 2 files changed, 161 insertions(+), 163 deletions(-) diff --git a/example/python/run-asyncio.py b/example/python/run-asyncio.py index 176ac290f..a5321c94c 100644 --- a/example/python/run-asyncio.py +++ b/example/python/run-asyncio.py @@ -6,12 +6,75 @@ import logging from argparse import ArgumentParser from pathlib import Path -from typing import Tuple from voicevox_core import AccelerationMode, AudioQuery from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile +@dataclasses.dataclass +class Args: + mode: AccelerationMode + vvm: Path + onnxruntime: str + dict_dir: Path + text: str + out: Path + style_id: int + + @staticmethod + def parse_args() -> "Args": + argparser = ArgumentParser() + argparser.add_argument( + "--mode", + default="AUTO", + type=AccelerationMode, + help='モード ("AUTO", "CPU", "GPU")', + ) + argparser.add_argument( + "vvm", + type=Path, + help="vvmファイルへのパス", + ) + argparser.add_argument( + "--onnxruntime", + default=Onnxruntime.LIB_VERSIONED_FILENAME, + help="ONNX Runtimeのライブラリのfilename", + ) + argparser.add_argument( + "--dict-dir", + default="./open_jtalk_dic_utf_8-1.11", + type=Path, + help="Open JTalkの辞書ディレクトリ", + ) + argparser.add_argument( + "--text", + default="この音声は、ボイスボックスを使用して、出力されています。", + help="読み上げさせたい文章", + ) + argparser.add_argument( + "--out", + default="./output.wav", + type=Path, + help="出力wavファイルのパス", + ) + argparser.add_argument( + "--style-id", + default=0, + type=int, + help="話者IDを指定", + ) + args = argparser.parse_args() + return Args( + args.mode, + args.vvm, + args.onnxruntime, + args.dict_dir, + args.text, + args.out, + args.style_id, + ) + + async def main() -> None: logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s") logger = logging.getLogger(__name__) @@ -19,97 +82,33 @@ async def main() -> None: logging.getLogger("voicevox_core_python_api").setLevel("DEBUG") logging.getLogger("voicevox_core").setLevel("DEBUG") - ( - acceleration_mode, - vvm_path, - onnxruntime_filename, - open_jtalk_dict_dir, - text, - out, - style_id, - ) = parse_args() + args = Args.parse_args() - logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})") - onnxruntime = await Onnxruntime.load_once(filename=onnxruntime_filename) + logger.info("%s", f"Loading ONNX Runtime ({args.onnxruntime=})") + onnxruntime = await Onnxruntime.load_once(filename=args.onnxruntime) logger.debug("%s", f"{onnxruntime.supported_devices()=}") - logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})") + logger.info("%s", f"Initializing ({args.mode=}, {args.dict_dir=})") synthesizer = Synthesizer( - onnxruntime, - await OpenJtalk.new(open_jtalk_dict_dir), - acceleration_mode=acceleration_mode, + onnxruntime, await OpenJtalk.new(args.dict_dir), acceleration_mode=args.mode ) logger.debug("%s", f"{synthesizer.metas=}") logger.debug("%s", f"{synthesizer.is_gpu_mode=}") - logger.info("%s", f"Loading `{vvm_path}`") - async with await VoiceModelFile.open(vvm_path) as model: + logger.info("%s", f"Loading `{args.vvm}`") + async with await VoiceModelFile.open(args.vvm) as model: await synthesizer.load_voice_model(model) - logger.info("%s", f"Creating an AudioQuery from {text!r}") - audio_query = await synthesizer.audio_query(text, style_id) + logger.info("%s", f"Creating an AudioQuery from {args.text!r}") + audio_query = await synthesizer.audio_query(args.text, args.style_id) logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}") - wav = await synthesizer.synthesis(audio_query, style_id) - - out.write_bytes(wav) - logger.info("%s", f"Wrote `{out}`") + wav = await synthesizer.synthesis(audio_query, args.style_id) - -def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]: - argparser = ArgumentParser() - argparser.add_argument( - "--mode", - default="AUTO", - type=AccelerationMode, - help='モード ("AUTO", "CPU", "GPU")', - ) - argparser.add_argument( - "vvm", - type=Path, - help="vvmファイルへのパス", - ) - argparser.add_argument( - "--onnxruntime", - default=Onnxruntime.LIB_VERSIONED_FILENAME, - help="ONNX Runtimeのライブラリのfilename", - ) - argparser.add_argument( - "--dict-dir", - default="./open_jtalk_dic_utf_8-1.11", - type=Path, - help="Open JTalkの辞書ディレクトリ", - ) - argparser.add_argument( - "--text", - default="この音声は、ボイスボックスを使用して、出力されています。", - help="読み上げさせたい文章", - ) - argparser.add_argument( - "--out", - default="./output.wav", - type=Path, - help="出力wavファイルのパス", - ) - argparser.add_argument( - "--style-id", - default=0, - type=int, - help="話者IDを指定", - ) - args = argparser.parse_args() - # FIXME: 流石に多くなってきたので、`dataclass`化する - return ( - args.mode, - args.vvm, - args.onnxruntime, - args.dict_dir, - args.text, - args.out, - args.style_id, - ) + args.out.write_bytes(wav) + logger.info("%s", f"Wrote `{args.out}`") def display_as_json(audio_query: AudioQuery) -> str: diff --git a/example/python/run.py b/example/python/run.py index caa2f36db..e7e353dc5 100644 --- a/example/python/run.py +++ b/example/python/run.py @@ -3,12 +3,82 @@ import logging from argparse import ArgumentParser from pathlib import Path -from typing import Tuple from voicevox_core import AccelerationMode, AudioQuery, wav_from_s16le from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile +@dataclasses.dataclass +class Args: + mode: AccelerationMode + vvm: Path + onnxruntime: str + dict_dir: Path + text: str + out: Path + style_id: int + streaming: bool + + @staticmethod + def parse_args() -> "Args": + argparser = ArgumentParser() + argparser.add_argument( + "--mode", + default="AUTO", + type=AccelerationMode, + help='モード ("AUTO", "CPU", "GPU")', + ) + argparser.add_argument( + "vvm", + type=Path, + help="vvmファイルへのパス", + ) + argparser.add_argument( + "--onnxruntime", + default=Onnxruntime.LIB_VERSIONED_FILENAME, + help="ONNX Runtimeのライブラリのfilename", + ) + argparser.add_argument( + "--dict-dir", + default="./open_jtalk_dic_utf_8-1.11", + type=Path, + help="Open JTalkの辞書ディレクトリ", + ) + argparser.add_argument( + "--text", + default="この音声は、ボイスボックスを使用して、出力されています。", + help="読み上げさせたい文章", + ) + argparser.add_argument( + "--out", + default="./output.wav", + type=Path, + help="出力wavファイルのパス", + ) + argparser.add_argument( + "--style-id", + default=0, + type=int, + help="話者IDを指定", + ) + argparser.add_argument( + "--streaming", + action="store_true", + help="ストリーミング生成", + ) + args = argparser.parse_args() + return Args( + args.mode, + args.vvm, + args.onnxruntime, + args.dict_dir, + args.text, + args.out, + args.style_id, + args.streaming, + ) + + def main() -> None: logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s") logger = logging.getLogger(__name__) @@ -16,44 +86,33 @@ def main() -> None: logging.getLogger("voicevox_core_python_api").setLevel("DEBUG") logging.getLogger("voicevox_core").setLevel("DEBUG") - ( - acceleration_mode, - vvm_path, - onnxruntime_filename, - open_jtalk_dict_dir, - text, - out, - style_id, - streaming, - ) = parse_args() + args = Args.parse_args() - logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})") - onnxruntime = Onnxruntime.load_once(filename=onnxruntime_filename) + logger.info("%s", f"Loading ONNX Runtime ({args.onnxruntime=})") + onnxruntime = Onnxruntime.load_once(filename=args.onnxruntime) logger.debug("%s", f"{onnxruntime.supported_devices()=}") - logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})") + logger.info("%s", f"Initializing ({args.mode=}, {args.dict_dir=})") synthesizer = Synthesizer( - onnxruntime, - OpenJtalk(open_jtalk_dict_dir), - acceleration_mode=acceleration_mode, + onnxruntime, OpenJtalk(args.dict_dir), acceleration_mode=args.mode ) logger.debug("%s", f"{synthesizer.metas=}") logger.debug("%s", f"{synthesizer.is_gpu_mode=}") - logger.info("%s", f"Loading `{vvm_path}`") - with VoiceModelFile.open(vvm_path) as model: + logger.info("%s", f"Loading `{args.vvm}`") + with VoiceModelFile.open(args.vvm) as model: synthesizer.load_voice_model(model) - logger.info("%s", f"Creating an AudioQuery from {text!r}") - audio_query = synthesizer.audio_query(text, style_id) + logger.info("%s", f"Creating an AudioQuery from {args.text!r}") + audio_query = synthesizer.audio_query(args.text, args.style_id) logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}") - if streaming: + if args.streaming: logger.info("%s", "In streaming mode") chunk_sec = 1.0 - audio_feature = synthesizer.precompute_render(audio_query, style_id) + audio_feature = synthesizer.precompute_render(audio_query, args.style_id) chunk_frames = int(audio_feature.frame_rate * chunk_sec) pcm = b"" for i in range(0, audio_feature.frame_length, chunk_frames): @@ -67,70 +126,10 @@ def main() -> None: ) else: - wav = synthesizer.synthesis(audio_query, style_id) + wav = synthesizer.synthesis(audio_query, args.style_id) - out.write_bytes(wav) - logger.info("%s", f"Wrote `{out}`") - - -def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int, bool]: - argparser = ArgumentParser() - argparser.add_argument( - "--mode", - default="AUTO", - type=AccelerationMode, - help='モード ("AUTO", "CPU", "GPU")', - ) - argparser.add_argument( - "vvm", - type=Path, - help="vvmファイルへのパス", - ) - argparser.add_argument( - "--onnxruntime", - default=Onnxruntime.LIB_VERSIONED_FILENAME, - help="ONNX Runtimeのライブラリのfilename", - ) - argparser.add_argument( - "--dict-dir", - default="./open_jtalk_dic_utf_8-1.11", - type=Path, - help="Open JTalkの辞書ディレクトリ", - ) - argparser.add_argument( - "--text", - default="この音声は、ボイスボックスを使用して、出力されています。", - help="読み上げさせたい文章", - ) - argparser.add_argument( - "--out", - default="./output.wav", - type=Path, - help="出力wavファイルのパス", - ) - argparser.add_argument( - "--style-id", - default=0, - type=int, - help="話者IDを指定", - ) - argparser.add_argument( - "--streaming", - action="store_true", - help="ストリーミング生成", - ) - args = argparser.parse_args() - # FIXME: 流石に多くなってきたので、`dataclass`化する - return ( - args.mode, - args.vvm, - args.onnxruntime, - args.dict_dir, - args.text, - args.out, - args.style_id, - args.streaming, - ) + args.out.write_bytes(wav) + logger.info("%s", f"Wrote `{args.out}`") def display_as_json(audio_query: AudioQuery) -> str: