
Commit 6b6a771

Merge branch 'main' into feat-rename-audio-query-to-create-audio-query

qryxip committed Dec 1, 2024
2 parents 3122a44 + c61d5db
Showing 3 changed files with 162 additions and 164 deletions.
2 changes: 1 addition & 1 deletion docs/guide/user/usage.md
````diff
@@ -120,7 +120,7 @@ with VoiceModelFile.open("model/0.vvm") as model:
 ```python
 text = "サンプル音声です"
 style_id = 0
-audio_query = synthesizer.audio_query(text, style_id)
+audio_query = synthesizer.create_audio_query(text, style_id)
 pprint(audio_query)
 ```
````

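For readers skimming the diff: this is the guide's only call-site change, documenting the rename of `Synthesizer.audio_query` to `Synthesizer.create_audio_query`. Below is a minimal sketch of the renamed call in context, assuming the asynchronous API that the example script further down already uses; the dictionary and model paths are placeholders, not values prescribed by this commit.

```python
# Sketch only: reuses the asyncio API calls visible in example/python/run-asyncio.py;
# the dictionary and VVM paths here are placeholders.
import asyncio
from pathlib import Path
from pprint import pprint

from voicevox_core import AccelerationMode
from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile


async def run() -> None:
    onnxruntime = await Onnxruntime.load_once(filename=Onnxruntime.LIB_VERSIONED_FILENAME)
    synthesizer = Synthesizer(
        onnxruntime,
        await OpenJtalk.new(Path("./open_jtalk_dic_utf_8-1.11")),
        acceleration_mode=AccelerationMode("AUTO"),
    )

    async with await VoiceModelFile.open(Path("model/0.vvm")) as model:
        await synthesizer.load_voice_model(model)

    text = "サンプル音声です"
    style_id = 0
    # Renamed in this change: synthesizer.audio_query() is now synthesizer.create_audio_query().
    audio_query = await synthesizer.create_audio_query(text, style_id)
    pprint(audio_query)


asyncio.run(run())
```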
153 changes: 76 additions & 77 deletions example/python/run-asyncio.py
```diff
@@ -6,110 +6,109 @@
 import logging
 from argparse import ArgumentParser
 from pathlib import Path
-from typing import Tuple
 
 from voicevox_core import AccelerationMode, AudioQuery
 from voicevox_core.asyncio import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile
 
 
+@dataclasses.dataclass
+class Args:
+    mode: AccelerationMode
+    vvm: Path
+    onnxruntime: str
+    dict_dir: Path
+    text: str
+    out: Path
+    style_id: int
+
+    @staticmethod
+    def parse_args() -> "Args":
+        argparser = ArgumentParser()
+        argparser.add_argument(
+            "--mode",
+            default="AUTO",
+            type=AccelerationMode,
+            help='モード ("AUTO", "CPU", "GPU")',
+        )
+        argparser.add_argument(
+            "vvm",
+            type=Path,
+            help="vvmファイルへのパス",
+        )
+        argparser.add_argument(
+            "--onnxruntime",
+            default=Onnxruntime.LIB_VERSIONED_FILENAME,
+            help="ONNX Runtimeのライブラリのfilename",
+        )
+        argparser.add_argument(
+            "--dict-dir",
+            default="./open_jtalk_dic_utf_8-1.11",
+            type=Path,
+            help="Open JTalkの辞書ディレクトリ",
+        )
+        argparser.add_argument(
+            "--text",
+            default="この音声は、ボイスボックスを使用して、出力されています。",
+            help="読み上げさせたい文章",
+        )
+        argparser.add_argument(
+            "--out",
+            default="./output.wav",
+            type=Path,
+            help="出力wavファイルのパス",
+        )
+        argparser.add_argument(
+            "--style-id",
+            default=0,
+            type=int,
+            help="話者IDを指定",
+        )
+        args = argparser.parse_args()
+        return Args(
+            args.mode,
+            args.vvm,
+            args.onnxruntime,
+            args.dict_dir,
+            args.text,
+            args.out,
+            args.style_id,
+        )
+
+
 async def main() -> None:
     logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s")
     logger = logging.getLogger(__name__)
     logger.setLevel("DEBUG")
     logging.getLogger("voicevox_core_python_api").setLevel("DEBUG")
     logging.getLogger("voicevox_core").setLevel("DEBUG")
 
-    (
-        acceleration_mode,
-        vvm_path,
-        onnxruntime_filename,
-        open_jtalk_dict_dir,
-        text,
-        out,
-        style_id,
-    ) = parse_args()
+    args = Args.parse_args()
 
-    logger.info("%s", f"Loading ONNX Runtime ({onnxruntime_filename=})")
-    onnxruntime = await Onnxruntime.load_once(filename=onnxruntime_filename)
+    logger.info("%s", f"Loading ONNX Runtime ({args.onnxruntime=})")
+    onnxruntime = await Onnxruntime.load_once(filename=args.onnxruntime)
 
     logger.debug("%s", f"{onnxruntime.supported_devices()=}")
 
-    logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
+    logger.info("%s", f"Initializing ({args.mode=}, {args.dict_dir=})")
     synthesizer = Synthesizer(
-        onnxruntime,
-        await OpenJtalk.new(open_jtalk_dict_dir),
-        acceleration_mode=acceleration_mode,
+        onnxruntime, await OpenJtalk.new(args.dict_dir), acceleration_mode=args.mode
     )
 
     logger.debug("%s", f"{synthesizer.metas=}")
     logger.debug("%s", f"{synthesizer.is_gpu_mode=}")
 
-    logger.info("%s", f"Loading `{vvm_path}`")
-    async with await VoiceModelFile.open(vvm_path) as model:
+    logger.info("%s", f"Loading `{args.vvm}`")
+    async with await VoiceModelFile.open(args.vvm) as model:
         await synthesizer.load_voice_model(model)
 
-    logger.info("%s", f"Creating an AudioQuery from {text!r}")
-    audio_query = await synthesizer.create_audio_query(text, style_id)
+    logger.info("%s", f"Creating an AudioQuery from {args.text!r}")
+    audio_query = await synthesizer.create_audio_query(args.text, args.style_id)
 
     logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
-    wav = await synthesizer.synthesis(audio_query, style_id)
-
-    out.write_bytes(wav)
-    logger.info("%s", f"Wrote `{out}`")
+    wav = await synthesizer.synthesis(audio_query, args.style_id)
 
-
-def parse_args() -> Tuple[AccelerationMode, Path, str, Path, str, Path, int]:
-    argparser = ArgumentParser()
-    argparser.add_argument(
-        "--mode",
-        default="AUTO",
-        type=AccelerationMode,
-        help='モード ("AUTO", "CPU", "GPU")',
-    )
-    argparser.add_argument(
-        "vvm",
-        type=Path,
-        help="vvmファイルへのパス",
-    )
-    argparser.add_argument(
-        "--onnxruntime",
-        default=Onnxruntime.LIB_VERSIONED_FILENAME,
-        help="ONNX Runtimeのライブラリのfilename",
-    )
-    argparser.add_argument(
-        "--dict-dir",
-        default="./open_jtalk_dic_utf_8-1.11",
-        type=Path,
-        help="Open JTalkの辞書ディレクトリ",
-    )
-    argparser.add_argument(
-        "--text",
-        default="この音声は、ボイスボックスを使用して、出力されています。",
-        help="読み上げさせたい文章",
-    )
-    argparser.add_argument(
-        "--out",
-        default="./output.wav",
-        type=Path,
-        help="出力wavファイルのパス",
-    )
-    argparser.add_argument(
-        "--style-id",
-        default=0,
-        type=int,
-        help="話者IDを指定",
-    )
-    args = argparser.parse_args()
-    # FIXME: 流石に多くなってきたので、`dataclass`化する
-    return (
-        args.mode,
-        args.vvm,
-        args.onnxruntime,
-        args.dict_dir,
-        args.text,
-        args.out,
-        args.style_id,
-    )
+    args.out.write_bytes(wav)
+    logger.info("%s", f"Wrote `{args.out}`")
 
 
 def display_as_json(audio_query: AudioQuery) -> str:
```
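The merged-in refactor replaces the old tuple-returning `parse_args` (whose removed FIXME comment itself asked for a dataclass, roughly: "this has grown too long, turn it into a dataclass") with the `Args` dataclass shown above. A hedged sketch of what that buys callers: `Args` can be constructed directly with named fields, for example from a test, instead of unpacking a seven-element tuple. It assumes the `Args` definition from example/python/run-asyncio.py is in scope, and the field values are illustrative placeholders.

```python
# Hypothetical usage sketch: building Args directly instead of via argparse.
# Assumes the Args dataclass defined in example/python/run-asyncio.py is in scope;
# the concrete values below are placeholders.
from pathlib import Path

from voicevox_core import AccelerationMode
from voicevox_core.asyncio import Onnxruntime

args = Args(
    mode=AccelerationMode("AUTO"),
    vvm=Path("model/0.vvm"),
    onnxruntime=Onnxruntime.LIB_VERSIONED_FILENAME,
    dict_dir=Path("./open_jtalk_dic_utf_8-1.11"),
    text="この音声は、ボイスボックスを使用して、出力されています。",
    out=Path("./output.wav"),
    style_id=0,
)
print(args.text, args.style_id)  # named attribute access replaces positional tuple unpacking
```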
