Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into release-0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
Hiroshiba committed Sep 12, 2021
2 parents 3c74e57 + 264aff6 commit fcb38c0
Show file tree
Hide file tree
Showing 10 changed files with 706 additions and 199 deletions.
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.2
0.5.2
96 changes: 85 additions & 11 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,20 @@
import resampy
import soundfile
import uvicorn
from fastapi import FastAPI, Response
from fastapi import FastAPI, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from starlette.responses import FileResponse

from voicevox_engine.full_context_label import extract_full_context_label
from voicevox_engine.model import AccentPhrase, AudioQuery, Mora, Speaker
from voicevox_engine.kana_parser import create_kana, parse_kana
from voicevox_engine.model import (
AccentPhrase,
AudioQuery,
Mora,
ParseKanaBadRequest,
ParseKanaError,
Speaker,
)
from voicevox_engine.mora_list import openjtalk_mora2text
from voicevox_engine.synthesis_engine import SynthesisEngine

Expand Down Expand Up @@ -44,11 +52,14 @@ def make_synthesis_engine(
if voicevox_dir.exists():
sys.path.insert(0, str(voicevox_dir))

has_voicevox_core = True
try:
import core
except ImportError:
from voicevox_engine.dev import core

has_voicevox_core = False

# 音声ライブラリの Python モジュールをロードできなかった
print(
"Notice: mock-library will be used. Try re-run with valid --voicevox_dir", # noqa
Expand All @@ -60,12 +71,20 @@ def make_synthesis_engine(

core.initialize(voicelib_dir.as_posix() + "/", use_gpu)

return SynthesisEngine(
yukarin_s_forwarder=core.yukarin_s_forward,
yukarin_sa_forwarder=core.yukarin_sa_forward,
decode_forwarder=core.decode_forward,
if has_voicevox_core:
return SynthesisEngine(
yukarin_s_forwarder=core.yukarin_s_forward,
yukarin_sa_forwarder=core.yukarin_sa_forward,
decode_forwarder=core.decode_forward,
)

from voicevox_engine.dev.synthesis_engine import (
SynthesisEngine as mock_synthesis_engine,
)

# モックで置き換える
return mock_synthesis_engine()


def mora_to_text(mora: str):
if mora[-1:] in ["A", "I", "U", "E", "O"]:
Expand Down Expand Up @@ -98,15 +117,22 @@ def generate_app(engine: SynthesisEngine) -> FastAPI:
def replace_mora_data(
    accent_phrases: List[AccentPhrase], speaker_id: int
) -> List[AccentPhrase]:
    """Replace the phoneme lengths and then the mora pitches of accent phrases.

    The work is delegated to the synthesis engine in two steps: the accent
    phrases are first passed to ``engine.replace_phoneme_length`` and the
    result of that call is then passed to ``engine.replace_mora_pitch``.

    Parameters
    ----------
    accent_phrases : List[AccentPhrase]
        Accent phrases to update.
    speaker_id : int
        Speaker identifier forwarded unchanged to both engine calls.

    Returns
    -------
    List[AccentPhrase]
        Whatever ``engine.replace_mora_pitch`` returns for the
        length-adjusted phrases.
    """
    # Step 1: lengths. The pitch step below consumes this result, so the
    # order of the two calls matters.
    phrases_with_length = engine.replace_phoneme_length(
        accent_phrases=accent_phrases,
        speaker_id=speaker_id,
    )
    # Step 2: pitches, computed on the length-adjusted phrases.
    return engine.replace_mora_pitch(
        accent_phrases=phrases_with_length,
        speaker_id=speaker_id,
    )

def create_accent_phrases(text: str, speaker_id: int) -> List[AccentPhrase]:
if len(text.strip()) == 0:
return []

utterance = extract_full_context_label(text)
if len(utterance.breath_groups) == 0:
return []

return replace_mora_data(
accent_phrases=[
AccentPhrase(
Expand Down Expand Up @@ -162,8 +188,9 @@ def audio_query(text: str, speaker: int):
"""
クエリの初期値を得ます。ここで得られたクエリはそのまま音声合成に利用できます。各値の意味は`Schemas`を参照してください。
"""
accent_phrases = create_accent_phrases(text, speaker_id=speaker)
return AudioQuery(
accent_phrases=create_accent_phrases(text, speaker_id=speaker),
accent_phrases=accent_phrases,
speedScale=1,
pitchScale=0,
intonationScale=1,
Expand All @@ -172,16 +199,41 @@ def audio_query(text: str, speaker: int):
postPhonemeLength=0.1,
outputSamplingRate=default_sampling_rate,
outputStereo=False,
kana=create_kana(accent_phrases),
)

@app.post(
    "/accent_phrases",
    response_model=List[AccentPhrase],
    tags=["クエリ編集"],
    summary="テキストからアクセント句を得る",
    responses={
        400: {
            "description": "読み仮名のパースに失敗",
            "model": ParseKanaBadRequest,
        }
    },
)
def accent_phrases(text: str, speaker: int, is_kana: bool = False):
    """
    テキストからアクセント句を得ます。
    is_kanaが`true`のとき、テキストは次のようなAquesTalkライクな記法に従う読み仮名として処理されます。デフォルトは`false`です。
    * 全てのカナはカタカナで記述される
    * アクセント句は`/`または`、`で区切る。`、`で区切った場合に限り無音区間が挿入される。
    * カナの手前に`_`を入れるとそのカナは無声化される
    * アクセント位置を`'`で指定する。全てのアクセント句にはアクセント位置を1つ指定する必要がある。
    """
    # NOTE: the docstring above is deliberately left in Japanese and unchanged:
    # FastAPI exposes a path operation's docstring as its description in the
    # generated API docs, so it is user-facing text rather than a code comment.
    # English summary: returns accent phrases for `text`. When `is_kana` is
    # true, `text` is read as an AquesTalk-like kana notation (katakana only;
    # phrases separated by `/` or `、`, where only `、` inserts a pause; `_`
    # before a kana devoices it; `'` marks the accent, exactly one per phrase).

    # Plain-text input: the regular text front end builds the accent phrases.
    if not is_kana:
        return create_accent_phrases(text, speaker_id=speaker)

    # Kana input: parse the notation. A parse failure is reported to the
    # client as HTTP 400 with a ParseKanaBadRequest payload (see `responses`
    # in the decorator).
    try:
        # Named differently from the endpoint function to avoid shadowing it.
        parsed_phrases = parse_kana(text)
    except ParseKanaError as err:
        # `from err` keeps the original parse error as the explicit cause.
        raise HTTPException(
            status_code=400,
            detail=ParseKanaBadRequest(err).dict(),
        ) from err

    # The parsed phrases still need engine-computed mora data for the speaker.
    return replace_mora_data(accent_phrases=parsed_phrases, speaker_id=speaker)

@app.post(
"/mora_data",
Expand All @@ -192,6 +244,28 @@ def accent_phrases(text: str, speaker: int):
def mora_data(accent_phrases: List[AccentPhrase], speaker: int):
return replace_mora_data(accent_phrases, speaker_id=speaker)

@app.post(
    "/mora_length",
    response_model=List[AccentPhrase],
    tags=["クエリ編集"],
    summary="アクセント句から音素長を得る",
)
def mora_length(accent_phrases: List[AccentPhrase], speaker: int):
    # POST /mora_length: hand the posted accent phrases to the engine's
    # phoneme-length replacement and return its result unchanged.
    # (Comments are used instead of a docstring so the generated API
    # description of this endpoint stays as it was.)
    # The request's `speaker` value is what the engine calls `speaker_id`.
    phrases_with_length = engine.replace_phoneme_length(
        speaker_id=speaker,
        accent_phrases=accent_phrases,
    )
    return phrases_with_length

@app.post(
    "/mora_pitch",
    response_model=List[AccentPhrase],
    tags=["クエリ編集"],
    summary="アクセント句から音高を得る",
)
def mora_pitch(accent_phrases: List[AccentPhrase], speaker: int):
    # POST /mora_pitch: hand the posted accent phrases to the engine's
    # mora-pitch replacement and return its result unchanged.
    # (Comments are used instead of a docstring so the generated API
    # description of this endpoint stays as it was.)
    # The request's `speaker` value is what the engine calls `speaker_id`.
    phrases_with_pitch = engine.replace_mora_pitch(
        speaker_id=speaker,
        accent_phrases=accent_phrases,
    )
    return phrases_with_pitch

@app.post(
"/synthesis",
response_class=FileResponse,
Expand Down
76 changes: 76 additions & 0 deletions test/test_kana_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from unittest import TestCase

from voicevox_engine.kana_parser import create_kana, parse_kana
from voicevox_engine.model import ParseKanaError, ParseKanaErrorCode


class TestParseKana(TestCase):
    """Tests of ``parse_kana`` on valid input, plus the ``create_kana`` round trip."""

    def test_phrase_length(self):
        """The number of parsed accent phrases follows the `/` and `、` separators."""
        # (kana text, expected number of accent phrases)
        expectations = [
            ("ア'/ア'", 2),
            ("ア'、ア'", 2),
            ("ア'/ア'/ア'/ア'/ア'", 5),
            ("ス'", 1),
            ("_ス'", 1),
            ("ギェ'", 1),
            ("ギェ'、ギェ'/ギェ'", 3),
        ]
        for kana, phrase_count in expectations:
            self.assertEqual(len(parse_kana(kana)), phrase_count)

    def test_accent(self):
        """The accent value is the 1-based index of the mora followed by `'`."""
        # (kana text, expected accent of the first phrase); every case is
        # checked both without and with a `_` marker in the phrase.
        expectations = [
            ("シャ'シシュシェショ", 1),
            ("シャ'_シシュシェショ", 1),
            ("シャシ'シュシェショ", 2),
            ("シャ_シ'シュシェショ", 2),
            ("シャシシュ'シェショ", 3),
            ("シャ_シシュ'シェショ", 3),
            ("シャシシュシェショ'", 5),
            ("シャ_シシュシェショ'", 5),
        ]
        for kana, accent in expectations:
            self.assertEqual(parse_kana(kana)[0].accent, accent)

    def test_mora_length(self):
        """Accent marks and `_` markers do not change the number of moras."""
        five_mora_texts = [
            "シャ'シシュシェショ",
            "シャ'_シシュシェショ",
            "シャシ'シュシェショ",
            "シャ_シ'シュシェショ",
            "シャシシュシェショ'",
            "シャ_シシュシェショ'",
        ]
        for kana in five_mora_texts:
            self.assertEqual(len(parse_kana(kana)[0].moras), 5)

    def test_pause(self):
        """Only a phrase that is followed by `、` gets a pause mora."""
        # (kana text, phrase index, whether a pause mora is expected)
        expectations = [
            ("ア'/ア'", 0, False),
            ("ア'/ア'", 1, False),
            ("ア'、ア'", 0, True),
            ("ア'、ア'", 1, False),
        ]
        for kana, index, has_pause in expectations:
            pause_mora = parse_kana(kana)[index].pause_mora
            if has_pause:
                self.assertIsNotNone(pause_mora)
            else:
                self.assertIsNone(pause_mora)

    def test_unvoice(self):
        """A `_` prefix turns the mora's vowel into its upper-case form."""
        voiced_vowel = parse_kana("ス'")[0].moras[0].vowel
        self.assertEqual(voiced_vowel, "u")

        unvoiced_vowel = parse_kana("_ス'")[0].moras[0].vowel
        self.assertEqual(unvoiced_vowel, "U")

    def test_roundtrip(self):
        """``create_kana(parse_kana(text))`` reproduces the original text."""
        # Ordinary sentences first, then single-kana edge cases.
        sentences = ["コンニチワ'", "ワタシワ'/シャチョオデ'_ス", "トテモ'、エラ'インデス"]
        single_kana = ["ヲ'", "ェ'"]
        for text in sentences + single_kana:
            self.assertEqual(create_kana(parse_kana(text)), text)


class TestParseKanaException(TestCase):
    """Tests that ``parse_kana`` rejects malformed input with the right error."""

    def _assert_error_code(self, kana: str, code: ParseKanaErrorCode):
        """Assert that parsing ``kana`` raises ``ParseKanaError`` carrying ``code``."""
        with self.assertRaises(ParseKanaError) as caught:
            parse_kana(kana)
        # Checked after the `with` block: inside it, nothing placed after the
        # raising call would run.
        self.assertEqual(caught.exception.errcode, code)

    def test_exceptions(self):
        """Each kind of malformed input maps to its own error code and details."""
        # (malformed kana text, expected error code)
        expectations = [
            ("アクセント", ParseKanaErrorCode.ACCENT_NOTFOUND),
            ("'アクセント", ParseKanaErrorCode.ACCENT_TOP),
            ("ア'ク'セント", ParseKanaErrorCode.ACCENT_TWICE),
            ("ひ'らがな", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("__ス'", ParseKanaErrorCode.UNKNOWN_TEXT),
            ("ア'/", ParseKanaErrorCode.EMPTY_PHRASE),
            ("/ア'", ParseKanaErrorCode.EMPTY_PHRASE),
        ]
        for kana, code in expectations:
            self._assert_error_code(kana, code)

        # The error also carries details: here, the phrase lacking an accent.
        with self.assertRaises(ParseKanaError) as caught:
            parse_kana("ヒト'ツメ/フタツメ")
        missing_accent = caught.exception
        self.assertEqual(missing_accent.errcode, ParseKanaErrorCode.ACCENT_NOTFOUND)
        self.assertEqual(missing_accent.kwargs, {"text": "フタツメ"})

        # ...and here, the position of the empty phrase (reported as a string).
        with self.assertRaises(ParseKanaError) as caught:
            parse_kana("ア'/")
        empty_phrase = caught.exception
        self.assertEqual(empty_phrase.errcode, ParseKanaErrorCode.EMPTY_PHRASE)
        self.assertEqual(empty_phrase.kwargs, {"position": "2"})
42 changes: 40 additions & 2 deletions voicevox_engine/dev/core/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
from typing import Any, Dict, List

import numpy as np
from pyopenjtalk import tts
from resampy import resample

DUMMY_TEXT = "これはダミーのテキストです"

def initialize(path: str, use_gpu: bool, *args: Any) -> None:
    """Mock initializer: accepts the arguments and does nothing.

    Parameters
    ----------
    path : str
        Ignored by this mock. (Presumably the voice library directory the
        real core is initialized with -- confirm against the caller.)
    use_gpu : bool
        Ignored by this mock.
    *args : Any
        Any further positional arguments; accepted and ignored.
    """
    # Intentionally empty: the mock has nothing to set up.
    return None


Expand All @@ -25,8 +29,42 @@ def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:


def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """
    Return synthesized waveform data as a NumPy array.

    This mock always reads out the same fixed sentence (``DUMMY_TEXT``).
    See also: the SynthesisEngine docstring [Mock].

    Parameters
    ----------
    length : int
        Length in frames. Not used by this mock.
    **kwargs
        Accepted and not used by this mock.

    Returns
    -------
    wave : np.ndarray
        The synthesized waveform data.

    Notes
    -----
    The synthesis performed here does not reflect any tuning (pitch etc.),
    and the same fixed sentence is read out whatever the input is.

    Output format of ``pyopenjtalk.tts()``:
        dtype=np.float64, 16 bit, mono 48000 Hz

    About the resampling:
        Passing the float64 data straight in was expected to be fine, but in
        practice the output was badly distorted. Casting to int16 beforehand
        gave the expected result.
        The output is converted to 24 kHz to match the non-mock
        ``decode_forward``.
    """
    logger = getLogger("uvicorn")  # because this runs inside FastAPI / Uvicorn
    logger.info(
        "Sorry, decode_forward() is a mock. Return values are incorrect.",
    )

    # Synthesize the fixed sentence; `sr` is the sampling rate tts() reports.
    wave, sr = tts(DUMMY_TEXT)

    # Cast to int16 before resampling (see Notes), then go from `sr` to 24 kHz.
    wave = resample(
        wave.astype("int16"),
        sr,
        24000,
        filter="kaiser_fast",
    )
    return wave
3 changes: 3 additions & 0 deletions voicevox_engine/dev/synthesis_engine/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .mock import SynthesisEngine

__all__ = ["SynthesisEngine"]
Loading

0 comments on commit fcb38c0

Please sign in to comment.