Skip to content

Commit

Permalink
SynthesisEngineの抽象基底クラスSynthesisEngineBaseを追加 (#227)
Browse files Browse the repository at this point in the history
* add SynthesisEngineBase

* use NotImplementedError

* add abstractmethod annotation

* add metaclass=ABCMeta

* add test_mock_synthesis_engine

* split test

* synthesis test
  • Loading branch information
aoirint authored Dec 10, 2021
1 parent f91705c commit 6a1a951
Show file tree
Hide file tree
Showing 9 changed files with 293 additions and 88 deletions.
4 changes: 2 additions & 2 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
synthesis_morphing_parameter as _synthesis_morphing_parameter,
)
from voicevox_engine.preset import Preset, PresetLoader
from voicevox_engine.synthesis_engine import SynthesisEngine, make_synthesis_engine
from voicevox_engine.synthesis_engine import SynthesisEngineBase, make_synthesis_engine
from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves


def b64encode_str(s: bytes) -> str:
    """Return the Base64 encoding of ``s`` as a text string.

    Args:
        s: Bytes-like object to encode.

    Returns:
        The Base64 representation of ``s``, decoded as UTF-8 so it can be
        embedded directly in text payloads.
    """
    return base64.b64encode(s).decode("utf-8")


def generate_app(engine: SynthesisEngine) -> FastAPI:
def generate_app(engine: SynthesisEngineBase) -> FastAPI:
root_dir = Path(__file__).parent

default_sampling_rate = engine.default_sampling_rate
Expand Down
140 changes: 140 additions & 0 deletions test/test_mock_synthesis_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from unittest import TestCase

from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine
from voicevox_engine.kana_parser import create_kana
from voicevox_engine.model import AccentPhrase, AudioQuery, Mora


class TestMockSynthesisEngine(TestCase):
    """Tests that exercise MockSynthesisEngine with a fixed two-phrase input."""

    @staticmethod
    def _make_mora(text, consonant, vowel):
        """Build a Mora whose lengths and pitch are all zero.

        ``consonant_length`` is ``None`` when the mora has no consonant and
        ``0.0`` otherwise.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    def setUp(self):
        super().setUp()

        mora = self._make_mora

        # First accent phrase, followed by a pause mora.
        first_phrase = AccentPhrase(
            moras=[
                mora("コ", "k", "o"),
                mora("ン", None, "N"),
                mora("ニ", "n", "i"),
                mora("チ", "ch", "i"),
                mora("ワ", "w", "a"),
            ],
            accent=5,
            pause_mora=mora("、", None, "pau"),
        )
        # Second accent phrase, with no trailing pause.
        second_phrase = AccentPhrase(
            moras=[
                mora("ヒ", "h", "i"),
                mora("ホ", "h", "o"),
                mora("デ", "d", "e"),
                mora("ス", "s", "U"),
            ],
            accent=1,
            pause_mora=None,
        )

        self.accent_phrases_hello_hiho = [first_phrase, second_phrase]
        self.engine = MockSynthesisEngine(speakers="")

    def test_replace_phoneme_length(self):
        # The result is expected to compare equal to the input phrases.
        result = self.engine.replace_phoneme_length(
            accent_phrases=self.accent_phrases_hello_hiho,
            speaker_id=0,
        )
        self.assertEqual(result, self.accent_phrases_hello_hiho)

    def test_replace_mora_pitch(self):
        # The result is expected to compare equal to the input phrases.
        result = self.engine.replace_mora_pitch(
            accent_phrases=self.accent_phrases_hello_hiho,
            speaker_id=0,
        )
        self.assertEqual(result, self.accent_phrases_hello_hiho)

    def test_synthesis(self):
        # Only checks that synthesis runs without raising; the output is not inspected.
        query = AudioQuery(
            accent_phrases=self.accent_phrases_hello_hiho,
            speedScale=1,
            pitchScale=0,
            intonationScale=1,
            volumeScale=1,
            prePhonemeLength=0.1,
            postPhonemeLength=0.1,
            outputSamplingRate=24000,
            outputStereo=False,
            kana=create_kana(self.accent_phrases_hello_hiho),
        )
        self.engine.synthesis(query, speaker_id=0)
2 changes: 1 addition & 1 deletion test/test_mora_to_text.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest import TestCase

# TODO: import from voicevox_engine.synthesis_engine.mora
from voicevox_engine.synthesis_engine.synthesis_engine import mora_to_text
from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text


class TestMoraToText(TestCase):
Expand Down
4 changes: 2 additions & 2 deletions voicevox_engine/dev/synthesis_engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .mock import SynthesisEngine
from .mock import MockSynthesisEngine

__all__ = ["SynthesisEngine"]
__all__ = ["MockSynthesisEngine"]
3 changes: 2 additions & 1 deletion voicevox_engine/dev/synthesis_engine/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from scipy.signal import resample

from ...model import AccentPhrase, AudioQuery
from ...synthesis_engine import SynthesisEngineBase
from ...synthesis_engine.synthesis_engine import to_flatten_moras


class SynthesisEngine:
class MockSynthesisEngine(SynthesisEngineBase):
"""
SynthesisEngine [Mock]
"""
Expand Down
2 changes: 2 additions & 0 deletions voicevox_engine/synthesis_engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from .forwarder import Forwarder
from .make_synthesis_engine import make_synthesis_engine
from .synthesis_engine import SynthesisEngine
from .synthesis_engine_base import SynthesisEngineBase

__all__ = [
"Forwarder",
"make_synthesis_engine",
"SynthesisEngine",
"SynthesisEngineBase",
]
6 changes: 3 additions & 3 deletions voicevox_engine/synthesis_engine/make_synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
from pathlib import Path
from typing import Optional

from .synthesis_engine import SynthesisEngine
from .synthesis_engine import SynthesisEngine, SynthesisEngineBase


def make_synthesis_engine(
use_gpu: bool,
voicelib_dir: Path,
voicevox_dir: Optional[Path] = None,
) -> SynthesisEngine:
) -> SynthesisEngineBase:
"""
音声ライブラリをロードして、音声合成エンジンを生成
Expand Down Expand Up @@ -57,7 +57,7 @@ def make_synthesis_engine(
speakers=core.metas(),
)

from ..dev.synthesis_engine import SynthesisEngine as MockSynthesisEngine
from ..dev.synthesis_engine import MockSynthesisEngine

# モックで置き換える
return MockSynthesisEngine(speakers=core.metas())
81 changes: 2 additions & 79 deletions voicevox_engine/synthesis_engine/synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from scipy.signal import resample

from ..acoustic_feature_extractor import OjtPhoneme, SamplingData
from ..full_context_label import extract_full_context_label
from ..model import AccentPhrase, AudioQuery, Mora
from ..mora_list import openjtalk_mora2text
from .synthesis_engine_base import SynthesisEngineBase

unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
Expand Down Expand Up @@ -126,17 +125,7 @@ def pre_process(
return flatten_moras, phoneme_data_list


def mora_to_text(mora: str) -> str:
    """Map a mora phoneme string to its text via ``openjtalk_mora2text``.

    A trailing uppercase vowel (A/I/U/E/O, used for devoiced vowels) is
    lowercased before the lookup. When the table has no entry, the
    (possibly lowercased) input string is returned instead.
    """
    devoiced_vowels = ("A", "I", "U", "E", "O")
    if mora.endswith(devoiced_vowels):
        # Devoiced vowels are written in uppercase; convert to lowercase.
        head, last = mora[:-1], mora[-1]
        mora = head + last.lower()
    return openjtalk_mora2text[mora] if mora in openjtalk_mora2text else mora


class SynthesisEngine:
class SynthesisEngine(SynthesisEngineBase):
def __init__(
self,
yukarin_s_forwarder,
Expand Down Expand Up @@ -365,72 +354,6 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int):

return accent_phrases

def replace_mora_data(
    self,
    accent_phrases: List[AccentPhrase],
    speaker_id: int,
) -> List[AccentPhrase]:
    """Run phoneme-length replacement, then mora-pitch replacement.

    The output of ``replace_phoneme_length`` is fed into
    ``replace_mora_pitch``, both with the same ``speaker_id``; the result of
    the second call is returned.
    """
    with_lengths = self.replace_phoneme_length(
        accent_phrases=accent_phrases,
        speaker_id=speaker_id,
    )
    return self.replace_mora_pitch(
        accent_phrases=with_lengths,
        speaker_id=speaker_id,
    )

def create_accent_phrases(self, text: str, speaker_id: int) -> List[AccentPhrase]:
    """Build accent phrases for ``text`` and fill them via ``replace_mora_data``.

    Returns an empty list when ``text`` is blank or when the extracted
    utterance has no breath groups. Otherwise every accent phrase of every
    breath group is converted to an ``AccentPhrase`` with zeroed lengths and
    pitch, and the list is passed through ``replace_mora_data``.
    """
    if not text.strip():
        return []

    utterance = extract_full_context_label(text)
    if len(utterance.breath_groups) == 0:
        return []

    final_group_index = len(utterance.breath_groups) - 1
    accent_phrases = []
    for i_breath_group, breath_group in enumerate(utterance.breath_groups):
        final_phrase_index = len(breath_group.accent_phrases) - 1
        for i_accent_phrase, accent_phrase in enumerate(breath_group.accent_phrases):
            moras = []
            for mora in accent_phrase.moras:
                has_consonant = mora.consonant is not None
                moras.append(
                    Mora(
                        text=mora_to_text("".join([p.phoneme for p in mora.phonemes])),
                        consonant=mora.consonant.phoneme if has_consonant else None,
                        consonant_length=0 if has_consonant else None,
                        vowel=mora.vowel.phoneme,
                        vowel_length=0,
                        pitch=0,
                    )
                )
            accent = accent_phrase.accent

            # A pause mora follows the last accent phrase of every breath
            # group except the final breath group of the utterance.
            ends_group = i_accent_phrase == final_phrase_index
            is_final_group = i_breath_group == final_group_index
            if ends_group and not is_final_group:
                pause_mora = Mora(
                    text="、",
                    consonant=None,
                    consonant_length=None,
                    vowel="pau",
                    vowel_length=0,
                    pitch=0,
                )
            else:
                pause_mora = None

            accent_phrases.append(
                AccentPhrase(moras=moras, accent=accent, pause_mora=pause_mora)
            )

    return self.replace_mora_data(
        accent_phrases=accent_phrases,
        speaker_id=speaker_id,
    )

def synthesis(self, query: AudioQuery, speaker_id: int):
"""
音声合成クエリから音声合成に必要な情報を構成し、実際に音声合成を行う
Expand Down
Loading

0 comments on commit 6a1a951

Please sign in to comment.