Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SynthesisEngineの抽象基底クラスSynthesisEngineBaseを追加 #227

Merged
merged 7 commits into from
Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@
SpeakerInfo,
)
from voicevox_engine.preset import Preset, PresetLoader
from voicevox_engine.synthesis_engine import SynthesisEngine, make_synthesis_engine
from voicevox_engine.synthesis_engine import SynthesisEngineBase, make_synthesis_engine
from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves


def b64encode_str(s):
    """Base64-encode ``s`` and return the result as text.

    ``s`` is passed straight to ``base64.b64encode``; the bytes that call
    returns are decoded as UTF-8 so the caller receives a ``str``.
    """
    encoded = base64.b64encode(s)
    return encoded.decode("utf-8")


def generate_app(engine: SynthesisEngine) -> FastAPI:
def generate_app(engine: SynthesisEngineBase) -> FastAPI:
root_dir = Path(__file__).parent

default_sampling_rate = engine.default_sampling_rate
Expand Down
140 changes: 140 additions & 0 deletions test/test_mock_synthesis_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from unittest import TestCase

from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine
from voicevox_engine.kana_parser import create_kana
from voicevox_engine.model import AccentPhrase, AudioQuery, Mora


class TestMockSynthesisEngine(TestCase):
    """Tests that exercise ``MockSynthesisEngine`` with a fixed two-phrase input.

    The fixture spells "コンニチワ、ヒホデス" as two accent phrases whose
    lengths and pitches are all zero.
    """

    @staticmethod
    def _mora(text, consonant, vowel):
        """Build a ``Mora`` with zeroed vowel length and pitch.

        ``consonant_length`` is ``None`` when there is no consonant and
        ``0.0`` otherwise, matching the fixture values used by these tests.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    def setUp(self):
        """Create the accent-phrase fixture and the mock engine under test."""
        super().setUp()

        mora = self._mora

        # First phrase: "コンニチワ" followed by a pause mora ("、").
        hello = AccentPhrase(
            moras=[
                mora("コ", "k", "o"),
                mora("ン", None, "N"),
                mora("ニ", "n", "i"),
                mora("チ", "ch", "i"),
                mora("ワ", "w", "a"),
            ],
            accent=5,
            pause_mora=mora("、", None, "pau"),
        )

        # Second phrase: "ヒホデス" with no trailing pause.
        hiho = AccentPhrase(
            moras=[
                mora("ヒ", "h", "i"),
                mora("ホ", "h", "o"),
                mora("デ", "d", "e"),
                mora("ス", "s", "U"),
            ],
            accent=1,
            pause_mora=None,
        )

        self.accent_phrases_hello_hiho = [hello, hiho]
        self.engine = MockSynthesisEngine(speakers="")

    def test_replace_phoneme_length(self):
        """``replace_phoneme_length`` must return phrases equal to its input."""
        phrases = self.accent_phrases_hello_hiho
        replaced = self.engine.replace_phoneme_length(
            accent_phrases=phrases,
            speaker_id=0,
        )
        self.assertEqual(replaced, phrases)

    def test_replace_mora_pitch(self):
        """``replace_mora_pitch`` must return phrases equal to its input."""
        phrases = self.accent_phrases_hello_hiho
        replaced = self.engine.replace_mora_pitch(
            accent_phrases=phrases,
            speaker_id=0,
        )
        self.assertEqual(replaced, phrases)

    def test_synthesis(self):
        """``synthesis`` must run on a full ``AudioQuery`` without raising.

        The return value is not inspected; this is a smoke test only.
        """
        phrases = self.accent_phrases_hello_hiho
        query = AudioQuery(
            accent_phrases=phrases,
            speedScale=1,
            pitchScale=0,
            intonationScale=1,
            volumeScale=1,
            prePhonemeLength=0.1,
            postPhonemeLength=0.1,
            outputSamplingRate=24000,
            outputStereo=False,
            kana=create_kana(phrases),
        )
        self.engine.synthesis(query, speaker_id=0)
2 changes: 1 addition & 1 deletion test/test_mora_to_text.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest import TestCase

# TODO: import from voicevox_engine.synthesis_engine.mora
from voicevox_engine.synthesis_engine.synthesis_engine import mora_to_text
from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text


class TestMoraToText(TestCase):
Expand Down
4 changes: 2 additions & 2 deletions voicevox_engine/dev/synthesis_engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .mock import SynthesisEngine
from .mock import MockSynthesisEngine

__all__ = ["SynthesisEngine"]
__all__ = ["MockSynthesisEngine"]
3 changes: 2 additions & 1 deletion voicevox_engine/dev/synthesis_engine/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from scipy.signal import resample

from ...model import AccentPhrase, AudioQuery
from ...synthesis_engine import SynthesisEngineBase
from ...synthesis_engine.synthesis_engine import to_flatten_moras


class SynthesisEngine:
class MockSynthesisEngine(SynthesisEngineBase):
"""
SynthesisEngine [Mock]
"""
Expand Down
2 changes: 2 additions & 0 deletions voicevox_engine/synthesis_engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from .forwarder import Forwarder
from .make_synthesis_engine import make_synthesis_engine
from .synthesis_engine import SynthesisEngine
from .synthesis_engine_base import SynthesisEngineBase

__all__ = [
"Forwarder",
"make_synthesis_engine",
"SynthesisEngine",
"SynthesisEngineBase",
]
6 changes: 3 additions & 3 deletions voicevox_engine/synthesis_engine/make_synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
from pathlib import Path
from typing import Optional

from .synthesis_engine import SynthesisEngine
from .synthesis_engine import SynthesisEngine, SynthesisEngineBase


def make_synthesis_engine(
use_gpu: bool,
voicelib_dir: Path,
voicevox_dir: Optional[Path] = None,
) -> SynthesisEngine:
) -> SynthesisEngineBase:
"""
音声ライブラリをロードして、音声合成エンジンを生成

Expand Down Expand Up @@ -57,7 +57,7 @@ def make_synthesis_engine(
speakers=core.metas(),
)

from ..dev.synthesis_engine import SynthesisEngine as MockSynthesisEngine
from ..dev.synthesis_engine import MockSynthesisEngine

# モックで置き換える
return MockSynthesisEngine(speakers=core.metas())
81 changes: 2 additions & 79 deletions voicevox_engine/synthesis_engine/synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from scipy.signal import resample

from ..acoustic_feature_extractor import OjtPhoneme, SamplingData
from ..full_context_label import extract_full_context_label
from ..model import AccentPhrase, AudioQuery, Mora
from ..mora_list import openjtalk_mora2text
from .synthesis_engine_base import SynthesisEngineBase

unvoiced_mora_phoneme_list = ["A", "I", "U", "E", "O", "cl", "pau"]
mora_phoneme_list = ["a", "i", "u", "e", "o", "N"] + unvoiced_mora_phoneme_list
Expand Down Expand Up @@ -126,17 +125,7 @@ def pre_process(
return flatten_moras, phoneme_data_list


def mora_to_text(mora: str) -> str:
    """Convert a mora's phoneme string to its text form.

    If the string ends in an upper-case vowel (``A``, ``I``, ``U``, ``E``,
    ``O``), that final character is lower-cased first. The result is then
    looked up in ``openjtalk_mora2text``; when there is no entry the
    (possibly lower-cased) string itself is returned.
    """
    final = mora[-1:]
    if final in ("A", "I", "U", "E", "O"):
        # Devoiced vowels are upper-case; lower-case them before the lookup.
        mora = mora[:-1] + final.lower()
    return openjtalk_mora2text[mora] if mora in openjtalk_mora2text else mora


class SynthesisEngine:
class SynthesisEngine(SynthesisEngineBase):
def __init__(
self,
yukarin_s_forwarder,
Expand Down Expand Up @@ -365,72 +354,6 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int):

return accent_phrases

def replace_mora_data(
    self,
    accent_phrases: List[AccentPhrase],
    speaker_id: int,
) -> List[AccentPhrase]:
    """Run phoneme-length replacement, then mora-pitch replacement.

    The output of ``replace_phoneme_length`` is fed into
    ``replace_mora_pitch``; both calls receive the same ``speaker_id``.
    Returns whatever ``replace_mora_pitch`` returns.
    """
    with_lengths = self.replace_phoneme_length(
        accent_phrases=accent_phrases,
        speaker_id=speaker_id,
    )
    return self.replace_mora_pitch(
        accent_phrases=with_lengths,
        speaker_id=speaker_id,
    )

def create_accent_phrases(self, text: str, speaker_id: int) -> List[AccentPhrase]:
    """Build accent phrases for ``text`` and fill in their mora data.

    Returns an empty list when ``text`` is blank or when
    ``extract_full_context_label`` yields no breath groups. Otherwise every
    accent phrase of every breath group becomes an ``AccentPhrase`` whose
    lengths and pitches are initialised to zero, and the list is passed
    through ``self.replace_mora_data`` for the given ``speaker_id``.
    """
    if not text.strip():
        return []

    utterance = extract_full_context_label(text)
    if len(utterance.breath_groups) == 0:
        return []

    final_group_index = len(utterance.breath_groups) - 1
    phrases = []
    for group_index, breath_group in enumerate(utterance.breath_groups):
        final_phrase_index = len(breath_group.accent_phrases) - 1
        for phrase_index, accent_phrase in enumerate(breath_group.accent_phrases):
            moras = []
            for mora in accent_phrase.moras:
                has_consonant = mora.consonant is not None
                phoneme_string = "".join([p.phoneme for p in mora.phonemes])
                moras.append(
                    Mora(
                        text=mora_to_text(phoneme_string),
                        consonant=mora.consonant.phoneme if has_consonant else None,
                        consonant_length=0 if has_consonant else None,
                        vowel=mora.vowel.phoneme,
                        vowel_length=0,
                        pitch=0,
                    )
                )

            # A pause mora follows only the last phrase of a breath group,
            # and only when that breath group is not the final one.
            needs_pause = (
                phrase_index == final_phrase_index
                and group_index != final_group_index
            )
            if needs_pause:
                pause_mora = Mora(
                    text="、",
                    consonant=None,
                    consonant_length=None,
                    vowel="pau",
                    vowel_length=0,
                    pitch=0,
                )
            else:
                pause_mora = None

            phrases.append(
                AccentPhrase(
                    moras=moras,
                    accent=accent_phrase.accent,
                    pause_mora=pause_mora,
                )
            )

    return self.replace_mora_data(
        accent_phrases=phrases,
        speaker_id=speaker_id,
    )

def synthesis(self, query: AudioQuery, speaker_id: int):
"""
音声合成クエリから音声合成に必要な情報を構成し、実際に音声合成を行う
Expand Down
Loading