Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

モーフィングの許可に関する設定およびAPIを追加 #578

Merged
merged 10 commits into from
Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@
ParseKanaError,
Speaker,
SpeakerInfo,
SpeakerNotFoundError,
SupportedDevicesInfo,
UserDictWord,
WordTypes,
)
from voicevox_engine.morphing import synthesis_morphing
from voicevox_engine.morphing import is_synthesis_morphing_permitted, synthesis_morphing
from voicevox_engine.morphing import (
synthesis_morphing_parameter as _synthesis_morphing_parameter,
)
Expand Down Expand Up @@ -483,6 +484,34 @@ def multi_synthesis(
background=BackgroundTask(delete_file, f.name),
)

@app.get(
    "/is_morphable",
    response_model=bool,
    tags=["音声合成"],
    summary="2人の話者でモーフィングが可能かどうか返す",
)
def is_morphable(
    base_speaker: int,
    target_speaker: int,
    core_version: Optional[str] = None,
):
    """
    指定された2人の話者でモーフィング機能を利用可能か返します。
    モーフィングの許可/禁止は`/speakers`の`speaker.supportedFeatures.synthesisMorphing`に記載されています。
    プロパティが存在しない場合は、モーフィングが許可されているとみなします。
    """
    # Endpoint: answers whether morphing between the two given speakers is
    # allowed, delegating the decision to is_synthesis_morphing_permitted().
    # The docstring above is left verbatim on purpose: endpoint docstrings are
    # presumably published as the API description, so it is runtime text.
    # NOTE(review): the docstring names `supportedFeatures.synthesisMorphing`,
    # while the permission key used elsewhere is `permitedSynthesisMorphing`
    # - confirm which spelling the API documentation should show.
    engine = get_engine(core_version)
    speaker_info_dir = root_dir / "speaker_info"

    try:
        return is_synthesis_morphing_permitted(
            engine, speaker_info_dir, base_speaker, target_speaker
        )
    except SpeakerNotFoundError as not_found:
        # An unknown speaker id is reported to the client as 404 Not Found.
        raise HTTPException(
            status_code=404,
            detail=f"該当する話者(speaker={not_found.speaker})が見つかりません",
        )

@app.post(
"/synthesis_morphing",
response_class=FileResponse,
Expand All @@ -509,6 +538,20 @@ def _synthesis_morphing(
"""
engine = get_engine(core_version)

try:
is_permitted = is_synthesis_morphing_permitted(
engine, root_dir / "speaker_info", base_speaker, target_speaker
)
if not is_permitted:
raise HTTPException(
status_code=400,
detail="指定された話者ペアでのモーフィングはできません",
)
except SpeakerNotFoundError as e:
raise HTTPException(
status_code=404, detail=f"該当する話者(speaker={e.speaker})が見つかりません"
)

# 生成したパラメータはキャッシュされる
morph_param = synthesis_morphing_parameter(
engine=engine,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"speakerName": "dummy3",
"speakerUuid": "dummy3uuid",
"supportedFeatures": {"permitedSynthesisMorphing": "NOTHING"},
"styles": [
{
"styleId": 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"speakerName": "dummy2",
"speakerUuid": "dummy2uuid",
"supportedFeatures": {"permitedSynthesisMorphing": "SELF_ONLY"},
"styles": [
{
"styleId": 3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"speakerName": "dummy4",
"speakerUuid": "dummy4uuid",
"supportedFeatures": {"permitedSynthesisMorphing": "ALL"},
Segu-g marked this conversation as resolved.
Show resolved Hide resolved
"styles": [
{
"styleId": 9
Expand Down
29 changes: 29 additions & 0 deletions voicevox_engine/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,36 @@ class SpeakerStyle(BaseModel):
id: int = Field(title="スタイルID")


# Morphing permission level declared for a speaker. Members compare equal to
# their plain string values because the enum also derives from ``str``.
# (Documented with comments rather than a class docstring so that the class's
# ``__doc__`` stays exactly as it was.)
class SpeakerSupporPermitedSynthesisMorphing(str, Enum):
    ALL = "ALL"  # morphing is permitted with any speaker
    SELF_ONLY = "SELF_ONLY"  # morphing is permitted only within the same speaker
    NOTHING = "NOTHING"  # morphing is forbidden entirely

    @classmethod
    def _missing_(cls, value: object) -> "SpeakerSupporPermitedSynthesisMorphing":
        # Fallback used when a value lookup fails: any value that is not one
        # of the members above (including None) resolves to ALL.
        return cls.ALL


class SpeakerSupportedFeatures(BaseModel):
    """
    話者の対応機能の情報
    """

    # Describes which optional features a speaker supports.
    # The docstring above is kept verbatim because model docstrings may
    # surface in the generated API schema - TODO confirm.

    # Morphing permission for this speaker. The default is the ALL member,
    # i.e. a speaker that declares nothing is treated as fully permitted.
    permited_synthesis_morphing: Optional[
        SpeakerSupporPermitedSynthesisMorphing
    ] = Field(
        default=SpeakerSupporPermitedSynthesisMorphing.ALL,
        title="モーフィング機能への対応",
    )


class Speaker(BaseModel):
    """
    スピーカー情報
    """

    # Speaker information model.
    # The docstring above is kept verbatim because model docstrings may
    # surface in the generated API schema - TODO confirm.

    # Display name of the speaker.
    name: str = Field(title="名前")
    # UUID identifying the speaker.
    speaker_uuid: str = Field(title="スピーカーのUUID")
    # Features supported by the speaker; optional.
    supported_features: Optional[SpeakerSupportedFeatures] = Field(title="スピーカーの対応機能")
    # Styles available for this speaker.
    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")
    # Speaker version.
    # NOTE(review): unlike the fields above, the label here is passed
    # positionally instead of as `title=`. In pydantic the first positional
    # argument of Field is the default value, so this likely makes the label
    # the default version string - confirm whether `title=` was intended.
    version: str = Field("スピーカーのバージョン")

Expand All @@ -145,6 +168,12 @@ class SpeakerInfo(BaseModel):
style_infos: List[StyleInfo] = Field(title="スタイルの追加情報")


class SpeakerNotFoundError(LookupError):
    """Raised when no speaker matches the requested id.

    The requested id is kept on the ``speaker`` attribute so that handlers
    can include it in their own error reporting.
    """

    def __init__(self, speaker: int, *args: object, **kywrds: object) -> None:
        message = f"speaker {speaker} is not found."
        self.speaker = speaker
        # Any extra arguments are forwarded to LookupError unchanged, after
        # the generated message.
        super().__init__(message, *args, **kywrds)


class DownloadableLibrary(BaseModel):
"""
ダウンロード可能な音声ライブラリの情報(最新情報をwebで取得することを考慮して、ローカルの情報はない)
Expand Down
84 changes: 83 additions & 1 deletion voicevox_engine/morphing.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import json
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pyworld as pw

from .model import AudioQuery
from voicevox_engine.synthesis_engine.synthesis_engine_base import SynthesisEngineBase

from .model import (
AudioQuery,
SpeakerNotFoundError,
SpeakerSupporPermitedSynthesisMorphing,
)
from .synthesis_engine import SynthesisEngine


Expand Down Expand Up @@ -43,6 +51,80 @@ def create_morphing_parameter(
)


def _find_core_speaker(core_speakers: list, style_id: int) -> dict:
    """Return the first core speaker entry whose styles include ``style_id``.

    Raises:
        SpeakerNotFoundError: if no entry lists ``style_id`` among its styles.
    """
    for speaker in core_speakers:
        if any(style["id"] == style_id for style in speaker["styles"]):
            return speaker
    raise SpeakerNotFoundError(style_id)


def _load_morphing_permission(
    speaker_info_folder: Path, speaker_uuid: str
) -> SpeakerSupporPermitedSynthesisMorphing:
    """Read the morphing permission declared in a speaker's ``metas.json``."""
    # FIXME: consolidate the engine-side metas loading, like PresetLoader
    metas_path = speaker_info_folder / f"{speaker_uuid}" / "metas.json"
    engine_info = json.loads(metas_path.read_text(encoding="utf-8"))

    # FIXME: share this lookup once other supported_features exist
    # `or {}` also covers an explicit `"supportedFeatures": null`.
    supported_features = engine_info.get("supportedFeatures") or {}

    # Normalise through the enum so that a missing or unrecognised value
    # resolves via the enum's `_missing_` fallback instead of staying a raw
    # string that matches none of the members.
    return SpeakerSupporPermitedSynthesisMorphing(
        supported_features.get("permitedSynthesisMorphing")
    )


def is_synthesis_morphing_permitted(
    engine: SynthesisEngineBase,
    speaker_info_folder: Path,
    base_speaker: int,
    target_speaker: int,
) -> bool:
    """Return whether morphing between the two given speakers is permitted.

    Args:
        engine: Engine whose ``speakers`` attribute holds the core speaker
            list as a JSON string.
        speaker_info_folder: Folder containing ``<speaker_uuid>/metas.json``.
        base_speaker: Style id of the base speaker.
        target_speaker: Style id of the target speaker.

    Returns:
        ``False`` if either speaker declares ``NOTHING``; if either declares
        ``SELF_ONLY``, whether both ids belong to the same speaker UUID;
        otherwise ``True`` when both resolve to ``ALL``. A speaker that
        declares no permission, or an unrecognised one, resolves to the
        enum's fallback member.

    Raises:
        SpeakerNotFoundError: if a style id is not found among the core
            speakers. When both are unknown, the base speaker is reported.
    """
    core_speakers = json.loads(engine.speakers)

    # Look up the base speaker first so that it is the one reported when
    # both ids are unknown.
    base_speaker_core_info = _find_core_speaker(core_speakers, base_speaker)
    target_speaker_core_info = _find_core_speaker(core_speakers, target_speaker)

    base_speaker_uuid = base_speaker_core_info["speaker_uuid"]
    target_speaker_uuid = target_speaker_core_info["speaker_uuid"]
    is_same_speaker = base_speaker_uuid == target_speaker_uuid

    base_permission = _load_morphing_permission(speaker_info_folder, base_speaker_uuid)
    # Both ids map to the same metas.json when the UUIDs match; read it once.
    if is_same_speaker:
        target_permission = base_permission
    else:
        target_permission = _load_morphing_permission(
            speaker_info_folder, target_speaker_uuid
        )
    permissions = (base_permission, target_permission)

    # Forbidden by either side.
    if SpeakerSupporPermitedSynthesisMorphing.NOTHING in permissions:
        return False
    # Restricted to the same speaker by either side.
    if SpeakerSupporPermitedSynthesisMorphing.SELF_ONLY in permissions:
        return is_same_speaker
    # Just in case, require an explicit ALL on both sides.
    return all(
        permission == SpeakerSupporPermitedSynthesisMorphing.ALL
        for permission in permissions
    )


def synthesis_morphing_parameter(
engine: SynthesisEngine,
query: AudioQuery,
Expand Down