From 6426ce43b9c8d59a7a7dc537b73c838a58e10f4c Mon Sep 17 00:00:00 2001 From: tarepan Date: Sat, 30 Mar 2024 17:56:40 +0000 Subject: [PATCH 1/4] =?UTF-8?q?refactor:=20=E3=82=AD=E3=83=A3=E3=83=A9?= =?UTF-8?q?=E3=82=AF=E3=82=BF=E3=83=BC=E6=83=85=E5=A0=B1=20APIs=20?= =?UTF-8?q?=E3=82=92=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC=E3=83=AB=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- run.py | 157 +------------------- voicevox_engine/app/__init__.py | 0 voicevox_engine/app/routers/__init__.py | 0 voicevox_engine/app/routers/character.py | 173 +++++++++++++++++++++++ 4 files changed, 178 insertions(+), 152 deletions(-) create mode 100644 voicevox_engine/app/__init__.py create mode 100644 voicevox_engine/app/routers/__init__.py create mode 100644 voicevox_engine/app/routers/character.py diff --git a/run.py b/run.py index 674a6188f..106731d9a 100644 --- a/run.py +++ b/run.py @@ -1,6 +1,5 @@ import argparse import asyncio -import base64 import json import multiprocessing import os @@ -14,7 +13,7 @@ from io import BytesIO, TextIOWrapper from pathlib import Path from tempfile import NamedTemporaryFile, TemporaryFile -from typing import Annotated, Any, Literal, Optional +from typing import Annotated, Any, Optional import soundfile import uvicorn @@ -25,12 +24,13 @@ from fastapi.openapi.utils import get_openapi from fastapi.responses import JSONResponse from fastapi.templating import Jinja2Templates -from pydantic import ValidationError, parse_obj_as +from pydantic import ValidationError from starlette.background import BackgroundTask from starlette.middleware.errors import ServerErrorMiddleware from starlette.responses import FileResponse from voicevox_engine import __version__ +from voicevox_engine.app.routers import character from voicevox_engine.cancellable_engine import CancellableEngine from voicevox_engine.core.core_adapter import CoreAdapter from voicevox_engine.core.core_initializer import initialize_cores @@ -38,11 +38,7 @@ from voicevox_engine.engine_manifest.EngineManifestLoader import EngineManifestLoader from voicevox_engine.library_manager import LibraryManager from voicevox_engine.metas.Metas import StyleId -from voicevox_engine.metas.MetasStore import ( - MetasStore, - construct_lookup, - filter_speakers_and_styles, -) +from voicevox_engine.metas.MetasStore import MetasStore, construct_lookup from voicevox_engine.model import ( AccentPhrase, AudioQuery, @@ -54,8 +50,6 @@ ParseKanaBadRequest, ParseKanaError, Score, - Speaker, - SpeakerInfo, StyleIdNotFoundError, SupportedDevicesInfo, UserDictWord, @@ -98,10 +92,6 @@ from voicevox_engine.utility.run_utility import decide_boolean_from_env -def b64encode_str(s): - return base64.b64encode(s).decode("utf-8") - - def set_output_log_utf8() -> None: """ stdout/stderrのエンコーディングをUTF-8に切り替える関数 @@ -847,144 +837,7 @@ async def core_versions() -> Response: media_type="application/json", ) - @app.get("/speakers", response_model=list[Speaker], tags=["その他"]) - def speakers( - core_version: str | None = None, - ) -> list[Speaker]: - speakers = metas_store.load_combined_metas(get_core(core_version)) - return filter_speakers_and_styles(speakers, "speaker") - - @app.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"]) - def speaker_info( - speaker_uuid: str, - core_version: str | None = None, - ) -> SpeakerInfo: - """ - 指定されたspeaker_uuidに関する情報をjson形式で返します。 - 画像や音声はbase64エンコードされたものが返されます。 - """ - return _speaker_info( - speaker_uuid=speaker_uuid, - speaker_or_singer="speaker", - core_version=core_version, - ) - - # FIXME: この関数をどこかに切り出す - def _speaker_info( - speaker_uuid: str, - speaker_or_singer: Literal["speaker", "singer"], - core_version: str | None, - ) -> SpeakerInfo: - # エンジンに含まれる話者メタ情報は、次のディレクトリ構造に従わなければならない: - # {root_dir}/ - # speaker_info/ - # {speaker_uuid_0}/ - # policy.md - # portrait.png - # icons/ - # {id_0}.png - # {id_1}.png - # ... - # portraits/ - # {id_0}.png - # {id_1}.png - # ... - # voice_samples/ - # {id_0}_001.wav - # {id_0}_002.wav - # {id_0}_003.wav - # {id_1}_001.wav - # ... - # {speaker_uuid_1}/ - # ... - - # 該当話者の検索 - speakers = parse_obj_as( - list[Speaker], json.loads(get_core(core_version).speakers) - ) - speakers = filter_speakers_and_styles(speakers, speaker_or_singer) - for i in range(len(speakers)): - if speakers[i].speaker_uuid == speaker_uuid: - speaker = speakers[i] - break - else: - raise HTTPException(status_code=404, detail="該当する話者が見つかりません") - - try: - speaker_path = root_dir / "speaker_info" / speaker_uuid - # 話者情報の取得 - # speaker policy - policy_path = speaker_path / "policy.md" - policy = policy_path.read_text("utf-8") - # speaker portrait - portrait_path = speaker_path / "portrait.png" - portrait = b64encode_str(portrait_path.read_bytes()) - # スタイル情報の取得 - style_infos = [] - for style in speaker.styles: - id = style.id - # style icon - style_icon_path = speaker_path / "icons" / f"{id}.png" - icon = b64encode_str(style_icon_path.read_bytes()) - # style portrait - style_portrait_path = speaker_path / "portraits" / f"{id}.png" - style_portrait = None - if style_portrait_path.exists(): - style_portrait = b64encode_str(style_portrait_path.read_bytes()) - # voice samples - voice_samples = [ - b64encode_str( - ( - speaker_path - / "voice_samples/{}_{}.wav".format(id, str(j + 1).zfill(3)) - ).read_bytes() - ) - for j in range(3) - ] - style_infos.append( - { - "id": id, - "icon": icon, - "portrait": style_portrait, - "voice_samples": voice_samples, - } - ) - except FileNotFoundError: - import traceback - - traceback.print_exc() - raise HTTPException( - status_code=500, detail="追加情報が見つかりませんでした" - ) - - ret_data = SpeakerInfo( - policy=policy, - portrait=portrait, - style_infos=style_infos, - ) - return ret_data - - @app.get("/singers", response_model=list[Speaker], tags=["その他"]) - def singers( - core_version: str | None = None, - ) -> list[Speaker]: - singers = metas_store.load_combined_metas(get_core(core_version)) - return filter_speakers_and_styles(singers, "singer") - - @app.get("/singer_info", response_model=SpeakerInfo, tags=["その他"]) - def singer_info( - speaker_uuid: str, - core_version: str | None = None, - ) -> SpeakerInfo: - """ - 指定されたspeaker_uuidに関する情報をjson形式で返します。 - 画像や音声はbase64エンコードされたものが返されます。 - """ - return _speaker_info( - speaker_uuid=speaker_uuid, - speaker_or_singer="singer", - core_version=core_version, - ) + app.include_router(character.router(get_core, metas_store, root_dir)) if engine_manifest_data.supported_features.manage_library: diff --git a/voicevox_engine/app/__init__.py b/voicevox_engine/app/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/voicevox_engine/app/routers/__init__.py b/voicevox_engine/app/routers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/voicevox_engine/app/routers/character.py b/voicevox_engine/app/routers/character.py new file mode 100644 index 000000000..e095db689 --- /dev/null +++ b/voicevox_engine/app/routers/character.py @@ -0,0 +1,173 @@ +"""キャラクター情報機能を提供する API Router""" + +import base64 +import json +from pathlib import Path +from typing import Literal, Callable + +from fastapi import APIRouter, HTTPException +from pydantic import parse_obj_as + +from voicevox_engine.core.core_adapter import CoreAdapter +from voicevox_engine.metas.MetasStore import ( + MetasStore, + filter_speakers_and_styles, +) +from voicevox_engine.model import ( + Speaker, + SpeakerInfo, +) + + +def b64encode_str(s): + return base64.b64encode(s).decode("utf-8") + + +def router( + get_core: Callable[[str | None], CoreAdapter], + metas_store: MetasStore, + root_dir: Path, +) -> APIRouter: + """キャラクター情報 API Router を生成する""" + _router = APIRouter() + + @_router.get("/speakers", response_model=list[Speaker], tags=["その他"]) + def speakers( + core_version: str | None = None, + ) -> list[Speaker]: + speakers = metas_store.load_combined_metas(get_core(core_version)) + return filter_speakers_and_styles(speakers, "speaker") + + @_router.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"]) + def speaker_info( + speaker_uuid: str, + core_version: str | None = None, + ) -> SpeakerInfo: + """ + 指定されたspeaker_uuidに関する情報をjson形式で返します。 + 画像や音声はbase64エンコードされたものが返されます。 + """ + return _speaker_info( + speaker_uuid=speaker_uuid, + speaker_or_singer="speaker", + core_version=core_version, + ) + + # FIXME: この関数をどこかに切り出す + def _speaker_info( + speaker_uuid: str, + speaker_or_singer: Literal["speaker", "singer"], + core_version: str | None, + ) -> SpeakerInfo: + # エンジンに含まれる話者メタ情報は、次のディレクトリ構造に従わなければならない: + # {root_dir}/ + # speaker_info/ + # {speaker_uuid_0}/ + # policy.md + # portrait.png + # icons/ + # {id_0}.png + # {id_1}.png + # ... + # portraits/ + # {id_0}.png + # {id_1}.png + # ... + # voice_samples/ + # {id_0}_001.wav + # {id_0}_002.wav + # {id_0}_003.wav + # {id_1}_001.wav + # ... + # {speaker_uuid_1}/ + # ... + + # 該当話者の検索 + speakers = parse_obj_as( + list[Speaker], json.loads(get_core(core_version).speakers) + ) + speakers = filter_speakers_and_styles(speakers, speaker_or_singer) + for i in range(len(speakers)): + if speakers[i].speaker_uuid == speaker_uuid: + speaker = speakers[i] + break + else: + raise HTTPException(status_code=404, detail="該当する話者が見つかりません") + + try: + speaker_path = root_dir / "speaker_info" / speaker_uuid + # 話者情報の取得 + # speaker policy + policy_path = speaker_path / "policy.md" + policy = policy_path.read_text("utf-8") + # speaker portrait + portrait_path = speaker_path / "portrait.png" + portrait = b64encode_str(portrait_path.read_bytes()) + # スタイル情報の取得 + style_infos = [] + for style in speaker.styles: + id = style.id + # style icon + style_icon_path = speaker_path / "icons" / f"{id}.png" + icon = b64encode_str(style_icon_path.read_bytes()) + # style portrait + style_portrait_path = speaker_path / "portraits" / f"{id}.png" + style_portrait = None + if style_portrait_path.exists(): + style_portrait = b64encode_str(style_portrait_path.read_bytes()) + # voice samples + voice_samples = [ + b64encode_str( + ( + speaker_path + / "voice_samples/{}_{}.wav".format(id, str(j + 1).zfill(3)) + ).read_bytes() + ) + for j in range(3) + ] + style_infos.append( + { + "id": id, + "icon": icon, + "portrait": style_portrait, + "voice_samples": voice_samples, + } + ) + except FileNotFoundError: + import traceback + + traceback.print_exc() + raise HTTPException( + status_code=500, detail="追加情報が見つかりませんでした" + ) + + ret_data = SpeakerInfo( + policy=policy, + portrait=portrait, + style_infos=style_infos, + ) + return ret_data + + @_router.get("/singers", response_model=list[Speaker], tags=["その他"]) + def singers( + core_version: str | None = None, + ) -> list[Speaker]: + singers = metas_store.load_combined_metas(get_core(core_version)) + return filter_speakers_and_styles(singers, "singer") + + @_router.get("/singer_info", response_model=SpeakerInfo, tags=["その他"]) + def singer_info( + speaker_uuid: str, + core_version: str | None = None, + ) -> SpeakerInfo: + """ + 指定されたspeaker_uuidに関する情報をjson形式で返します。 + 画像や音声はbase64エンコードされたものが返されます。 + """ + return _speaker_info( + speaker_uuid=speaker_uuid, + speaker_or_singer="singer", + core_version=core_version, + ) + + return _router From 13951e60235eba0ad90cbb75b9893603de789839 Mon Sep 17 00:00:00 2001 From: tarepan Date: Sat, 30 Mar 2024 18:00:56 +0000 Subject: [PATCH 2/4] fix: lint --- voicevox_engine/app/routers/character.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/voicevox_engine/app/routers/character.py b/voicevox_engine/app/routers/character.py index e095db689..a09bffebb 100644 --- a/voicevox_engine/app/routers/character.py +++ b/voicevox_engine/app/routers/character.py @@ -3,20 +3,14 @@ import base64 import json from pathlib import Path -from typing import Literal, Callable +from typing import Callable, Literal from fastapi import APIRouter, HTTPException from pydantic import parse_obj_as from voicevox_engine.core.core_adapter import CoreAdapter -from voicevox_engine.metas.MetasStore import ( - MetasStore, - filter_speakers_and_styles, -) -from voicevox_engine.model import ( - Speaker, - SpeakerInfo, -) +from voicevox_engine.metas.MetasStore import MetasStore, filter_speakers_and_styles +from voicevox_engine.model import Speaker, SpeakerInfo def b64encode_str(s): From 9d06453cf8ca0c2e03c88afcd3e311b22fb13350 Mon Sep 17 00:00:00 2001 From: tarepan Date: Sun, 31 Mar 2024 16:42:07 +0000 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20`router`=20=E5=90=8D=E3=82=92?= =?UTF-8?q?=E6=98=8E=E7=9E=AD=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- run.py | 2 +- voicevox_engine/app/routers/character.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/run.py b/run.py index 106731d9a..b9c3fbc9b 100644 --- a/run.py +++ b/run.py @@ -837,7 +837,7 @@ async def core_versions() -> Response: media_type="application/json", ) - app.include_router(character.router(get_core, metas_store, root_dir)) + app.include_router(character.generate_router(get_core, metas_store, root_dir)) if engine_manifest_data.supported_features.manage_library: diff --git a/voicevox_engine/app/routers/character.py b/voicevox_engine/app/routers/character.py index a09bffebb..dc3fc34d5 100644 --- a/voicevox_engine/app/routers/character.py +++ b/voicevox_engine/app/routers/character.py @@ -17,22 +17,22 @@ def b64encode_str(s): return base64.b64encode(s).decode("utf-8") -def router( +def generate_router( get_core: Callable[[str | None], CoreAdapter], metas_store: MetasStore, root_dir: Path, ) -> APIRouter: """キャラクター情報 API Router を生成する""" - _router = APIRouter() + router = APIRouter() - @_router.get("/speakers", response_model=list[Speaker], tags=["その他"]) + @router.get("/speakers", response_model=list[Speaker], tags=["その他"]) def speakers( core_version: str | None = None, ) -> list[Speaker]: speakers = metas_store.load_combined_metas(get_core(core_version)) return filter_speakers_and_styles(speakers, "speaker") - @_router.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"]) + @router.get("/speaker_info", response_model=SpeakerInfo, tags=["その他"]) def speaker_info( speaker_uuid: str, core_version: str | None = None, @@ -142,14 +142,14 @@ def _speaker_info( ) return ret_data - @_router.get("/singers", response_model=list[Speaker], tags=["その他"]) + @router.get("/singers", response_model=list[Speaker], tags=["その他"]) def singers( core_version: str | None = None, ) -> list[Speaker]: singers = metas_store.load_combined_metas(get_core(core_version)) return filter_speakers_and_styles(singers, "singer") - @_router.get("/singer_info", response_model=SpeakerInfo, tags=["その他"]) + @router.get("/singer_info", response_model=SpeakerInfo, tags=["その他"]) def singer_info( speaker_uuid: str, core_version: str | None = None, @@ -164,4 +164,4 @@ def singer_info( core_version=core_version, ) - return _router + return router From 2fcff40dc86a324a273302568d1791d133791765 Mon Sep 17 00:00:00 2001 From: Hiroshiba Kazuyuki Date: Mon, 15 Apr 2024 02:46:36 +0900 Subject: [PATCH 4/4] character->speaker --- run.py | 4 ++-- voicevox_engine/app/routers/{character.py => speaker.py} | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename voicevox_engine/app/routers/{character.py => speaker.py} (97%) diff --git a/run.py b/run.py index f35232927..0b6f61433 100644 --- a/run.py +++ b/run.py @@ -32,7 +32,7 @@ check_disabled_mutable_api, deprecated_mutable_api, ) -from voicevox_engine.app.routers import character, preset, setting, user_dict +from voicevox_engine.app.routers import preset, setting, speaker, user_dict from voicevox_engine.cancellable_engine import CancellableEngine from voicevox_engine.core.core_adapter import CoreAdapter from voicevox_engine.core.core_initializer import initialize_cores @@ -743,7 +743,7 @@ async def core_versions() -> Response: media_type="application/json", ) - app.include_router(character.generate_router(get_core, metas_store, root_dir)) + app.include_router(speaker.generate_router(get_core, metas_store, root_dir)) if engine_manifest_data.supported_features.manage_library: diff --git a/voicevox_engine/app/routers/character.py b/voicevox_engine/app/routers/speaker.py similarity index 97% rename from voicevox_engine/app/routers/character.py rename to voicevox_engine/app/routers/speaker.py index 2bb68a9ec..1e539224b 100644 --- a/voicevox_engine/app/routers/character.py +++ b/voicevox_engine/app/routers/speaker.py @@ -1,4 +1,4 @@ -"""キャラクター情報機能を提供する API Router""" +"""話者情報機能を提供する API Router""" import base64 import json @@ -22,7 +22,7 @@ def generate_router( metas_store: MetasStore, root_dir: Path, ) -> APIRouter: - """キャラクター情報 API Router を生成する""" + """話者情報 API Router を生成する""" router = APIRouter() @router.get("/speakers", response_model=list[Speaker], tags=["その他"])