forked from VOICEVOX/voicevox_engine
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into refactor/tts_engine_format
- Loading branch information
Showing
16 changed files
with
299 additions
and
313 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,130 +1,96 @@ | ||
import uuid | ||
|
||
from voicevox_engine.metas.Metas import Speaker, SpeakerStyle, StyleId, StyleType | ||
from voicevox_engine.metas.Metas import ( | ||
SpeakerStyle, | ||
SpeakerSupportedFeatures, | ||
StyleId, | ||
StyleType, | ||
) | ||
from voicevox_engine.metas.MetasStore import ( | ||
SING_STYLE_TYPES, | ||
TALK_STYLE_TYPES, | ||
_SING_STYLE_TYPES, | ||
_TALK_STYLE_TYPES, | ||
Character, | ||
filter_characters_and_styles, | ||
) | ||
|
||
|
||
def _speakers_to_characters(speakers: list[Speaker]) -> list[Character]: | ||
"""Speaker 配列をキャラクター配列へキャストする。""" | ||
characters: list[Character] = [] | ||
for speaker in speakers: | ||
styles = speaker.styles | ||
talk_styles = filter(lambda style: style.type in TALK_STYLE_TYPES, styles) | ||
sing_styles = filter(lambda style: style.type in SING_STYLE_TYPES, styles) | ||
characters.append( | ||
Character( | ||
name=speaker.name, | ||
uuid=speaker.speaker_uuid, | ||
talk_styles=list(talk_styles), | ||
sing_styles=list(sing_styles), | ||
version=speaker.version, | ||
supported_features=speaker.supported_features, | ||
) | ||
) | ||
return characters | ||
|
||
|
||
def _gen_speaker(style_types: list[StyleType]) -> Speaker: | ||
return Speaker( | ||
speaker_uuid=str(uuid.uuid4()), | ||
def _gen_character(style_types: list[StyleType]) -> Character: | ||
talk_styles = list(filter(lambda s: s in _TALK_STYLE_TYPES, style_types)) | ||
sing_styles = list(filter(lambda s: s in _SING_STYLE_TYPES, style_types)) | ||
return Character( | ||
name="", | ||
styles=[ | ||
SpeakerStyle( | ||
name="", | ||
id=StyleId(0), | ||
type=style_type, | ||
) | ||
for style_type in style_types | ||
uuid=str(uuid.uuid4()), | ||
talk_styles=[ | ||
SpeakerStyle(name="", id=StyleId(0), type=style_type) | ||
for style_type in talk_styles | ||
], | ||
sing_styles=[ | ||
SpeakerStyle(name="", id=StyleId(0), type=style_type) | ||
for style_type in sing_styles | ||
], | ||
version="", | ||
supported_features=SpeakerSupportedFeatures(), | ||
) | ||
|
||
|
||
def _equal_speakers(a: list[Speaker], b: list[Speaker]) -> bool: | ||
def _equal_characters(a: list[Character], b: list[Character]) -> bool: | ||
if len(a) != len(b): | ||
return False | ||
for i in range(len(a)): | ||
if a[i].speaker_uuid != b[i].speaker_uuid: | ||
if a[i].uuid != b[i].uuid: | ||
return False | ||
return True | ||
|
||
|
||
def test_filter_speakers_and_styles_with_speaker() -> None: | ||
def test_filter_characters_and_styles_with_talk() -> None: | ||
# Inputs | ||
speaker_talk_only = _gen_speaker(["talk"]) | ||
speaker_singing_teacher_only = _gen_speaker(["singing_teacher"]) | ||
speaker_frame_decode_only = _gen_speaker(["frame_decode"]) | ||
speaker_sing_only = _gen_speaker(["sing"]) | ||
speaker_allstyle = _gen_speaker(["talk", "singing_teacher", "frame_decode", "sing"]) | ||
talk_only = _gen_character(["talk"]) | ||
singing_teacher_only = _gen_character(["singing_teacher"]) | ||
frame_decode_only = _gen_character(["frame_decode"]) | ||
sing_only = _gen_character(["sing"]) | ||
allstyle = _gen_character(["talk", "singing_teacher", "frame_decode", "sing"]) | ||
|
||
# Outputs | ||
result = filter_characters_and_styles( | ||
_speakers_to_characters( | ||
[ | ||
speaker_talk_only, | ||
speaker_singing_teacher_only, | ||
speaker_frame_decode_only, | ||
speaker_sing_only, | ||
speaker_allstyle, | ||
] | ||
), | ||
"speaker", | ||
[talk_only, singing_teacher_only, frame_decode_only, sing_only, allstyle], | ||
"talk", | ||
) | ||
|
||
# Tests | ||
assert len(result) == 2 | ||
|
||
# 話者だけになっている | ||
assert _equal_speakers(result, [speaker_talk_only, speaker_allstyle]) | ||
assert _equal_characters(result, [talk_only, allstyle]) | ||
|
||
# スタイルがフィルタリングされている | ||
for speaker in result: | ||
for style in speaker.styles: | ||
for characters in result: | ||
for style in characters.talk_styles + characters.sing_styles: | ||
assert style.type == "talk" | ||
|
||
|
||
def test_filter_speakers_and_styles_with_singer() -> None: | ||
def test_filter_characters_and_styles_with_sing() -> None: | ||
# Inputs | ||
speaker_talk_only = _gen_speaker(["talk"]) | ||
speaker_singing_teacher_only = _gen_speaker(["singing_teacher"]) | ||
speaker_frame_decode_only = _gen_speaker(["frame_decode"]) | ||
speaker_sing_only = _gen_speaker(["sing"]) | ||
speaker_allstyle = _gen_speaker(["talk", "singing_teacher", "frame_decode", "sing"]) | ||
talk_only = _gen_character(["talk"]) | ||
singing_teacher_only = _gen_character(["singing_teacher"]) | ||
frame_decode_only = _gen_character(["frame_decode"]) | ||
sing_only = _gen_character(["sing"]) | ||
allstyle = _gen_character(["talk", "singing_teacher", "frame_decode", "sing"]) | ||
|
||
# Outputs | ||
result = filter_characters_and_styles( | ||
_speakers_to_characters( | ||
[ | ||
speaker_talk_only, | ||
speaker_singing_teacher_only, | ||
speaker_frame_decode_only, | ||
speaker_sing_only, | ||
speaker_allstyle, | ||
] | ||
), | ||
"singer", | ||
[talk_only, singing_teacher_only, frame_decode_only, sing_only, allstyle], | ||
"sing", | ||
) | ||
|
||
# Tests | ||
assert len(result) == 4 | ||
|
||
# 歌手だけになっている | ||
assert _equal_speakers( | ||
result, | ||
[ | ||
speaker_singing_teacher_only, | ||
speaker_frame_decode_only, | ||
speaker_sing_only, | ||
speaker_allstyle, | ||
], | ||
assert _equal_characters( | ||
result, [singing_teacher_only, frame_decode_only, sing_only, allstyle] | ||
) | ||
|
||
# スタイルがフィルタリングされている | ||
for speaker in result: | ||
for style in speaker.styles: | ||
for character in result: | ||
for style in character.talk_styles + character.sing_styles: | ||
assert style.type in ["singing_teacher", "frame_decode", "sing"] |
Oops, something went wrong.