Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

整理: 無音付加を CoreAdapter へ移植 #999

Merged
merged 1 commit into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion voicevox_engine/core_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,17 @@ def safe_yukarin_sa_forward(
end_accent_phrase_list: NDArray[np.int64],
style_id: StyleId,
) -> NDArray[np.float32]:
- # 「指定スタイルを初期化」「mutexによる安全性」「系列長・データ型に関するアダプター」を提供する
+ # 「指定スタイルを初期化」「mutexによる安全性」「コア仕様に従う無音自動付加」「系列長・データ型に関するアダプター」を提供する
self.initialize_style_id_synthesis(style_id, skip_reinit=True)

+ # 前後無音を付加する(詳細: voicevox_engine#924)
+ vowel_phoneme_list = np.r_[0, vowel_phoneme_list, 0]
+ consonant_phoneme_list = np.r_[-1, consonant_phoneme_list, -1]
+ start_accent_list = np.r_[0, start_accent_list, 0]
+ end_accent_list = np.r_[0, end_accent_list, 0]
+ start_accent_phrase_list = np.r_[0, start_accent_phrase_list, 0]
+ end_accent_phrase_list = np.r_[0, end_accent_phrase_list, 0]

with self.mutex:
f0_list = self.core.yukarin_sa_forward(
length=vowel_phoneme_list.shape[0],
Expand All @@ -102,6 +111,10 @@ def safe_yukarin_sa_forward(
end_accent_phrase_list=end_accent_phrase_list[np.newaxis],
style_id=np.array(style_id, dtype=np.int64).reshape(-1),
)[0]

+ # 前後無音に相当する領域を破棄する
+ f0_list = f0_list[1:-1]

return f0_list

def safe_decode_forward(
Expand Down
13 changes: 3 additions & 10 deletions voicevox_engine/tts_pipeline/tts_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,18 +317,11 @@ def update_pitch(
[_create_one_hot(accent_phrase, -1) for accent_phrase in accent_phrases]
)

- # 前後無音を付加する
- start_accent_list = np.r_[0, start_accent_list, 0]
- end_accent_list = np.r_[0, end_accent_list, 0]
- start_accent_phrase_list = np.r_[0, start_accent_phrase_list, 0]
- end_accent_phrase_list = np.r_[0, end_accent_phrase_list, 0]
-
- # アクセント句系列から(前後の無音含まない)モーラ系列と(前後の無音含む)音素系列を抽出する
+ # アクセント句系列からモーラ系列と音素系列を抽出する
moras = to_flatten_moras(accent_phrases)
phonemes = to_flatten_phonemes(moras)
- phonemes = [Phoneme("pau")] + phonemes + [Phoneme("pau")]

- # 前後無音付加済みの音素系列から子音ID系列・母音ID系列を抽出する
+ # 音素系列から子音ID系列・母音ID系列を抽出する
consonants, vowels = split_mora(phonemes)
vowel_ids = np.array([p.phoneme_id for p in vowels], dtype=np.int64)
consonant_ids = np.array(
Expand All @@ -353,7 +346,7 @@ def update_pitch(

# 更新する
for i, mora in enumerate(moras):
- mora.pitch = f0[i + 1]
+ mora.pitch = f0[i]

return accent_phrases

Expand Down