Skip to content

Commit

Permalink
Synthesizerの構造改革をする
Browse files Browse the repository at this point in the history
  • Loading branch information
qryxip committed Nov 18, 2023
1 parent ae4a45d commit 04b3787
Show file tree
Hide file tree
Showing 25 changed files with 1,153 additions and 1,087 deletions.
1 change: 1 addition & 0 deletions crates/voicevox_core/src/__internal.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod doctest_fixtures;
pub mod interp;

// VOICEVOX CORE内のラッパー向けの実装
// FIXME: 要議論: https://github.com/VOICEVOX/voicevox_core/issues/595
Expand Down
2 changes: 1 addition & 1 deletion crates/voicevox_core/src/__internal/doctest_fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{AccelerationMode, InitializeOptions, OpenJtalk, Synthesizer, VoiceMo

pub async fn synthesizer_with_sample_voice_model(
open_jtalk_dic_dir: impl AsRef<Path>,
) -> anyhow::Result<Synthesizer> {
) -> anyhow::Result<Synthesizer<Arc<OpenJtalk>>> {
let syntesizer = Synthesizer::new(
Arc::new(OpenJtalk::new(open_jtalk_dic_dir).unwrap()),
&InitializeOptions {
Expand Down
46 changes: 46 additions & 0 deletions crates/voicevox_core/src/__internal/interp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use easy_ext::ext;
use ndarray::{Array1, ArrayView1, ArrayView2};

use crate::{StyleId, Synthesizer};

// Extension trait `PerformInference`, generated by `easy_ext::ext`, that exposes
// the three raw inference entry points of `Synthesizer` through the
// `__internal::interp` module.
//
// NOTE(review): every method body calls `self.<same name>(..)`. This presumably
// resolves to an inherent method of the same name on `Synthesizer` (inherent
// methods take precedence over trait methods during method resolution). If no
// such inherent method is visible from this module, the call would instead
// resolve to the trait method itself and recurse forever; confirm against the
// `Synthesizer` definition.
#[ext(PerformInference)]
impl Synthesizer<()> {
/// Forwards `phoneme_list` and `style_id` unchanged to
/// `Synthesizer::predict_duration` and returns its result as-is.
pub fn predict_duration(
&self,
phoneme_list: Array1<i64>,
style_id: StyleId,
) -> crate::Result<Vec<f32>> {
self.predict_duration(phoneme_list, style_id)
}

/// Forwards all arguments unchanged, in the same order, to
/// `Synthesizer::predict_intonation` and returns its result as-is.
// The signature (six arrays plus `style_id` and `self`) exceeds clippy's
// default argument-count limit, hence the lint is silenced here.
#[allow(clippy::too_many_arguments)]
pub fn predict_intonation(
&self,
vowel_phoneme_list: Array1<i64>,
consonant_phoneme_list: Array1<i64>,
start_accent_list: Array1<i64>,
end_accent_list: Array1<i64>,
start_accent_phrase_list: Array1<i64>,
end_accent_phrase_list: Array1<i64>,
style_id: StyleId,
) -> crate::Result<Vec<f32>> {
self.predict_intonation(
vowel_phoneme_list,
consonant_phoneme_list,
start_accent_list,
end_accent_list,
start_accent_phrase_list,
end_accent_phrase_list,
style_id,
)
}

/// Forwards the borrowed `f0` (1-D) and `phoneme` (2-D) views together with
/// `style_id` to `Synthesizer::decode` and returns its result as-is.
pub fn decode(
&self,
f0: ArrayView1<'_, f32>,
phoneme: ArrayView2<'_, f32>,
style_id: StyleId,
) -> crate::Result<Vec<f32>> {
self.decode(f0, phoneme, style_id)
}
}
10 changes: 4 additions & 6 deletions crates/voicevox_core/src/engine/acoustic_feature_extractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use std::collections::HashMap;

#[rustfmt::skip]
const PHONEME_LIST: &[&str] = &[
const PHONEME_LIST: [&str; 45] = [
"pau",
"A",
"E",
Expand Down Expand Up @@ -70,9 +70,7 @@ pub struct OjtPhoneme {
}

impl OjtPhoneme {
pub fn num_phoneme() -> usize {
PHONEME_MAP.len()
}
pub(crate) const NUM_PHONEME: usize = PHONEME_LIST.len();

pub fn space_phoneme() -> String {
"pau".into()
Expand Down Expand Up @@ -134,8 +132,8 @@ mod tests {
}

#[rstest]
fn test_num_phoneme_works() {
assert_eq!(OjtPhoneme::num_phoneme(), 45);
fn test_phoneme_map_has_enough_elements() {
assert_eq!(OjtPhoneme::NUM_PHONEME, PHONEME_MAP.len());
}

#[rstest]
Expand Down
3 changes: 1 addition & 2 deletions crates/voicevox_core/src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ mod kana_parser;
mod model;
mod mora_list;
mod open_jtalk;
mod synthesis_engine;

use super::*;

pub use self::acoustic_feature_extractor::*;
pub use self::full_context_label::*;
pub use self::kana_parser::*;
pub use self::model::*;
pub(crate) use self::mora_list::mora2text;
pub use self::open_jtalk::OpenJtalk;
pub use self::synthesis_engine::*;
75 changes: 25 additions & 50 deletions crates/voicevox_core/src/engine/open_jtalk.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use std::io::Write;
use std::{
path::{Path, PathBuf},
sync::Mutex,
};
use std::{path::Path, sync::Mutex};

use anyhow::anyhow;
use tempfile::NamedTempFile;
Expand All @@ -22,7 +19,7 @@ pub(crate) struct OpenjtalkFunctionError {
/// テキスト解析器としてのOpen JTalk。
pub struct OpenJtalk {
resources: Mutex<Resources>,
dict_dir: Option<PathBuf>,
dict_dir: String,
}

struct Resources {
Expand All @@ -35,37 +32,34 @@ struct Resources {
unsafe impl Send for Resources {}

impl OpenJtalk {
// FIXME: この関数は廃止し、`Synthesizer`は`Option<OpenJtalk>`という形でこの構造体を持つ
pub fn new_without_dic() -> Self {
Self {
resources: Mutex::new(Resources {
mecab: ManagedResource::initialize(),
njd: ManagedResource::initialize(),
jpcommon: ManagedResource::initialize(),
}),
dict_dir: None,
}
}
pub fn new(open_jtalk_dict_dir: impl AsRef<Path>) -> crate::result::Result<Self> {
let mut s = Self::new_without_dic();
s.load(open_jtalk_dict_dir).map_err(|()| {
// FIXME: 「システム辞書を読もうとしたけど読めなかった」というエラーをちゃんと用意する
ErrorRepr::NotLoadedOpenjtalkDict
})?;
Ok(s)
let mut resources = Resources {
mecab: ManagedResource::initialize(),
njd: ManagedResource::initialize(),
jpcommon: ManagedResource::initialize(),
};
let dict_dir = open_jtalk_dict_dir
.as_ref()
.to_str()
.unwrap_or_else(|| todo!("Rust APIでは`Utf8Path`で受けるようにする"))
.to_owned();

let result = resources.mecab.load(&dict_dir);
if !result {
return Err(ErrorRepr::LoadOpenjtalkSystemDic(dict_dir).into());
}

Ok(Self {
resources: resources.into(),
dict_dir,
})
}

// 先に`load`を呼ぶ必要がある。
/// ユーザー辞書を設定する。
///
/// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。
pub fn use_user_dict(&self, user_dict: &UserDict) -> crate::result::Result<()> {
let dict_dir = self
.dict_dir
.as_ref()
.and_then(|dict_dir| dict_dir.to_str())
.ok_or(ErrorRepr::NotLoadedOpenjtalkDict)?;

// ユーザー辞書用のcsvを作成
let mut temp_csv = NamedTempFile::new().map_err(|e| ErrorRepr::UseUserDict(e.into()))?;
temp_csv
Expand All @@ -80,7 +74,7 @@ impl OpenJtalk {
mecab_dict_index(&[
"mecab-dict-index",
"-d",
dict_dir,
&self.dict_dir,
"-u",
temp_dict_path.to_str().unwrap(),
"-f",
Expand All @@ -93,7 +87,8 @@ impl OpenJtalk {

let Resources { mecab, .. } = &mut *self.resources.lock().unwrap();

let result = mecab.load_with_userdic(Path::new(dict_dir), Some(Path::new(&temp_dict_path)));
let result =
mecab.load_with_userdic(self.dict_dir.as_ref(), Some(Path::new(&temp_dict_path)));

if !result {
return Err(ErrorRepr::UseUserDict(anyhow!("辞書のコンパイルに失敗しました")).into());
Expand Down Expand Up @@ -150,26 +145,6 @@ impl OpenJtalk {
})
}
}

fn load(&mut self, open_jtalk_dict_dir: impl AsRef<Path>) -> std::result::Result<(), ()> {
let result = self
.resources
.lock()
.unwrap()
.mecab
.load(open_jtalk_dict_dir.as_ref());
if result {
self.dict_dir = Some(open_jtalk_dict_dir.as_ref().into());
Ok(())
} else {
self.dict_dir = None;
Err(())
}
}

pub fn dict_loaded(&self) -> bool {
self.dict_dir.is_some()
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 04b3787

Please sign in to comment.