diff --git a/_typos.toml b/_typos.toml
index 4c3b0f48b..111d084fa 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -12,4 +12,4 @@ NdArray="NdArray" # onnxruntime::session::NdArray
 [default.extend-words]
 
 [files]
-extend-exclude = ["*.svg"]
+extend-exclude = ["*.svg", "*.onnx"]
diff --git a/crates/voicevox_core/src/infer.rs b/crates/voicevox_core/src/infer.rs
index 589c51777..49cbb476e 100644
--- a/crates/voicevox_core/src/infer.rs
+++ b/crates/voicevox_core/src/infer.rs
@@ -197,23 +197,33 @@ pub(crate) trait OutputScalar: Sized {
     fn extract(tensor: OutputTensor) -> std::result::Result<ArrayD<Self>, ExtractError>;
 }
 
-impl OutputScalar for f32 {
-    const KIND: OutputScalarKind = OutputScalarKind::Float32;
+#[duplicate_item(
+    T       Kind;
+    [ i64 ] [ Int64 ];
+    [ f32 ] [ Float32 ];
+)]
+impl OutputScalar for T {
+    const KIND: OutputScalarKind = OutputScalarKind::Kind;
     fn extract(tensor: OutputTensor) -> std::result::Result<ArrayD<Self>, ExtractError> {
         match tensor {
-            OutputTensor::Float32(tensor) => Ok(tensor),
+            OutputTensor::Kind(tensor) => Ok(tensor),
+            _ => Err(ExtractError::Datatype),
         }
     }
 }
 
 #[derive(Clone, Copy, PartialEq, derive_more::Display)]
 pub(crate) enum OutputScalarKind {
+    #[display("int64_t")]
+    Int64,
+    #[display("float")]
     Float32,
 }
 
 pub(crate) enum OutputTensor {
+    Int64(ArrayD<i64>),
     Float32(ArrayD<f32>),
 }
@@ -246,8 +256,12 @@ pub(crate) struct InferenceSessionOptions {
     pub(crate) device: DeviceSpec,
 }
 
+// TODO: Should we handle `ShapeError` directly, and panic on datatype mismatches?
 #[derive(Error, Debug)]
 pub(crate) enum ExtractError {
+    #[error("wrong datatype")]
+    Datatype,
+
     #[error(transparent)]
     Shape(#[from] ShapeError),
 }
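The infer.rs change generalizes output extraction from `f32`-only to per-scalar-kind dispatch: `duplicate_item` stamps out one `OutputScalar` impl per element type, and a tensor of the wrong runtime datatype now surfaces as `ExtractError::Datatype`. A minimal, dependency-free sketch of that dispatch pattern — the `Tensor`/`Scalar` names are illustrative stand-ins, not the crate's real types:

```rust
// Hypothetical stand-ins for the PR's `OutputTensor`/`OutputScalar` pair.
#[derive(Debug)]
enum Tensor {
    Int64(Vec<i64>),
    Float32(Vec<f32>),
}

#[derive(Debug)]
struct DatatypeError;

trait Scalar: Sized {
    fn extract(tensor: Tensor) -> Result<Vec<Self>, DatatypeError>;
}

impl Scalar for i64 {
    fn extract(tensor: Tensor) -> Result<Vec<i64>, DatatypeError> {
        match tensor {
            Tensor::Int64(v) => Ok(v),
            // Wrong runtime datatype, like `ExtractError::Datatype` above.
            _ => Err(DatatypeError),
        }
    }
}

impl Scalar for f32 {
    fn extract(tensor: Tensor) -> Result<Vec<f32>, DatatypeError> {
        match tensor {
            Tensor::Float32(v) => Ok(v),
            _ => Err(DatatypeError),
        }
    }
}

fn main() {
    let t = Tensor::Int64(vec![1, 2, 3]);
    let ints = <i64 as Scalar>::extract(t).unwrap(); // typed extraction succeeds
    println!("{ints:?}");
}
```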
diff --git a/crates/voicevox_core/src/infer/domains.rs b/crates/voicevox_core/src/infer/domains.rs
index 8383d931c..f2f3e607a 100644
--- a/crates/voicevox_core/src/infer/domains.rs
+++ b/crates/voicevox_core/src/infer/domains.rs
@@ -1,75 +1,134 @@
+mod frame_decode;
+mod singing_teacher;
 mod talk;
 
 use educe::Educe;
 use serde::{Deserialize, Deserializer};
 
-pub(crate) use self::talk::{
-    GenerateFullIntermediateInput, GenerateFullIntermediateOutput, PredictDurationInput,
-    PredictDurationOutput, PredictIntonationInput, PredictIntonationOutput,
-    RenderAudioSegmentInput, RenderAudioSegmentOutput, TalkDomain, TalkOperation,
+pub(crate) use self::{
+    frame_decode::{FrameDecodeDomain, FrameDecodeOperation, SfDecodeInput, SfDecodeOutput},
+    singing_teacher::{
+        PredictSingConsonantLengthInput, PredictSingConsonantLengthOutput, PredictSingF0Input,
+        PredictSingF0Output, PredictSingVolumeInput, PredictSingVolumeOutput, SingingTeacherDomain,
+        SingingTeacherOperation,
+    },
+    talk::{
+        GenerateFullIntermediateInput, GenerateFullIntermediateOutput, PredictDurationInput,
+        PredictDurationOutput, PredictIntonationInput, PredictIntonationOutput,
+        RenderAudioSegmentInput, RenderAudioSegmentOutput, TalkDomain, TalkOperation,
+    },
 };
 
 #[derive(Educe)]
 // TODO: Trying to also add `V: ?Sized` to `bounds` gets rejected for an unclear reason. Is it the
 // same with the latest educe? And if the latest version still rejects it, what is the reason?
-#[educe(Clone(bound = "V: InferenceDomainMapValues, V::Talk: Clone"))]
+#[educe(Clone(
    bound = "V: InferenceDomainMapValues, V::Talk: Clone, V::SingingTeacher: Clone, V::FrameDecode: Clone"
+))]
 pub(crate) struct InferenceDomainMap<V: InferenceDomainMapValues + ?Sized> {
     pub(crate) talk: V::Talk,
+    pub(crate) singing_teacher: V::SingingTeacher,
+    pub(crate) frame_decode: V::FrameDecode,
 }
 
-impl<T> InferenceDomainMap<(T,)> {
-    pub(crate) fn each_ref(&self) -> InferenceDomainMap<(&T,)> {
+impl<T, S, F> InferenceDomainMap<(T, S, F)> {
+    pub(crate) fn each_ref(&self) -> InferenceDomainMap<(&T, &S, &F)> {
         let talk = &self.talk;
-        InferenceDomainMap { talk }
+        let singing_teacher = &self.singing_teacher;
+        let frame_decode = &self.frame_decode;
+        InferenceDomainMap {
+            talk,
+            singing_teacher,
+            frame_decode,
+        }
     }
 
-    pub(crate) fn map<T2, Ft: FnOnce(T) -> T2>(
+    pub(crate) fn map<T2, S2, F2, Ft: FnOnce(T) -> T2, Fs: FnOnce(S) -> S2, Ff: FnOnce(F) -> F2>(
         self,
-        fs: InferenceDomainMap<(Ft,)>,
-    ) -> InferenceDomainMap<(T2,)> {
+        fs: InferenceDomainMap<(Ft, Fs, Ff)>,
+    ) -> InferenceDomainMap<(T2, S2, F2)> {
         let talk = (fs.talk)(self.talk);
-        InferenceDomainMap { talk }
+        let singing_teacher = (fs.singing_teacher)(self.singing_teacher);
+        let frame_decode = (fs.frame_decode)(self.frame_decode);
+        InferenceDomainMap {
+            talk,
+            singing_teacher,
+            frame_decode,
+        }
     }
 }
 
-impl<T, E> InferenceDomainMap<(Result<T, E>,)> {
-    pub(crate) fn collect(self) -> Result<InferenceDomainMap<(T,)>, E> {
+impl<T, S, F, E> InferenceDomainMap<(Result<T, E>, Result<S, E>, Result<F, E>)> {
+    pub(crate) fn collect(self) -> Result<InferenceDomainMap<(T, S, F)>, E> {
         let talk = self.talk?;
-        Ok(InferenceDomainMap { talk })
+        let singing_teacher = self.singing_teacher?;
+        let frame_decode = self.frame_decode?;
+        Ok(InferenceDomainMap {
+            talk,
+            singing_teacher,
+            frame_decode,
+        })
     }
 }
 
 impl<'de, V: InferenceDomainMapValues + ?Sized> Deserialize<'de> for InferenceDomainMap<V>
 where
     V::Talk: Deserialize<'de>,
+    V::SingingTeacher: Deserialize<'de>,
+    V::FrameDecode: Deserialize<'de>,
 {
     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     where
         D: Deserializer<'de>,
     {
-        let Repr { talk } = Repr::deserialize(deserializer)?;
-        return Ok(Self { talk });
+        let Repr {
+            talk,
+            singing_teacher,
+            frame_decode,
+        } = Repr::deserialize(deserializer)?;
+        return Ok(Self {
+            talk,
+            singing_teacher,
+            frame_decode,
+        });
 
         #[derive(Deserialize)]
-        struct Repr<T> {
+        struct Repr<T, S, F> {
             talk: T,
+            singing_teacher: S,
+            frame_decode: F,
         }
     }
 }
 
 pub(crate) trait InferenceDomainMapValues {
     type Talk;
+    type SingingTeacher;
+    type FrameDecode;
 }
 
-impl<T> InferenceDomainMapValues for (T,) {
+impl<T, S, F> InferenceDomainMapValues for (T, S, F) {
     type Talk = T;
+    type SingingTeacher = S;
+    type FrameDecode = F;
 }
 
 macro_rules! inference_domain_map_values {
     (for<$arg:ident> $body:ty) => {
-        (::macros::substitute_type!(
-            $body where $arg = crate::infer::domains::TalkDomain as crate::infer::InferenceDomain
-        ),)
+        (
+            ::macros::substitute_type!(
+                $body
+                where $arg = crate::infer::domains::TalkDomain as crate::infer::InferenceDomain
+            ),
+            ::macros::substitute_type!(
+                $body
+                where $arg = crate::infer::domains::SingingTeacherDomain as crate::infer::InferenceDomain
+            ),
+            ::macros::substitute_type!(
+                $body
+                where $arg = crate::infer::domains::FrameDecodeDomain as crate::infer::InferenceDomain
+            ),
+        )
     };
 }
 
 pub(crate) use inference_domain_map_values;
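`InferenceDomainMap` is the pivot of the whole change: a one-field-per-domain record that grows from `(T,)` to `(T, S, F)`. A hedged, self-contained sketch of how `each_ref` and `collect` generalize, using a plain tuple-generic struct in place of the crate's trait-indexed map:

```rust
// Hypothetical mirror of `InferenceDomainMap` over plain type parameters.
struct DomainMap<T, S, F> {
    talk: T,
    singing_teacher: S,
    frame_decode: F,
}

impl<T, S, F> DomainMap<T, S, F> {
    // Borrow every field at once, preserving the per-domain types.
    fn each_ref(&self) -> DomainMap<&T, &S, &F> {
        DomainMap {
            talk: &self.talk,
            singing_teacher: &self.singing_teacher,
            frame_decode: &self.frame_decode,
        }
    }
}

impl<T, S, F, E> DomainMap<Result<T, E>, Result<S, E>, Result<F, E>> {
    // Short-circuit on the first error, like the PR's `collect`.
    fn collect(self) -> Result<DomainMap<T, S, F>, E> {
        Ok(DomainMap {
            talk: self.talk?,
            singing_teacher: self.singing_teacher?,
            frame_decode: self.frame_decode?,
        })
    }
}

fn main() {
    let m: DomainMap<Result<u32, String>, Result<&str, String>, Result<bool, String>> =
        DomainMap {
            talk: Ok(1),
            singing_teacher: Ok("ok"),
            frame_decode: Ok(true),
        };
    let m = m.collect().unwrap();
    let refs = m.each_ref();
    println!("{} {} {}", refs.talk, refs.singing_teacher, refs.frame_decode);
}
```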
inference_domain_map_values { (for<$arg:ident> $body:ty) => { - (::macros::substitute_type!( - $body where $arg = crate::infer::domains::TalkDomain as crate::infer::InferenceDomain - ),) + ( + ::macros::substitute_type!( + $body + where $arg = crate::infer::domains::TalkDomain as crate::infer::InferenceDomain + ), + ::macros::substitute_type!( + $body + where $arg = crate::infer::domains::SingingTeacherDomain as crate::infer::InferenceDomain + ), + ::macros::substitute_type!( + $body + where $arg = crate::infer::domains::FrameDecodeDomain as crate::infer::InferenceDomain + ), + ) }; } pub(crate) use inference_domain_map_values; diff --git a/crates/voicevox_core/src/infer/domains/frame_decode.rs b/crates/voicevox_core/src/infer/domains/frame_decode.rs new file mode 100644 index 000000000..c228196fc --- /dev/null +++ b/crates/voicevox_core/src/infer/domains/frame_decode.rs @@ -0,0 +1,52 @@ +use std::{collections::BTreeSet, sync::LazyLock}; + +use enum_map::Enum; +use macros::{InferenceInputSignature, InferenceOperation, InferenceOutputSignature}; +use ndarray::{Array1, Array2}; + +use crate::{manifest::FrameDecodeManifest, StyleType}; + +use super::super::{ + InferenceDomain, InferenceInputSignature as _, InferenceOutputSignature as _, OutputTensor, +}; + +pub(crate) enum FrameDecodeDomain {} + +impl InferenceDomain for FrameDecodeDomain { + type Operation = FrameDecodeOperation; + type Manifest = FrameDecodeManifest; + + fn style_types() -> &'static BTreeSet { + static STYLE_TYPES: LazyLock> = + LazyLock::new(|| [StyleType::FrameDecode, StyleType::Sing].into()); + &STYLE_TYPES + } +} + +#[derive(Clone, Copy, Enum, InferenceOperation)] +#[inference_operation( + type Domain = FrameDecodeDomain; +)] +pub(crate) enum FrameDecodeOperation { + #[inference_operation( + type Input = SfDecodeInput; + type Output = SfDecodeOutput; + )] + SfDecode, +} + +#[derive(InferenceInputSignature)] +#[inference_input_signature( + type Signature = SfDecode; +)] +pub(crate) struct SfDecodeInput { + pub(crate) frame_phonemes: Array2, + pub(crate) frame_f0s: Array2, + pub(crate) frame_volumes: Array2, + pub(crate) speaker_id: Array1, +} + +#[derive(InferenceOutputSignature)] +pub(crate) struct SfDecodeOutput { + pub(crate) wav: Array2, +} diff --git a/crates/voicevox_core/src/infer/domains/singing_teacher.rs b/crates/voicevox_core/src/infer/domains/singing_teacher.rs new file mode 100644 index 000000000..a2b9add80 --- /dev/null +++ b/crates/voicevox_core/src/infer/domains/singing_teacher.rs @@ -0,0 +1,95 @@ +use std::{collections::BTreeSet, sync::LazyLock}; + +use enum_map::Enum; +use macros::{InferenceInputSignature, InferenceOperation, InferenceOutputSignature}; +use ndarray::{Array1, Array2}; + +use crate::{manifest::SingingTeacherManifest, StyleType}; + +use super::super::{ + InferenceDomain, InferenceInputSignature as _, InferenceOutputSignature as _, OutputTensor, +}; + +pub(crate) enum SingingTeacherDomain {} + +impl InferenceDomain for SingingTeacherDomain { + type Operation = SingingTeacherOperation; + type Manifest = SingingTeacherManifest; + + fn style_types() -> &'static BTreeSet { + static STYLE_TYPES: LazyLock> = + LazyLock::new(|| [StyleType::SingingTeacher, StyleType::Sing].into()); + &STYLE_TYPES + } +} + +#[derive(Clone, Copy, Enum, InferenceOperation)] +#[inference_operation( + type Domain = SingingTeacherDomain; +)] +pub(crate) enum SingingTeacherOperation { + #[inference_operation( + type Input = PredictSingConsonantLengthInput; + type Output = PredictSingConsonantLengthOutput; + )] + 
diff --git a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs
index f2cc4fac7..3cfd11608 100644
--- a/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs
+++ b/crates/voicevox_core/src/infer/runtimes/onnxruntime.rs
@@ -158,7 +158,7 @@ impl InferenceRuntime for self::blocking::Onnxruntime {
                 TensorElementType::Uint16 => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16"),
                 TensorElementType::Int16 => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16"),
                 TensorElementType::Int32 => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32"),
-                TensorElementType::Int64 => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64"),
+                TensorElementType::Int64 => Ok(OutputScalarKind::Int64),
                 TensorElementType::String => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING"),
                 TensorElementType::Bfloat16 => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16"),
                 TensorElementType::Float16 => Err("ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16"),
@@ -253,6 +253,10 @@ fn extract_outputs(outputs: &ort::SessionOutputs<'_, '_>) -> anyhow::Result<Vec<OutputTensor>> {
+            TensorElementType::Int64 => {
+                let output = output.try_extract_tensor::<i64>()?;
+                Ok(OutputTensor::Int64(output.into_owned()))
+            }
             TensorElementType::Float32 => {
                 let output = output.try_extract_tensor::<f32>()?;
                 Ok(OutputTensor::Float32(output.into_owned()))
diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs
index 740254f6f..0c11c3647 100644
--- a/crates/voicevox_core/src/manifest.rs
+++ b/crates/voicevox_core/src/manifest.rs
@@ -13,7 +13,10 @@ use serde::{de, Deserialize, Deserializer, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 
 use crate::{
-    infer::domains::{inference_domain_map_values, InferenceDomainMap, TalkOperation},
+    infer::domains::{
+        inference_domain_map_values, FrameDecodeOperation, InferenceDomainMap,
+        SingingTeacherOperation, TalkOperation,
+    },
     StyleId, VoiceModelId,
 };
@@ -82,6 +85,7 @@ pub struct Manifest {
 
 pub(crate) type ManifestDomains = inference_domain_map_values!(for<D> Option<D::Manifest>);
 
+// TODO: Once #825 is done, unify this with the `singing_teacher` and `frame_decode` ones
 #[derive(Deserialize)]
 #[cfg_attr(test, derive(Default))]
 pub(crate) struct TalkManifest {
@@ -92,6 +96,26 @@ pub(crate) struct TalkManifest {
     pub(crate) style_id_to_inner_voice_id: StyleIdToInnerVoiceId,
 }
 
+#[derive(Deserialize)]
+#[cfg_attr(test, derive(Default))]
+pub(crate) struct SingingTeacherManifest {
+    #[serde(flatten)]
+    filenames: EnumMap<SingingTeacherOperationFilenameKey, Arc<str>>,
+
+    #[serde(default)]
+    pub(crate) style_id_to_inner_voice_id: StyleIdToInnerVoiceId,
+}
+
+#[derive(Deserialize)]
+#[cfg_attr(test, derive(Default))]
+pub(crate) struct FrameDecodeManifest {
+    #[serde(flatten)]
+    filenames: EnumMap<FrameDecodeOperationFilenameKey, Arc<str>>,
+
+    #[serde(default)]
+    pub(crate) style_id_to_inner_voice_id: StyleIdToInnerVoiceId,
+}
+
 // TODO: In #825, merge this into `TalkOperation`. Also delegate the `Index` impl via derive_more
 #[derive(Enum, Deserialize)]
 pub(crate) enum TalkOperationFilenameKey {
@@ -121,6 +145,52 @@ impl Index<TalkOperation> for TalkManifest {
     }
 }
 
+#[derive(Enum, Deserialize)]
+pub(crate) enum SingingTeacherOperationFilenameKey {
+    #[serde(rename = "predict_sing_consonant_length_filename")]
+    PredictSingConsonantLength,
+    #[serde(rename = "predict_sing_f0_filename")]
+    PredictSingF0,
+    #[serde(rename = "predict_sing_volume_filename")]
+    PredictSingVolume,
+}
+
+impl Index<SingingTeacherOperation> for SingingTeacherManifest {
+    type Output = Arc<str>;
+
+    fn index(&self, index: SingingTeacherOperation) -> &Self::Output {
+        let key = match index {
+            SingingTeacherOperation::PredictSingConsonantLength => {
+                SingingTeacherOperationFilenameKey::PredictSingConsonantLength
+            }
+            SingingTeacherOperation::PredictSingF0 => {
+                SingingTeacherOperationFilenameKey::PredictSingF0
+            }
+            SingingTeacherOperation::PredictSingVolume => {
+                SingingTeacherOperationFilenameKey::PredictSingVolume
+            }
+        };
+        &self.filenames[key]
+    }
+}
+
+#[derive(Enum, Deserialize)]
+pub(crate) enum FrameDecodeOperationFilenameKey {
+    #[serde(rename = "sf_decode_filename")]
+    SfDecode,
+}
+
+impl Index<FrameDecodeOperation> for FrameDecodeManifest {
+    type Output = Arc<str>;
+
+    fn index(&self, index: FrameDecodeOperation) -> &Self::Output {
+        let key = match index {
+            FrameDecodeOperation::SfDecode => FrameDecodeOperationFilenameKey::SfDecode,
+        };
+        &self.filenames[key]
+    }
+}
+
 #[serde_as]
 #[derive(Default, Clone, Deref, Deserialize)]
 #[deref(forward)]
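The manifest side resolves an operation to a model filename in two steps: operation → serde-renamed filename key → `EnumMap` lookup. A runnable sketch of the pattern using the real enum_map crate — the types are illustrative stand-ins (the crate's map stores `Arc<str>` and the enums carry serde attributes):

```rust
use std::ops::Index;

use enum_map::{enum_map, Enum, EnumMap};

#[derive(Clone, Copy, Enum)]
enum FrameDecodeOperation {
    SfDecode,
}

#[derive(Enum)]
enum FrameDecodeOperationFilenameKey {
    SfDecode,
}

struct FrameDecodeManifest {
    filenames: EnumMap<FrameDecodeOperationFilenameKey, String>,
}

impl Index<FrameDecodeOperation> for FrameDecodeManifest {
    type Output = String;

    // Translate an inference operation into its manifest key, then look up
    // the model filename recorded under that key.
    fn index(&self, index: FrameDecodeOperation) -> &Self::Output {
        let key = match index {
            FrameDecodeOperation::SfDecode => FrameDecodeOperationFilenameKey::SfDecode,
        };
        &self.filenames[key]
    }
}

fn main() {
    let manifest = FrameDecodeManifest {
        filenames: enum_map! {
            FrameDecodeOperationFilenameKey::SfDecode => "sf_decode.onnx".to_owned(),
        },
    };
    println!("{}", manifest[FrameDecodeOperation::SfDecode]);
}
```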
diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs
index c59573412..48ecb39d2 100644
--- a/crates/voicevox_core/src/status.rs
+++ b/crates/voicevox_core/src/status.rs
@@ -10,7 +10,10 @@ use crate::{
     error::{ErrorRepr, LoadModelError, LoadModelErrorKind, LoadModelResult},
     infer::{
         self,
-        domains::{inference_domain_map_values, InferenceDomainMap, TalkDomain},
+        domains::{
+            inference_domain_map_values, FrameDecodeDomain, InferenceDomainMap,
+            SingingTeacherDomain, TalkDomain,
+        },
         session_set::{InferenceSessionCell, InferenceSessionSet},
         InferenceDomain, InferenceInputSignature, InferenceRuntime, InferenceSessionOptions,
         InferenceSignature,
@@ -296,8 +299,10 @@ pub(crate) trait InferenceDomainExt: InferenceDomain {
 }
 
 #[duplicate_item(
-    T              field;
-    [ TalkDomain ] [ talk ];
+    T                        field;
+    [ TalkDomain ]           [ talk ];
+    [ SingingTeacherDomain ] [ singing_teacher ];
+    [ FrameDecodeDomain ]    [ frame_decode ];
 )]
 impl InferenceDomainExt for T {
     fn visit(
@@ -325,6 +330,8 @@ impl InferenceDomainMap {
             [
                 field;
                 [ talk ];
+                [ singing_teacher ];
+                [ frame_decode ];
             ]
             let field = self
                 .field
@@ -336,7 +343,11 @@ impl InferenceDomainMap {
                 .transpose()?;
         }
 
-        Ok(InferenceDomainMap { talk })
+        Ok(InferenceDomainMap {
+            talk,
+            singing_teacher,
+            frame_decode,
+        })
     }
 }
@@ -355,7 +366,9 @@ mod tests {
     use crate::{
         devices::{DeviceSpec, GpuSpec},
         infer::{
-            domains::{InferenceDomainMap, TalkOperation},
+            domains::{
+                FrameDecodeOperation, InferenceDomainMap, SingingTeacherOperation, TalkOperation,
+            },
             InferenceSessionOptions,
         },
         macros::tests::assert_debug_fmt_eq,
@@ -381,6 +394,14 @@ mod tests {
                 | TalkOperation::GenerateFullIntermediate => light_session_options,
                 TalkOperation::RenderAudioSegment => heavy_session_options,
             },
+            singing_teacher: enum_map! {
+                SingingTeacherOperation::PredictSingConsonantLength
+                | SingingTeacherOperation::PredictSingF0
+                | SingingTeacherOperation::PredictSingVolume => light_session_options,
+            },
+            frame_decode: enum_map! {
+                FrameDecodeOperation::SfDecode => heavy_session_options,
+            },
         };
         let status = Status::new(
             crate::blocking::Onnxruntime::from_test_util_data().unwrap(),
@@ -414,6 +435,8 @@ mod tests {
             crate::blocking::Onnxruntime::from_test_util_data().unwrap(),
             InferenceDomainMap {
                 talk: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)),
+                singing_teacher: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)),
+                frame_decode: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)),
             },
         );
         let model = &crate::nonblocking::VoiceModelFile::sample().await.unwrap();
@@ -430,6 +453,8 @@ mod tests {
             crate::blocking::Onnxruntime::from_test_util_data().unwrap(),
             InferenceDomainMap {
                 talk: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)),
+                singing_teacher: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)),
+                frame_decode: enum_map!(_ => InferenceSessionOptions::new(0, DeviceSpec::Cpu)),
             },
         );
         let vvm = &crate::nonblocking::VoiceModelFile::sample().await.unwrap();
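status.rs assigns per-operation `InferenceSessionOptions` with `enum_map!`, whose exhaustiveness means a newly added operation cannot silently miss a session configuration. A small runnable sketch of that property, with an illustrative `SessionOptions` (the real one also carries a device spec):

```rust
use enum_map::{enum_map, Enum, EnumMap};

#[derive(Clone, Copy, Enum)]
enum SingingTeacherOperation {
    PredictSingConsonantLength,
    PredictSingF0,
    PredictSingVolume,
}

#[derive(Clone, Copy, Debug, PartialEq)]
struct SessionOptions {
    cpu_num_threads: u16,
}

fn main() {
    let light = SessionOptions { cpu_num_threads: 1 };

    // `enum_map!` must cover every variant; adding a fourth operation without
    // choosing its options becomes a compile error rather than a runtime gap.
    let options: EnumMap<SingingTeacherOperation, SessionOptions> = enum_map! {
        SingingTeacherOperation::PredictSingConsonantLength
        | SingingTeacherOperation::PredictSingF0
        | SingingTeacherOperation::PredictSingVolume => light,
    };
    assert_eq!(options[SingingTeacherOperation::PredictSingF0], light);
}
```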
diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
index 230a26294..7437a3ca5 100644
--- a/crates/voicevox_core/src/synthesizer.rs
+++ b/crates/voicevox_core/src/synthesizer.rs
@@ -101,6 +101,7 @@ impl AsyncExt for BlockingThreadPool {
 }
 
 mod inner {
+    use easy_ext::ext;
     use enum_map::enum_map;
     use std::{
         io::{Cursor, Write as _},
@@ -118,9 +119,13 @@ mod inner {
         infer::{
             self,
             domains::{
-                GenerateFullIntermediateInput, GenerateFullIntermediateOutput, InferenceDomainMap,
-                PredictDurationInput, PredictDurationOutput, PredictIntonationInput,
-                PredictIntonationOutput, RenderAudioSegmentInput, RenderAudioSegmentOutput,
+                FrameDecodeDomain, FrameDecodeOperation, GenerateFullIntermediateInput,
+                GenerateFullIntermediateOutput, InferenceDomainMap, PredictDurationInput,
+                PredictDurationOutput, PredictIntonationInput, PredictIntonationOutput,
+                PredictSingConsonantLengthInput, PredictSingConsonantLengthOutput,
+                PredictSingF0Input, PredictSingF0Output, PredictSingVolumeInput,
+                PredictSingVolumeOutput, RenderAudioSegmentInput, RenderAudioSegmentOutput,
+                SfDecodeInput, SfDecodeOutput, SingingTeacherDomain, SingingTeacherOperation,
                 TalkDomain, TalkOperation,
             },
             InferenceRuntime, InferenceSessionOptions,
@@ -174,7 +179,7 @@ mod inner {
     }
 
     pub struct Inner<O, A> {
-        status: Arc<Status<crate::blocking::Onnxruntime>>,
+        pub(super) status: Arc<Status<crate::blocking::Onnxruntime>>,
        open_jtalk_analyzer: OpenJTalkAnalyzer<O>,
         kana_analyzer: KanaAnalyzer,
         use_gpu: bool,
@@ -250,6 +255,14 @@ mod inner {
                     | TalkOperation::GenerateFullIntermediate => light_session_options,
                     TalkOperation::RenderAudioSegment => heavy_session_options,
                 },
+                singing_teacher: enum_map! {
+                    SingingTeacherOperation::PredictSingConsonantLength
+                    | SingingTeacherOperation::PredictSingF0
+                    | SingingTeacherOperation::PredictSingVolume => light_session_options,
+                },
+                frame_decode: enum_map! {
+                    FrameDecodeOperation::SfDecode => heavy_session_options,
+                },
             },
         )
         .into();
@@ -1053,6 +1066,108 @@ mod inner {
             let output = trim_margin_from_wave(output_with_margin);
             Ok(output.to_vec())
         }
+
+        pub(super) async fn predict_sing_consonant_length<A: AsyncExt>(
+            &self,
+            consonant: ndarray::Array1<i64>,
+            vowel: ndarray::Array1<i64>,
+            note_duration: ndarray::Array1<i64>,
+            style_id: StyleId,
+        ) -> Result<ndarray::Array2<i64>> {
+            let (model_id, inner_voice_id) = self.ids_for::<SingingTeacherDomain>(style_id)?;
+
+            let PredictSingConsonantLengthOutput { consonant_lengths } = self
+                .run_session::<A, _>(
+                    model_id,
+                    PredictSingConsonantLengthInput {
+                        consonants: consonant.into_one_row(),
+                        vowels: vowel.into_one_row(),
+                        note_durations: note_duration.into_one_row(),
+                        speaker_id: ndarray::array![inner_voice_id.raw_id().into()],
+                    },
+                )
+                .await?;
+
+            Ok(consonant_lengths)
+        }
+
+        pub(super) async fn predict_sing_f0<A: AsyncExt>(
+            &self,
+            phoneme: ndarray::Array1<i64>,
+            note: ndarray::Array1<i64>,
+            style_id: StyleId,
+        ) -> Result<ndarray::Array2<f32>> {
+            let (model_id, inner_voice_id) = self.ids_for::<SingingTeacherDomain>(style_id)?;
+
+            let PredictSingF0Output { f0s } = self
+                .run_session::<A, _>(
+                    model_id,
+                    PredictSingF0Input {
+                        phonemes: phoneme.into_one_row(),
+                        notes: note.into_one_row(),
+                        speaker_id: ndarray::array![inner_voice_id.raw_id().into()],
+                    },
+                )
+                .await?;
+
+            Ok(f0s)
+        }
+
+        pub(super) async fn predict_sing_volume<A: AsyncExt>(
+            &self,
+            phoneme: ndarray::Array1<i64>,
+            note: ndarray::Array1<i64>,
+            f0: ndarray::Array1<f32>,
+            style_id: StyleId,
+        ) -> Result<ndarray::Array2<f32>> {
+            let (model_id, inner_voice_id) = self.ids_for::<SingingTeacherDomain>(style_id)?;
+
+            let PredictSingVolumeOutput { volumes } = self
+                .run_session::<A, _>(
+                    model_id,
+                    PredictSingVolumeInput {
+                        phonemes: phoneme.into_one_row(),
+                        notes: note.into_one_row(),
+                        frame_f0s: f0.into_one_row(),
+                        speaker_id: ndarray::array![inner_voice_id.raw_id().into()],
+                    },
+                )
+                .await?;
+
+            Ok(volumes)
+        }
+
+        pub(super) async fn sf_decode<A: AsyncExt>(
+            &self,
+            phoneme: ndarray::Array1<i64>,
+            f0: ndarray::Array1<f32>,
+            volume: ndarray::Array1<f32>,
+            style_id: StyleId,
+        ) -> Result<ndarray::Array2<f32>> {
+            let (model_id, inner_voice_id) = self.ids_for::<FrameDecodeDomain>(style_id)?;
+
+            let SfDecodeOutput { wav } = self
+                .run_session::<A, _>(
+                    model_id,
+                    SfDecodeInput {
+                        frame_phonemes: phoneme.into_one_row(),
+                        frame_f0s: f0.into_one_row(),
+                        frame_volumes: volume.into_one_row(),
+                        speaker_id: ndarray::array![inner_voice_id.raw_id().into()],
+                    },
+                )
+                .await?;
+
+            Ok(wav)
+        }
+    }
+
+    #[ext]
+    impl<A> ndarray::Array1<A> {
+        fn into_one_row(self) -> ndarray::Array2<A> {
+            let n = self.len();
+            self.into_shape([1, n]).expect("should be ok")
+        }
     }
 
     #[cfg(windows)]
@@ -1590,6 +1705,62 @@
             .decode(length, phoneme_size, f0, phoneme_vector, style_id)
             .block_on()
         }
+
+        pub fn predict_sing_consonant_length(
+            &self,
+            consonant: ndarray::Array1<i64>,
+            vowel: ndarray::Array1<i64>,
+            note_duration: ndarray::Array1<i64>,
+            style_id: StyleId,
+        ) -> crate::Result<ndarray::Array2<i64>> {
+            self.0
+                .status
+                .predict_sing_consonant_length::<SingleTasked>(
+                    consonant,
+                    vowel,
+                    note_duration,
+                    style_id,
+                )
+                .block_on()
+        }
+
+        pub fn predict_sing_f0(
+            &self,
+            phoneme: ndarray::Array1<i64>,
+            note: ndarray::Array1<i64>,
+            style_id: StyleId,
+        ) -> crate::Result<ndarray::Array2<f32>> {
+            self.0
+                .status
+                .predict_sing_f0::<SingleTasked>(phoneme, note, style_id)
+                .block_on()
+        }
+
+        pub fn predict_sing_volume(
+            &self,
+            phoneme: ndarray::Array1<i64>,
+            note: ndarray::Array1<i64>,
+            f0: ndarray::Array1<f32>,
+            style_id: StyleId,
+        ) -> crate::Result<ndarray::Array2<f32>> {
+            self.0
+                .status
+                .predict_sing_volume::<SingleTasked>(phoneme, note, f0, style_id)
+                .block_on()
+        }
+
+        pub fn sf_decode(
+            &self,
+            phoneme: ndarray::Array1<i64>,
+            f0: ndarray::Array1<f32>,
+            volume: ndarray::Array1<f32>,
+            style_id: StyleId,
+        ) -> crate::Result<ndarray::Array2<f32>> {
+            self.0
+                .status
+                .sf_decode::<SingleTasked>(phoneme, f0, volume, style_id)
+                .block_on()
+        }
     }
 }
@@ -1903,7 +2074,10 @@ pub(crate) mod nonblocking {
 #[cfg(test)]
 mod tests {
     use super::{AccelerationMode, InitializeOptions};
-    use crate::{engine::Mora, macros::tests::assert_debug_fmt_eq, AccentPhrase, Result, StyleId};
+    use crate::{
+        asyncs::BlockingThreadPool, engine::Mora, macros::tests::assert_debug_fmt_eq, AccentPhrase,
+        Result, StyleId,
+    };
     use ::test_util::OPEN_JTALK_DIC_DIR;
     use rstest::rstest;
@@ -2114,6 +2288,145 @@ mod tests {
         assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
     }
 
+    #[rstest]
+    #[tokio::test]
+    async fn predict_sing_f0_works() {
+        let synthesizer = super::nonblocking::Synthesizer::new(
+            crate::nonblocking::Onnxruntime::from_test_util_data()
+                .await
+                .unwrap(),
+            (),
+            &InitializeOptions {
+                acceleration_mode: AccelerationMode::Cpu,
+                ..Default::default()
+            },
+        )
+        .unwrap();
+        synthesizer
+            .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap())
+            .await
+            .unwrap();
+
+        // Inputs corresponding to the text "テスト"
+        let phoneme_vector = ndarray::array![0, 37, 14, 35, 6, 37, 30, 0];
+        let note_vector = ndarray::array![0, 30, 30, 40, 40, 50, 50, 0];
+
+        let sing_teacher_style_id = StyleId::new(6000);
+        let result = synthesizer
+            .0
+            .status
+            .predict_sing_f0::<BlockingThreadPool>(
+                phoneme_vector.clone(),
+                note_vector,
+                sing_teacher_style_id,
+            )
+            .await;
+
+        assert!(result.is_ok(), "{result:?}");
+        assert_eq!(result.unwrap().len(), phoneme_vector.len());
+    }
+
+    #[rstest]
+    #[tokio::test]
+    async fn predict_sing_volume_works() {
+        let synthesizer = super::nonblocking::Synthesizer::new(
+            crate::nonblocking::Onnxruntime::from_test_util_data()
+                .await
+                .unwrap(),
+            (),
+            &InitializeOptions {
+                acceleration_mode: AccelerationMode::Cpu,
+                ..Default::default()
+            },
+        )
+        .unwrap();
+        synthesizer
+            .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap())
+            .await
+            .unwrap();
+
+        // Inputs corresponding to the text "テスト"
+        let phoneme_vector = ndarray::array![0, 37, 14, 35, 6, 37, 30, 0];
+        let note_vector = ndarray::array![0, 30, 30, 40, 40, 50, 50, 0];
+        let f0_vector = ndarray::array![0., 5.905218, 5.905218, 0., 0., 5.565851, 5.565851, 0.];
+
+        let sing_teacher_style_id = StyleId::new(6000);
+        let result = synthesizer
+            .0
+            .status
+            .predict_sing_volume::<BlockingThreadPool>(
+                phoneme_vector.clone(),
+                note_vector,
+                f0_vector,
+                sing_teacher_style_id,
+            )
+            .await;
+
+        assert!(result.is_ok(), "{result:?}");
+        assert_eq!(result.unwrap().len(), phoneme_vector.len());
+    }
+
+    #[rstest]
+    #[tokio::test]
+    async fn sf_decode_works() {
+        let synthesizer = super::nonblocking::Synthesizer::new(
+            crate::nonblocking::Onnxruntime::from_test_util_data()
+                .await
+                .unwrap(),
+            (),
+            &InitializeOptions {
+                acceleration_mode: AccelerationMode::Cpu,
+                ..Default::default()
+            },
+        )
+        .unwrap();
+        synthesizer
+            .load_voice_model(&crate::nonblocking::VoiceModelFile::sample().await.unwrap())
+            .await
+            .unwrap();
+
+        // Inputs corresponding to the text "テスト"
+        const F0_LENGTH: usize = 69;
+        let mut f0 = [0.; F0_LENGTH];
+        f0[9..24].fill(5.905218);
+        f0[37..60].fill(5.565851);
+
+        let mut volume = [0.; F0_LENGTH];
+        volume[9..24].fill(0.5);
+        volume[24..37].fill(0.2);
+        volume[37..60].fill(1.0);
+
+        let mut phoneme = [0; F0_LENGTH];
+        let mut set_one = |index, range| {
+            for i in range {
+                phoneme[i] = index;
+            }
+        };
+        set_one(0, 0..9);
+        set_one(37, 9..13);
+        set_one(14, 13..24);
+        set_one(35, 24..30);
+        set_one(6, 30..37);
+        set_one(37, 37..45);
+        set_one(30, 45..60);
+        set_one(0, 60..69);
+
+        let sf_decode_style_id = StyleId::new(3000);
+        let result = synthesizer
+            .0
+            .status
+            .sf_decode::<BlockingThreadPool>(
+                ndarray::arr1(&phoneme),
+                ndarray::arr1(&f0),
+                ndarray::arr1(&volume),
+                sf_decode_style_id,
+            )
+            .await;
+
+        assert!(result.is_ok(), "{result:?}");
+        assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
+    }
+
     type TextConsonantVowelData =
         [(&'static [(&'static str, &'static str, &'static str)], usize)];
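All four inner methods funnel their 1-D inputs through `into_one_row`, batching each vector as a single-row `[1, n]` matrix to match the models' expected layout. A runnable sketch of that reshape, written as a free function instead of the crate's `#[ext]` method:

```rust
use ndarray::{array, Array1, Array2};

// Batch a length-n vector as a 1×n matrix, as `into_one_row` does above.
fn into_one_row<A>(a: Array1<A>) -> Array2<A> {
    let n = a.len();
    a.into_shape([1, n]).expect("a 1×n shape always fits n elements")
}

fn main() {
    let phonemes: Array1<i64> = array![0, 37, 14, 35, 6, 37, 30, 0];
    let batched = into_one_row(phonemes);
    assert_eq!(batched.shape(), &[1, 8]);
}
```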
diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs
index d0dde486f..754064a29 100644
--- a/crates/voicevox_core/src/voice_model.rs
+++ b/crates/voicevox_core/src/voice_model.rs
@@ -23,7 +23,10 @@ use crate::{
     asyncs::{Async, Mutex as _},
     error::{LoadModelError, LoadModelErrorKind, LoadModelResult},
     infer::{
-        domains::{inference_domain_map_values, InferenceDomainMap, TalkDomain},
+        domains::{
+            inference_domain_map_values, FrameDecodeDomain, InferenceDomainMap,
+            SingingTeacherDomain, TalkDomain,
+        },
         InferenceDomain,
     },
     manifest::{Manifest, ManifestDomains, StyleIdToInnerVoiceId},
@@ -153,6 +156,42 @@ impl Inner {
                     )
                 })
             },
+            singing_teacher: |singing_teacher| {
+                singing_teacher
+                    .as_ref()
+                    .map(|manifest| {
+                        let indices = EnumMap::from_fn(|k| &manifest[k])
+                            .try_map(|_, s| find_entry_index(s))?;
+                        Ok(InferenceModelEntry { indices, manifest })
+                    })
+                    .transpose()
+                    .map_err(move |source| {
+                        error(
+                            LoadModelErrorKind::ReadZipEntry {
+                                filename: MANIFEST_FILENAME.to_owned(),
+                            },
+                            source,
+                        )
+                    })
+            },
+            frame_decode: |frame_decode| {
+                frame_decode
+                    .as_ref()
+                    .map(|manifest| {
+                        let indices = EnumMap::from_fn(|k| &manifest[k])
+                            .try_map(|_, s| find_entry_index(s))?;
+                        Ok(InferenceModelEntry { indices, manifest })
+                    })
+                    .transpose()
+                    .map_err(move |source| {
+                        error(
+                            LoadModelErrorKind::ReadZipEntry {
+                                filename: MANIFEST_FILENAME.to_owned(),
+                            },
+                            source,
+                        )
+                    })
+            },
         })
         .collect()
         .map_err(crate::Error::from)
@@ -225,36 +264,78 @@ impl Inner {
             }};
         }
 
-        let InferenceDomainMap { talk } =
-            self.with_inference_model_entries(|inference_model_entries| {
-                inference_model_entries.each_ref().map(InferenceDomainMap {
-                    talk: |talk| {
-                        talk.as_ref()
-                            .map(|InferenceModelEntry { indices, manifest }| {
-                                (
-                                    indices.map(|op, i| (i, manifest[op].clone())),
-                                    manifest.style_id_to_inner_voice_id.clone(),
-                                )
-                            })
-                    },
-                })
-            });
+        let InferenceDomainMap {
+            talk,
+            singing_teacher,
+            frame_decode,
+        } = self.with_inference_model_entries(|inference_model_entries| {
+            inference_model_entries.each_ref().map(InferenceDomainMap {
+                talk: |talk| {
+                    talk.as_ref()
+                        .map(|InferenceModelEntry { indices, manifest }| {
+                            (
+                                indices.map(|op, i| (i, manifest[op].clone())),
+                                manifest.style_id_to_inner_voice_id.clone(),
+                            )
+                        })
+                },
+                singing_teacher: |singing_teacher| {
+                    singing_teacher
+                        .as_ref()
+                        .map(|InferenceModelEntry { indices, manifest }| {
+                            (
+                                indices.map(|op, i| (i, manifest[op].clone())),
+                                manifest.style_id_to_inner_voice_id.clone(),
+                            )
+                        })
+                },
+                frame_decode: |frame_decode| {
+                    frame_decode
+                        .as_ref()
+                        .map(|InferenceModelEntry { indices, manifest }| {
+                            (
+                                indices.map(|op, i| (i, manifest[op].clone())),
+                                manifest.style_id_to_inner_voice_id.clone(),
+                            )
+                        })
+                },
+            })
+        });
+
+        let talk = OptionFuture::from(talk.map(|(entries, style_id_to_inner_voice_id)| async {
+            let [predict_duration, predict_intonation, predict_spectrogram, run_vocoder] =
+                entries.into_array();
+
+            let predict_duration = read_file!(predict_duration);
+            let predict_intonation = read_file!(predict_intonation);
+            let predict_spectrogram = read_file!(predict_spectrogram);
+            let run_vocoder = read_file!(run_vocoder);
+
+            let model_bytes = EnumMap::from_array([
+                predict_duration,
+                predict_intonation,
+                predict_spectrogram,
+                run_vocoder,
+            ]);
+
+            Ok((style_id_to_inner_voice_id, model_bytes))
+        }))
+        .await
+        .transpose()?;
 
-        let talk = OptionFuture::from(talk.map(
-            |(entries, style_id_to_inner_voice_id)| async move {
-                let [predict_duration, predict_intonation, predict_spectrogram, run_vocoder] =
+        let singing_teacher = OptionFuture::from(singing_teacher.map(
+            |(entries, style_id_to_inner_voice_id)| async {
+                let [predict_sing_consonant_length, predict_sing_f0, predict_sing_volume] =
                     entries.into_array();
 
-                let predict_duration = read_file!(predict_duration);
-                let predict_intonation = read_file!(predict_intonation);
-                let predict_spectrogram = read_file!(predict_spectrogram);
-                let run_vocoder = read_file!(run_vocoder);
+                let predict_sing_consonant_length = read_file!(predict_sing_consonant_length);
+                let predict_sing_f0 = read_file!(predict_sing_f0);
+                let predict_sing_volume = read_file!(predict_sing_volume);
 
                 let model_bytes = EnumMap::from_array([
-                    predict_duration,
-                    predict_intonation,
-                    predict_spectrogram,
-                    run_vocoder,
+                    predict_sing_consonant_length,
+                    predict_sing_f0,
+                    predict_sing_volume,
                 ]);
 
                 Ok((style_id_to_inner_voice_id, model_bytes))
@@ -263,7 +344,25 @@ impl Inner {
         .await
         .transpose()?;
 
-        Ok(InferenceDomainMap { talk })
+        let frame_decode = OptionFuture::from(frame_decode.map(
+            |(entries, style_id_to_inner_voice_id)| async {
+                let [sf_decode] = entries.into_array();
+
+                let sf_decode = read_file!(sf_decode);
+
+                let model_bytes = EnumMap::from_array([sf_decode]);
+
+                Ok((style_id_to_inner_voice_id, model_bytes))
+            },
+        ))
+        .await
+        .transpose()?;
+
+        Ok(InferenceDomainMap {
+            talk,
+            singing_teacher,
+            frame_decode,
+        })
     }
 }
@@ -396,9 +495,15 @@ impl InferenceDomainMap<ManifestDomains> {
     ///
     /// For example, when `self.talk` is `None`, this returns `false` for `StyleType::Talk`.
     fn accepts(&self, style_type: StyleType) -> bool {
-        let Self { talk } = self;
+        let Self {
+            talk,
+            singing_teacher,
+            frame_decode,
+        } = self;
 
-        return TalkDomain::contains(style_type).implies(|| talk.is_some());
+        return TalkDomain::contains(style_type).implies(|| talk.is_some())
+            && SingingTeacherDomain::contains(style_type).implies(|| singing_teacher.is_some())
+            && FrameDecodeDomain::contains(style_type).implies(|| frame_decode.is_some());
 
         #[ext]
         impl<D: InferenceDomain> D {
@@ -503,7 +608,7 @@ mod tests {
 
     use crate::{
         infer::domains::InferenceDomainMap,
-        manifest::{ManifestDomains, TalkManifest},
+        manifest::{FrameDecodeManifest, ManifestDomains, SingingTeacherManifest, TalkManifest},
        SpeakerMeta, StyleType,
     };
@@ -511,6 +616,8 @@ mod tests {
     #[case(
         &InferenceDomainMap {
             talk: None,
+            singing_teacher: None,
+            frame_decode: None,
         },
         &[],
         Ok(())
@@ -518,6 +625,8 @@ mod tests {
     #[case(
         &InferenceDomainMap {
             talk: Some(TalkManifest::default()),
+            singing_teacher: Some(SingingTeacherManifest::default()),
+            frame_decode: Some(FrameDecodeManifest::default()),
         },
         &[speaker(&[StyleType::Talk])],
         Ok(())
@@ -525,6 +634,8 @@ mod tests {
     #[case(
         &InferenceDomainMap {
             talk: Some(TalkManifest::default()),
+            singing_teacher: Some(SingingTeacherManifest::default()),
+            frame_decode: Some(FrameDecodeManifest::default()),
         },
         &[speaker(&[StyleType::Talk, StyleType::Sing])],
         Ok(())
@@ -532,6 +643,8 @@ mod tests {
     #[case(
         &InferenceDomainMap {
             talk: None,
+            singing_teacher: None,
+            frame_decode: None,
         },
         &[speaker(&[StyleType::Talk])],
         Err(())
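voice_model.rs loads each domain only if its manifest entry is present, via the `OptionFuture`/`transpose` idiom: `Option<Future>` → await → `Option<Result<_, _>>` → `Result<Option<_>, _>`. A runnable sketch assuming the `futures` crate; `load_domain` and its filename input are hypothetical stand-ins for `read_file!` and the VVM zip entries:

```rust
use futures::{executor::block_on, future::OptionFuture};

// An absent domain yields `Ok(None)`; a present one runs its async loader.
async fn load_domain(entry: Option<&str>) -> Result<Option<Vec<u8>>, std::io::Error> {
    OptionFuture::from(entry.map(|filename| async move {
        // Stand-in for `read_file!`; a real loader would read the zip entry.
        Ok::<_, std::io::Error>(filename.as_bytes().to_vec())
    }))
    .await
    .transpose()
}

fn main() {
    assert!(block_on(load_domain(Some("sf_decode.onnx"))).unwrap().is_some());
    assert!(block_on(load_domain(None)).unwrap().is_none());
}
```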
diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs
index 78805369e..ab191164c 100644
--- a/crates/voicevox_core_c_api/src/compatible_engine.rs
+++ b/crates/voicevox_core_c_api/src/compatible_engine.rs
@@ -468,6 +468,214 @@ pub unsafe extern "C" fn render_audio_segment(
     }
 }
 
+/// # Safety
+///
+/// - `consonant` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `vowel` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `note_duration` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `speaker_id` must be interpretable as a Rust `&[i64; 1]`.
+/// - `output` must be interpretable as a Rust `&mut [MaybeUninit<i64>; length as usize]`.
+#[unsafe(no_mangle)] // SAFETY: no other symbol with this name exists in the libraries that make up voicevox_core_c_api
+pub unsafe extern "C" fn predict_sing_consonant_length_forward(
+    length: i64,
+    consonant: *mut i64,
+    vowel: *mut i64,
+    note_duration: *mut i64,
+    speaker_id: *mut i64,
+    output: *mut i64,
+) -> bool {
+    init_logger_once();
+    assert_aligned(consonant);
+    assert_aligned(vowel);
+    assert_aligned(note_duration);
+    assert_aligned(speaker_id);
+    assert_aligned(output);
+    let length = length as usize;
+    let synthesizer = &*lock_synthesizer();
+    let result = ensure_initialized!(synthesizer).predict_sing_consonant_length(
+        // SAFETY: The safety contract must be upheld by the caller.
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], consonant) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], vowel) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], note_duration) }.to_owned(),
+        StyleId::new(unsafe { *speaker_id as u32 }),
+    );
+    match result {
+        Ok(output_arr) => {
+            let output_len = length;
+            if output_arr.len() != output_len {
+                panic!("expected {}, got {}", output_len, output_arr.len());
+            }
+            let output_arr = output_arr.as_standard_layout();
+            // SAFETY: The safety contract must be upheld by the caller.
+            unsafe {
+                output_arr
+                    .as_ptr()
+                    .copy_to_nonoverlapping(output, output_len);
+            }
+            true
+        }
+        Err(err) => {
+            set_message(&format!("{err}"));
+            false
+        }
+    }
+}
+
+/// # Safety
+///
+/// - `phoneme` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `note` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `speaker_id` must be interpretable as a Rust `&[i64; 1]`.
+/// - `output` must be interpretable as a Rust `&mut [MaybeUninit<f32>; length as usize]`.
+#[unsafe(no_mangle)] // SAFETY: no other symbol with this name exists in the libraries that make up voicevox_core_c_api
+pub unsafe extern "C" fn predict_sing_f0_forward(
+    length: i64,
+    phoneme: *mut i64,
+    note: *mut i64,
+    speaker_id: *mut i64,
+    output: *mut f32,
+) -> bool {
+    init_logger_once();
+    assert_aligned(phoneme);
+    assert_aligned(note);
+    assert_aligned(speaker_id);
+    assert_aligned(output);
+    let length = length as usize;
+    let synthesizer = &*lock_synthesizer();
+    let result = ensure_initialized!(synthesizer).predict_sing_f0(
+        // SAFETY: The safety contract must be upheld by the caller.
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], phoneme) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], note) }.to_owned(),
+        StyleId::new(unsafe { *speaker_id as u32 }),
+    );
+    match result {
+        Ok(output_arr) => {
+            let output_len = length;
+            if output_arr.len() != output_len {
+                panic!("expected {}, got {}", output_len, output_arr.len());
+            }
+            let output_arr = output_arr.as_standard_layout();
+            // SAFETY: The safety contract must be upheld by the caller.
+            unsafe {
+                output_arr
+                    .as_ptr()
+                    .copy_to_nonoverlapping(output, output_len);
+            }
+            true
+        }
+        Err(err) => {
+            set_message(&format!("{err}"));
+            false
+        }
+    }
+}
+
+/// # Safety
+///
+/// - `phoneme` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `note` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `f0` must be interpretable as a Rust `&[f32; length as usize]`.
+/// - `speaker_id` must be interpretable as a Rust `&[i64; 1]`.
+/// - `output` must be interpretable as a Rust `&mut [MaybeUninit<f32>; length as usize]`.
+#[unsafe(no_mangle)] // SAFETY: no other symbol with this name exists in the libraries that make up voicevox_core_c_api
+pub unsafe extern "C" fn predict_sing_volume_forward(
+    length: i64,
+    phoneme: *mut i64,
+    note: *mut i64,
+    f0: *mut f32,
+    speaker_id: *mut i64,
+    output: *mut f32,
+) -> bool {
+    init_logger_once();
+    assert_aligned(phoneme);
+    assert_aligned(note);
+    assert_aligned(f0);
+    assert_aligned(speaker_id);
+    assert_aligned(output);
+    let length = length as usize;
+    let synthesizer = &*lock_synthesizer();
+    let result = ensure_initialized!(synthesizer).predict_sing_volume(
+        // SAFETY: The safety contract must be upheld by the caller.
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], phoneme) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], note) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], f0) }.to_owned(),
+        StyleId::new(unsafe { *speaker_id as u32 }),
+    );
+    match result {
+        Ok(output_arr) => {
+            let output_len = length;
+            if output_arr.len() != output_len {
+                panic!("expected {}, got {}", output_len, output_arr.len());
+            }
+            let output_arr = output_arr.as_standard_layout();
+            // SAFETY: The safety contract must be upheld by the caller.
+            unsafe {
+                output_arr
+                    .as_ptr()
+                    .copy_to_nonoverlapping(output, output_len);
+            }
+            true
+        }
+        Err(err) => {
+            set_message(&format!("{err}"));
+            false
+        }
+    }
+}
+
+/// # Safety
+///
+/// - `phoneme` must be interpretable as a Rust `&[i64; length as usize]`.
+/// - `f0` must be interpretable as a Rust `&[f32; length as usize]`.
+/// - `volume` must be interpretable as a Rust `&[f32; length as usize]`.
+/// - `speaker_id` must be interpretable as a Rust `&[i64; 1]`.
+/// - `output` must be interpretable as a Rust `&mut [MaybeUninit<f32>; 256 * length as usize]`.
+#[unsafe(no_mangle)] // SAFETY: no other symbol with this name exists in the libraries that make up voicevox_core_c_api
+pub unsafe extern "C" fn sf_decode_forward(
+    length: i64,
+    phoneme: *mut i64,
+    f0: *mut f32,
+    volume: *mut f32,
+    speaker_id: *mut i64,
+    output: *mut f32,
+) -> bool {
+    init_logger_once();
+    assert_aligned(phoneme);
+    assert_aligned(f0);
+    assert_aligned(volume);
+    assert_aligned(speaker_id);
+    assert_aligned(output);
+    let length = length as usize;
+    let synthesizer = &*lock_synthesizer();
+    let result = ensure_initialized!(synthesizer).sf_decode(
+        // SAFETY: The safety contract must be upheld by the caller.
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], phoneme) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], f0) }.to_owned(),
+        unsafe { ndarray::ArrayView::from_shape_ptr([length], volume) }.to_owned(),
+        StyleId::new(unsafe { *speaker_id as u32 }),
+    );
+    match result {
+        Ok(output_arr) => {
+            let output_len = length * 256;
+            if output_arr.len() != output_len {
+                panic!("expected {}, got {}", output_len, output_arr.len());
+            }
+            let output_arr = output_arr.as_standard_layout();
+            // SAFETY: The safety contract must be upheld by the caller.
+            unsafe {
+                output_arr
+                    .as_ptr()
+                    .copy_to_nonoverlapping(output, output_len);
+            }
+            true
+        }
+        Err(err) => {
+            set_message(&format!("{err}"));
+            false
+        }
+    }
+}
+
 #[track_caller]
 fn assert_aligned(ptr: *mut impl Sized) {
     assert!(
diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml
index 218ec2a7b..c45292fad 100644
--- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml
+++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml
@@ -48,6 +48,26 @@ metas = '''
       "version": "0.0.1",
       "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3",
       "order": null
+    },
+    {
+      "name": "dummy4",
+      "styles": [
+        {
+          "id": 3000,
+          "name": "style4-1",
+          "type": "frame_decode",
+          "order": null
+        },
+        {
+          "id": 6000,
+          "name": "style4-2",
+          "type": "singing_teacher",
+          "order": null
+        }
+      ],
+      "version": "0.0.1",
+      "speaker_uuid": "32478dc2-4c8b-44f7-b041-c836e0df6d56",
+      "order": null
     }
   ]'''
 stderr.windows = '''
@@ -174,6 +194,26 @@ metas = '''
       "version": "0.0.1",
       "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3",
       "order": null
+    },
+    {
+      "name": "dummy4",
+      "styles": [
+        {
+          "id": 3000,
+          "name": "style4-1",
+          "type": "frame_decode",
+          "order": null
+        },
+        {
+          "id": 6000,
+          "name": "style4-2",
+          "type": "singing_teacher",
+          "order": null
+        }
+      ],
+      "version": "0.0.1",
+      "speaker_uuid": "32478dc2-4c8b-44f7-b041-c836e0df6d56",
+      "order": null
     }
   ]'''
 stderr.windows = '''
diff --git a/model/sample.vvm/manifest.json b/model/sample.vvm/manifest.json
index 1075a0797..6161d204a 100644
--- a/model/sample.vvm/manifest.json
+++ b/model/sample.vvm/manifest.json
@@ -11,5 +11,19 @@
       "302": 2,
       "303": 3
     }
+  },
+  "singing_teacher": {
+    "predict_sing_consonant_length_filename": "predict_sing_consonant_length.onnx",
+    "predict_sing_f0_filename": "predict_sing_f0.onnx",
+    "predict_sing_volume_filename": "predict_sing_volume.onnx",
+    "style_id_to_inner_voice_id": {
+      "6000": 0
+    }
+  },
+  "frame_decode": {
+    "sf_decode_filename": "sf_decode.onnx",
+    "style_id_to_inner_voice_id": {
+      "3000": 0
+    }
   }
 }
diff --git a/model/sample.vvm/metas.json b/model/sample.vvm/metas.json
index 08f273fd7..e27a015dd 100644
--- a/model/sample.vvm/metas.json
+++ b/model/sample.vvm/metas.json
@@ -35,5 +35,22 @@
     ],
     "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3",
     "version": "0.0.1"
+  },
+  {
+    "name": "dummy4",
+    "styles": [
+      {
+        "name": "style4-1",
+        "id": 3000,
+        "type": "frame_decode"
+      },
+      {
+        "name": "style4-2",
+        "id": 6000,
+        "type": "singing_teacher"
+      }
+    ],
+    "speaker_uuid": "32478dc2-4c8b-44f7-b041-c836e0df6d56",
+    "version": "0.0.1"
   }
 ]
diff --git a/model/sample.vvm/predict_sing_consonant_length.onnx b/model/sample.vvm/predict_sing_consonant_length.onnx
new file mode 100644
index 000000000..88a85df7a
Binary files /dev/null and b/model/sample.vvm/predict_sing_consonant_length.onnx differ
diff --git a/model/sample.vvm/predict_sing_f0.onnx b/model/sample.vvm/predict_sing_f0.onnx
new file mode 100644
index 000000000..026c3fb1f
Binary files /dev/null and b/model/sample.vvm/predict_sing_f0.onnx differ
diff --git a/model/sample.vvm/predict_sing_volume.onnx b/model/sample.vvm/predict_sing_volume.onnx
new file mode 100644
index 000000000..d80f97cba
Binary files /dev/null and b/model/sample.vvm/predict_sing_volume.onnx differ
diff --git a/model/sample.vvm/sf_decode.onnx b/model/sample.vvm/sf_decode.onnx
new file mode 100644
index 000000000..169285cb4
Binary files /dev/null and b/model/sample.vvm/sf_decode.onnx differ
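For C API consumers, the new `*_forward` entry points mirror the existing compatible-engine ABI. A hedged sketch of what a caller must guarantee for `sf_decode_forward`: the declaration is copied from this diff, but linking against the built voicevox_core_c_api library (and initializing the engine beforehand) is assumed and not shown. The key contract is the output buffer size — 256 samples per input frame, matching the `output_len = length * 256` check above:

```rust
use std::mem::MaybeUninit;

// Assumed: this crate links against voicevox_core_c_api, which exports the
// symbol declared here (signature taken verbatim from the diff).
unsafe extern "C" {
    fn sf_decode_forward(
        length: i64,
        phoneme: *mut i64,
        f0: *mut f32,
        volume: *mut f32,
        speaker_id: *mut i64,
        output: *mut f32,
    ) -> bool;
}

fn main() {
    let frames = 69usize;
    let mut phoneme = vec![0i64; frames];
    let mut f0 = vec![0f32; frames];
    let mut volume = vec![0f32; frames];
    let mut speaker_id = vec![0i64; 1];
    // 256 samples are produced per frame, so the buffer is frames * 256 long.
    let mut output: Vec<MaybeUninit<f32>> = vec![MaybeUninit::uninit(); frames * 256];

    // Assumed: the engine was already initialized and a model with a
    // frame_decode style was loaded; otherwise this returns false.
    let ok = unsafe {
        sf_decode_forward(
            frames as i64,
            phoneme.as_mut_ptr(),
            f0.as_mut_ptr(),
            volume.as_mut_ptr(),
            speaker_id.as_mut_ptr(),
            output.as_mut_ptr().cast::<f32>(),
        )
    };
    assert!(ok);
}
```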