Merge pull request #5 from YuzukiTsuru/Shine

Shine
YuzukiTsuru · Jun 3, 2022 · 8bfd3a6 · 8bfd3a6
2 parents c7dc28d + 7acadd2
commit 8bfd3a6
Show file tree

Hide file tree

Showing 26 changed files with 412 additions and 180 deletions.
diff --git a/.github/workflows/cmake-macos.yml b/.github/workflows/cmake-macos.yml
@@ -2,9 +2,9 @@ name: CMake macOS
 
 on:
   push:
-    branches: [ master ]
+    branches: "**"  # "**" matches every branch; a lone "*" skips names containing "/"
   pull_request:
-    branches: [ master ]
+    branches: "**"  # "**" matches every branch; a lone "*" skips names containing "/"
 
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

diff --git a/.github/workflows/cmake-ubuntu.yml b/.github/workflows/cmake-ubuntu.yml
@@ -2,9 +2,9 @@ name: CMake Ubuntu
 
 on:
   push:
-    branches: [ master ]
+    branches: "**"  # "**" matches every branch; a lone "*" skips names containing "/"
   pull_request:
-    branches: [ master ]
+    branches: "**"  # "**" matches every branch; a lone "*" skips names containing "/"
 
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

diff --git a/.github/workflows/cmake-windows.yml b/.github/workflows/cmake-windows.yml
@@ -2,9 +2,9 @@ name: CMake Windows
 
 on:
   push:
-    branches: [ master ]
+    branches: "**"  # "**" matches every branch; a lone "*" skips names containing "/"
   pull_request:
-    branches: [ master ]
+    branches: "**"  # "**" matches every branch; a lone "*" skips names containing "/"
 
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)

diff --git a/src/AudioModel/CMakeLists.txt b/src/AudioModel/CMakeLists.txt
@@ -1,6 +1,7 @@
 # Library Audio Model
 
 add_subdirectory(WorldModule)
+add_subdirectory(Synthesis)  # the file(GLOB) below is not recursive, so Synthesis/ is added as its own subdirectory
 
 file(GLOB audio_model_source *.cpp *.h)
 add_library(AudioModel ${audio_model_source})

diff --git a/src/AudioModel/Synthesis/Synthesis.cpp b/src/AudioModel/Synthesis/Synthesis.cpp
@@ -17,3 +17,51 @@
 //
 
 #include "Synthesis.h"
+
+#include <world/synthesis.h>
+
+#include "Utils/LOG.h"
+#include "world/synthesisrealtime.h"
+
+Synthesis::Synthesis(lessAudioModel audioModel, int x_length) : x_length(x_length), audioModel(audioModel) {
+    // Store the model: SynthesisWav() reads fs, frame_period, fft_size, f0, spectrogram and aperiodicity from the member.
+    YALL_DEBUG_ << "Allocate Memory for output wav, length: " + std::to_string(x_length);
+    AllocateMemory();
+}
+
+void Synthesis::AllocateMemory() {
+    x = new double[x_length]();  // value-initialised: samples SynthesisWav() never writes stay 0.0 instead of garbage
+}
+
+void Synthesis::SynthesisWav() const {  // feeds audioModel frame by frame to the synthesizer and copies its output into x
+    WorldSynthesizer synthesizer = {0};
+    int buffer_size = 64;  // samples produced per Synthesis2() call
+    InitializeSynthesizer(audioModel.fs, audioModel.frame_period,
+                          audioModel.fft_size, buffer_size, 100, &synthesizer);
+
+    int offset = 0;  // number of buffer_size-sample chunks produced so far
+    int index = 0;  // first sample of the current chunk inside x
+    for (int i = 0; i < audioModel.f0_length;) {
+        // Add one frame ('i' shows the frame index that should be added)
+        if (AddParameters(&audioModel.f0[i], 1, &audioModel.spectrogram[i], &audioModel.aperiodicity[i], &synthesizer) == 1) {
+            ++i;
+        }
+
+        // Synthesize speech with length of buffer_size sample.
+        // It is repeated until the function returns 0
+        // (it suggests that the synthesizer cannot generate speech).
+        while (Synthesis2(&synthesizer) != 0) {
+            index = offset * buffer_size;
+            for (int j = 0; j < buffer_size && index + j < x_length; ++j)  // never write past x[x_length - 1]
+                x[j + index] = synthesizer.buffer[j];
+            offset++;
+        }
+
+        // Check the "Lock" (Please see synthesisrealtime.h)
+        if (IsLocked(&synthesizer) == 1) {
+            YALL_WARN_ << "Synthesis Buffer Locked";
+            break;
+        }
+    }
+    DestroySynthesizer(&synthesizer);
+}
diff --git a/src/AudioModel/Synthesis/Synthesis.h b/src/AudioModel/Synthesis/Synthesis.h
@@ -19,9 +19,33 @@
 #ifndef LESSAMPLER_SYNTHESIS_H
 #define LESSAMPLER_SYNTHESIS_H
 
+#include <iostream>
+
+#include "AudioModel/lessAudioModel.h"
+
+class SynthesisPara {  // parameter bundle; every member has a default so a fresh instance is never indeterminate
+public:
+    int fs = 0;
+    int f0_length = 0;
+    double *f0 = nullptr;
+    double **spectrogram = nullptr;
+    double **aperiodicity = nullptr;
+};
 
 class Synthesis {
+public:
+    explicit Synthesis(lessAudioModel audioModel, int x_length);
+    ~Synthesis() { delete[] x; }  // x is allocated with new[] in AllocateMemory(); delete[] on nullptr is a no-op
+    Synthesis(const Synthesis &) = delete;  // a copy would share x and delete it twice
+    Synthesis &operator=(const Synthesis &) = delete;
+
+private:
+    double *x = nullptr;  // output sample buffer, x_length entries
+    int x_length = 0;
+    lessAudioModel audioModel{};  // model read by SynthesisWav()
+    void AllocateMemory();
 
+    void SynthesisWav() const;
 };
 
 

diff --git a/src/AudioModel/lessAudioModel.h b/src/AudioModel/lessAudioModel.h
@@ -38,12 +38,4 @@ class lessAudioModel {
     int fft_size = 0;
 };
 
-class TransAudioModel {
-public:
-    int t_f0_length;
-    double *t_f0;
-    double **t_spectrogram;
-    double **t_aperiodicity;
-};
-
 #endif //LESSAMPLER_LESSAUDIOMODEL_H
diff --git a/src/AudioProcess/AduioProcess.cpp b/src/AudioProcess/AduioProcess.cpp
@@ -18,80 +18,45 @@
 //
 #include <cmath>
 #include <utility>
-#include <cstring>
 
 #include "Utils/exception.h"
 #include "Utils/LOG.h"
 #include "AudioProcess.h"
-#include "libUTAU/PitchBendDecoder.h"
 
-AduioProcess::AduioProcess(lessAudioModel audioModel, UTAUPara utauPara, UTAUFlags flags) : audioModel(audioModel), utauPara(std::move(utauPara)),
-                                                                                            flags(flags) {
+AudioProcess::AudioProcess(lessAudioModel audioModel, ShinePara shine) : audioModel(audioModel), shine(std::move(shine)) {  // stores both arguments, then runs PicthEqualizing() followed by TimeStretch()
     YALL_DEBUG_ << "Equalizing Picth...";
     PicthEqualizing();
-    YALL_DEBUG_ << "Decode Pitch Bend...";
-    DecodePitchBend();
     YALL_DEBUG_ << "Time Stretch...";
     TimeStretch();
 }
 
-TransAudioModel AduioProcess::GetTransAudioModel() {
+lessAudioModel AudioProcess::GetTransAudioModel() {  // returns transAudioModel by value; NOTE(review): its f0/spectrogram/aperiodicity come from new[] in TimeStretch - confirm who frees them
     return transAudioModel;
 }
 
-void AduioProcess::PicthEqualizing() {
+void AudioProcess::PicthEqualizing() {  // rewrites every non-zero audioModel.f0 entry relative to shine.scale_num; zero entries stay 0
     auto freq_avg = GetAvgFreq();
     YALL_DEBUG_ << "The average frequency is " + std::to_string(freq_avg);
     if (freq_avg == 0.0) {
         for (int i = 0; i < audioModel.f0_length; ++i) {
             if (audioModel.f0[i] != 0.0) {
-                audioModel.f0[i] = utauPara.scaleNum;
+                audioModel.f0[i] = shine.scale_num;  // average is 0: set the frame straight to scale_num (also avoids dividing by freq_avg)
             } else {
                 audioModel.f0[i] = 0;
             }
         }
     } else {
         for (int i = 0; i < audioModel.f0_length; ++i) {
             if (audioModel.f0[i] != 0.0) {
-                audioModel.f0[i] = ((audioModel.f0[i] - freq_avg) * utauPara.modulation / 100.0 + freq_avg) * (utauPara.scaleNum / freq_avg);
+                audioModel.f0[i] = ((audioModel.f0[i] - freq_avg) * shine.modulation / 100.0 + freq_avg) * (shine.scale_num / freq_avg);  // deviation from the average is scaled by modulation/100, then the result is scaled by scale_num / freq_avg
             } else {
                 audioModel.f0[i] = 0;
             }
         }
     }
 }
 
-void AduioProcess::DecodePitchBend() {
-    if (utauPara.tempoNum == 0)
-        utauPara.tempoNum = 120;
-
-    if (utauPara.isCustomPitch) {
-        pitch_step = static_cast<int>(lround(60.0 / 96.0 / utauPara.tempoNum * audioModel.fs));
-        pitch_length = utauPara.output_samples / pitch_step + 1;
-
-        YALL_DEBUG_ << "The Pitch Length is: " + std::to_string(pitch_length);
-
-        PitchBendDecoder pitchBendDecoder(utauPara.pitch, pitch_length);
-
-        utauPara.pitch_bend = new int[pitch_length + 1];
-        for (int i = 0; i < pitch_length + 1; ++i) {
-            utauPara.pitch_bend[i] = 0;
-        }
-
-        std::memcpy(utauPara.pitch_bend, pitchBendDecoder.getPitchBend(), sizeof(int) * pitch_length);
-    } else {
-        utauPara.pitch_bend = new int[pitch_length + 1];
-        for (int i = 0; i < pitch_length + 1; ++i) {
-            utauPara.pitch_bend[i] = 0;
-        }
-    }
-
-    required_frame = static_cast<int>(1000.0 * utauPara.output_samples / audioModel.fs / audioModel.frame_period) + 1;
-    YALL_DEBUG_ << "The required frame is: " + std::to_string(required_frame);
-    transAudioModel.t_f0_length = required_frame;
-}
-
-double AduioProcess::GetAvgFreq() const {
+double AudioProcess::GetAvgFreq() const {
     double freq_avg = 0.0, timePercent, r, p[6], q, base_timePercent = 0;
     for (int i = 0; i < audioModel.f0_length; ++i) {
         timePercent = audioModel.f0[i];
@@ -114,25 +79,27 @@ double AduioProcess::GetAvgFreq() const {
     return freq_avg;
 }
 
-void AduioProcess::TimeStretch() {
+void AudioProcess::TimeStretch() {
     YALL_DEBUG_ << "Allocate memory for target audio f0, sp, ap";
 
-    if (transAudioModel.t_f0_length == 0)
+    if (shine.required_frame == 0)
         throw parameter_error("The target audio frame length is 0");
 
-    transAudioModel.t_f0 = new double[transAudioModel.t_f0_length];
-    for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
-        transAudioModel.t_f0[i] = 0.0;
+    transAudioModel.f0_length = shine.required_frame;
+
+    transAudioModel.f0 = new double[transAudioModel.f0_length];
+    for (int i = 0; i < transAudioModel.f0_length; ++i) {
+        transAudioModel.f0[i] = 0.0;
     }
 
-    transAudioModel.t_spectrogram = new double *[transAudioModel.t_f0_length];
-    transAudioModel.t_aperiodicity = new double *[transAudioModel.t_f0_length];
-    for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
-        transAudioModel.t_spectrogram[i] = new double[audioModel.w_length];
-        transAudioModel.t_aperiodicity[i] = new double[audioModel.w_length];
+    transAudioModel.spectrogram = new double *[transAudioModel.f0_length];
+    transAudioModel.aperiodicity = new double *[transAudioModel.f0_length];
+    for (int i = 0; i < transAudioModel.f0_length; ++i) {
+        transAudioModel.spectrogram[i] = new double[audioModel.w_length];
+        transAudioModel.aperiodicity[i] = new double[audioModel.w_length];
         for (int j = 0; j < audioModel.w_length; ++j) {
-            transAudioModel.t_spectrogram[i][j] = 0.0;
-            transAudioModel.t_aperiodicity[i][j] = 0.0;
+            transAudioModel.spectrogram[i][j] = 0.0;
+            transAudioModel.aperiodicity[i][j] = 0.0;
         }
     }
 
@@ -142,12 +109,12 @@ void AduioProcess::TimeStretch() {
     double _sample_sp_trans_index, _sample_ap_trans_index, _out_sample_index, _in_sample_index;
     int _sp_trans_index, _ap_trans_index;
 
-    for (int i = 0; i < transAudioModel.t_f0_length; ++i) {
+    for (int i = 0; i < transAudioModel.f0_length; ++i) {
         _out_sample_index = audioModel.frame_period * i;
-        if (_out_sample_index < utauPara.base_length) {
-            _in_sample_index = utauPara.offset + _out_sample_index * utauPara.velocity;
+        if (_out_sample_index < shine.base_length) {
+            _in_sample_index = shine.offset + _out_sample_index * shine.velocity;
         } else {
-            _in_sample_index = utauPara.offset + utauPara.firstHalfFixedPart + (_out_sample_index - utauPara.base_length) * utauPara.stretch_length;
+            _in_sample_index = shine.offset + shine.first_half_fixed_part + (_out_sample_index - shine.base_length) * shine.stretch_length;
         }
         YALL_DEBUG_ << "_in_sample_index -> " + std::to_string(_in_sample_index);
         YALL_DEBUG_ << "_out_sample_index -> " + std::to_string(_out_sample_index);
@@ -173,34 +140,34 @@ void AduioProcess::TimeStretch() {
             }
         }
 
-        _sample_ap_trans_index = _out_sample_index * 0.001 * audioModel.fs / pitch_step;
+        _sample_ap_trans_index = _out_sample_index * 0.001 * audioModel.fs / shine.pitch_step;
         _ap_trans_index = static_cast<int>(floor(_sample_ap_trans_index));
         _sample_ap_trans_index -= _ap_trans_index;
 
-        if (_ap_trans_index >= pitch_length) {
-            _ap_trans_index = pitch_length - 1;
+        if (_ap_trans_index >= shine.pitch_length) {
+            _ap_trans_index = shine.pitch_length - 1;
             _sample_sp_trans_index = 0.0;
         }
 
         YALL_DEBUG_ << "_ap_trans_index -> " + std::to_string(_ap_trans_index);
         YALL_DEBUG_ << "_sample_ap_trans_index -> " + std::to_string(_ap_trans_index + _sample_ap_trans_index);
 
         YALL_DEBUG_ << "Apply Pitch Shift With Pitch Bend";
-        auto pitch_base = utauPara.scaleNum * pow(2, (utauPara.pitch_bend[_ap_trans_index] * (1.0 - _sample_ap_trans_index) +
-                                                      utauPara.pitch_bend[_ap_trans_index + 1] * _sample_ap_trans_index) / 1200.0);
+        auto pitch_base = shine.scale_num * pow(2, (shine.pitch_bend[_ap_trans_index] * (1.0 - _sample_ap_trans_index) +
+                                                    shine.pitch_bend[_ap_trans_index + 1] * _sample_ap_trans_index) / 1200.0);
 
-        YALL_DEBUG_ << "Trans F0 " + std::to_string(transAudioModel.t_f0[i]) + " Add " + std::to_string(pitch_base);
-        transAudioModel.t_f0[i] = pitch_base;
+        YALL_DEBUG_ << "Trans F0 " + std::to_string(transAudioModel.f0[i]) + " Add " + std::to_string(pitch_base);
+        transAudioModel.f0[i] = pitch_base;
 
-        transAudioModel.t_f0[i] = transAudioModel.t_f0[i] * pow(temp_f0 / avg_freq, utauPara.modulation * 0.01);
+        transAudioModel.f0[i] = transAudioModel.f0[i] * pow(temp_f0 / avg_freq, shine.modulation * 0.01);
 
         YALL_DEBUG_ << "Trans SP ";
         for (int j = 0; j < audioModel.w_length; ++j) {
             if (_sp_trans_index < audioModel.f0_length - 1) {
-                transAudioModel.t_spectrogram[i][j] = audioModel.spectrogram[_sp_trans_index][j] * (1.0 - _sample_sp_trans_index) +
+                transAudioModel.spectrogram[i][j] = audioModel.spectrogram[_sp_trans_index][j] * (1.0 - _sample_sp_trans_index) +
                                                       audioModel.spectrogram[_sp_trans_index + 1][j] * _sample_sp_trans_index;
             } else {
-                transAudioModel.t_spectrogram[i][j] = audioModel.spectrogram[audioModel.f0_length - 1][j];
+                transAudioModel.spectrogram[i][j] = audioModel.spectrogram[audioModel.f0_length - 1][j];
             }
         }
 
@@ -212,15 +179,15 @@ void AduioProcess::TimeStretch() {
 
         for (int j = 0; j < audioModel.w_length; ++j) {
             if (_ap_trans_index < audioModel.f0_length) {
-                transAudioModel.t_aperiodicity[i][j] = audioModel.aperiodicity[_ap_trans_index][j];
+                transAudioModel.aperiodicity[i][j] = audioModel.aperiodicity[_ap_trans_index][j];
             } else {
-                transAudioModel.t_aperiodicity[i][j] = audioModel.aperiodicity[audioModel.f0_length - 1][j];
+                transAudioModel.aperiodicity[i][j] = audioModel.aperiodicity[audioModel.f0_length - 1][j];
             }
         }
     }
 }
 
-void AduioProcess::interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) {
+void AudioProcess::interp1(const double *x, const double *y, int x_length, const double *xi, int xi_length, double *yi) {
     auto *h = new double[x_length - 1];
     int *k = new int[xi_length];
 
@@ -243,7 +210,7 @@ void AduioProcess::interp1(const double *x, const double *y, int x_length, const
     delete[] h;
 }
 
-void AduioProcess::histc(const double *x, int x_length, const double *edges, int edges_length, int *index) {
+void AudioProcess::histc(const double *x, int x_length, const double *edges, int edges_length, int *index) {
     int count = 1;
 
     int i = 0;

diff --git a/src/AudioProcess/AudioProcess.h b/src/AudioProcess/AudioProcess.h
@@ -21,29 +21,22 @@
 #define LESSAMPLER_AUDIOPROCESS_H
 
 #include "AudioModel/lessAudioModel.h"
-#include "libUTAU/libUTAU.h"
+#include "Shine/ShinePara.h"
 
-class AduioProcess {
+class AudioProcess {
 public:
-    AduioProcess(lessAudioModel audioModel, UTAUPara utauPara, UTAUFlags flags);
+    AudioProcess(lessAudioModel audioModel, ShinePara shine);
 
-    TransAudioModel GetTransAudioModel();
+    lessAudioModel GetTransAudioModel();
 
 private:
     lessAudioModel audioModel{};
-    TransAudioModel transAudioModel{};
-    UTAUPara utauPara{};
-    UTAUFlags flags;
-
-    int pitch_length = 0;
-    int pitch_step = 256;
-    int required_frame = 0;
+    lessAudioModel transAudioModel{};
+    ShinePara shine;
 
 private:
     void PicthEqualizing();
 
-    void DecodePitchBend();
-
     [[nodiscard]] double GetAvgFreq() const;
 
     void TimeStretch();