Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

メモリリークしていた部分を修正した #94

Merged
merged 1 commit into from
Mar 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions core/src/engine.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <cstdlib>
#include <exception>
#include <memory>
#include <vector>

#include "core.h"
Expand All @@ -9,21 +10,22 @@

using namespace voicevox::core::engine;

static OpenJTalk *openjtalk = nullptr;
static SynthesisEngine *engine = nullptr;
// TODO:SynthesisEngineにopenjtalkを持たせるためshared_ptrにしているが、やめたい
static std::shared_ptr<OpenJTalk> openjtalk;
static std::unique_ptr<SynthesisEngine> engine;

VoicevoxResultCode voicevox_initialize_openjtalk(const char *dict_path) {
// TODO: error handling
openjtalk = new OpenJTalk(dict_path);
openjtalk = std::make_shared<OpenJTalk>(dict_path);
return VOICEVOX_RESULT_SUCCEED;
}

VoicevoxResultCode voicevox_tts(const char *text, int64_t speaker_id, int *output_binary_size, uint8_t **output_wav) {
if (openjtalk == nullptr) {
if (!openjtalk) {
return VOICEVOX_RESULT_NOT_INITIALIZE_OPEN_JTALK_ERR;
}
if (engine == nullptr) {
engine = new SynthesisEngine(openjtalk);
if (!engine) {
engine = std::make_unique<SynthesisEngine>(openjtalk);
}

std::vector<AccentPhraseModel> accent_phrases = engine->create_accent_phrases(std::string(text), &speaker_id);
Expand Down
136 changes: 69 additions & 67 deletions core/src/engine/full_context_label.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ std::string string_feature_by_regex(std::string pattern, std::string label) {
}
}

Phoneme *Phoneme::from_label(const std::string &label) {
Phoneme Phoneme::from_label(const std::string &label) {
std::map<std::string, std::string> contexts;
contexts["p3"] = string_feature_by_regex(R"(\-(.*?)\+)", label);
contexts["a2"] = string_feature_by_regex(R"(\+(\d+|xx)\+)", label);
Expand All @@ -26,22 +26,25 @@ Phoneme *Phoneme::from_label(const std::string &label) {
contexts["i3"] = string_feature_by_regex(R"(\@(\d+|xx)\+)", label);
contexts["j1"] = string_feature_by_regex(R"(/J\:(\d+|xx)_)", label);

return new Phoneme(contexts, label);
return Phoneme(contexts, label);
}

std::string Phoneme::phoneme() { return contexts.at("p3"); }
std::string Phoneme::phoneme() const { return contexts.at("p3"); }

bool Phoneme::is_pause() { return contexts.at("f1") == "xx"; }
bool Phoneme::is_pause() const { return contexts.at("f1") == "xx"; }

void Mora::set_context(const std::string &key, const std::string &value) const {
vowel->contexts[key] = value;
if (consonant != nullptr) consonant->contexts[key] = value;
// Stores `value` under `key` in the context map of each phoneme held by this
// mora: always the vowel, and also the consonant when one is present.
void Mora::set_context(const std::string &key, const std::string &value) {
  vowel.contexts[key] = value;

  // `consonant` is used like a std::optional here (has_value()/value()).
  // The guard must be positive: with the previous negated check, value() was
  // only reached on an empty optional (which throws for std::optional) and a
  // present consonant was never updated.
  if (consonant.has_value()) {
    consonant.value().contexts[key] = value;
  }
}

std::vector<Phoneme *> Mora::phonemes() {
std::vector<Phoneme *> phonemes;
if (consonant != nullptr) {
phonemes = {consonant, vowel};
std::vector<Phoneme> Mora::phonemes() const {
std::vector<Phoneme> phonemes;
if (consonant.has_value()) {
phonemes = {consonant.value(), vowel};
} else {
phonemes = {vowel};
}
Expand All @@ -50,115 +53,113 @@ std::vector<Phoneme *> Mora::phonemes() {

std::vector<std::string> Mora::labels() {
std::vector<std::string> labels;
for (Phoneme *phoneme : phonemes()) {
labels.push_back(phoneme->label);
for (auto &phoneme : phonemes()) {
labels.push_back(phoneme.label);
}
return labels;
}

AccentPhrase *AccentPhrase::from_phonemes(std::vector<Phoneme *> phonemes) {
std::vector<Mora *> moras;
std::vector<Phoneme *> mora_phonemes;
AccentPhrase AccentPhrase::from_phonemes(std::vector<Phoneme> phonemes) {
std::vector<Mora> moras;
std::vector<Phoneme> mora_phonemes;

for (size_t i = 0; i < phonemes.size(); i++) {
// workaround for Hiroshiba/voicevox_engine#57
if (phonemes[i]->contexts.at("a2") == "49") break;
if (phonemes[i].contexts.at("a2") == "49") break;

mora_phonemes.push_back(phonemes[i]);
if (i + 1 == phonemes.size() || phonemes[i]->contexts.at("a2") != phonemes[i + 1]->contexts.at("a2")) {
Mora *mora;
if (i + 1 == phonemes.size() || phonemes[i].contexts.at("a2") != phonemes[i + 1].contexts.at("a2")) {
if (mora_phonemes.size() == 1) {
mora = new Mora(mora_phonemes[0]);
moras.push_back(Mora(mora_phonemes[0]));
} else if (mora_phonemes.size() == 2) {
mora = new Mora(mora_phonemes[0], mora_phonemes[1]);
moras.push_back(Mora(mora_phonemes[0], mora_phonemes[1]));
} else {
throw std::runtime_error("too long mora");
}
moras.push_back(mora);
mora_phonemes.clear();
}
}

int accent = std::stoi(moras[0]->vowel->contexts.at("f2"));
bool is_interrogative = moras[moras.size() - 1]->vowel->contexts.at("f3") == "1";
int accent = std::stoi(moras[0].vowel.contexts.at("f2"));
bool is_interrogative = moras[moras.size() - 1].vowel.contexts.at("f3") == "1";
// workaround for VOICEVOX/voicevox_engine#55
if (accent > moras.size()) accent = moras.size();
return new AccentPhrase(moras, accent, is_interrogative);
return AccentPhrase(moras, accent, is_interrogative);
}

void AccentPhrase::set_context(std::string key, std::string value) {
for (Mora *mora : moras) mora->set_context(key, value);
for (auto &mora : moras) mora.set_context(key, value);
}

std::vector<Phoneme *> AccentPhrase::phonemes() {
std::vector<Phoneme *> phonemes;
for (Mora *mora : moras) {
std::vector<Phoneme *> mora_phonemes = mora->phonemes();
std::vector<Phoneme> AccentPhrase::phonemes() const {
std::vector<Phoneme> phonemes;
for (auto &mora : moras) {
std::vector<Phoneme> mora_phonemes = mora.phonemes();
std::copy(mora_phonemes.begin(), mora_phonemes.end(), std::back_inserter(phonemes));
}
return phonemes;
}

std::vector<std::string> AccentPhrase::labels() {
std::vector<std::string> labels;
for (Phoneme *phoneme : phonemes()) {
labels.push_back(phoneme->label);
for (auto &phoneme : phonemes()) {
labels.push_back(phoneme.label);
}
return labels;
}

AccentPhrase *AccentPhrase::merge(AccentPhrase *accent_phrase) {
std::vector<Mora *> moras;
AccentPhrase AccentPhrase::merge(AccentPhrase &accent_phrase) {
std::vector<Mora> moras;
std::copy(this->moras.begin(), this->moras.end(), std::back_inserter(moras));
std::copy(accent_phrase->moras.begin(), accent_phrase->moras.end(), std::back_inserter(moras));
return new AccentPhrase(moras, this->accent, accent_phrase->is_interrogative);
std::copy(accent_phrase.moras.begin(), accent_phrase.moras.end(), std::back_inserter(moras));
return AccentPhrase(moras, this->accent, accent_phrase.is_interrogative);
}

BreathGroup *BreathGroup::from_phonemes(std::vector<Phoneme *> phonemes) {
std::vector<AccentPhrase *> accent_phrases;
std::vector<Phoneme *> accent_phonemes;
BreathGroup BreathGroup::from_phonemes(std::vector<Phoneme> &phonemes) {
std::vector<AccentPhrase> accent_phrases;
std::vector<Phoneme> accent_phonemes;

for (size_t i = 0; i < phonemes.size(); i++) {
accent_phonemes.push_back(phonemes[i]);

if (i + 1 == phonemes.size() || phonemes[i]->contexts.at("i3") != phonemes[i + 1]->contexts.at("i3") ||
phonemes[i]->contexts.at("f5") != phonemes[i + 1]->contexts.at("f5")) {
if (i + 1 == phonemes.size() || phonemes[i].contexts.at("i3") != phonemes[i + 1].contexts.at("i3") ||
phonemes[i].contexts.at("f5") != phonemes[i + 1].contexts.at("f5")) {
accent_phrases.push_back(AccentPhrase::from_phonemes(accent_phonemes));
accent_phonemes.clear();
}
}

return new BreathGroup(accent_phrases);
return BreathGroup(accent_phrases);
};

void BreathGroup::set_context(std::string key, std::string value) {
for (AccentPhrase *accent_phrase : accent_phrases) accent_phrase->set_context(key, value);
for (auto &accent_phrase : accent_phrases) accent_phrase.set_context(key, value);
}

std::vector<Phoneme *> BreathGroup::phonemes() {
std::vector<Phoneme *> phonemes;
for (AccentPhrase *accent_phrase : accent_phrases) {
std::vector<Phoneme *> accent_phrase_phonemes = accent_phrase->phonemes();
std::vector<Phoneme> BreathGroup::phonemes() const {
std::vector<Phoneme> phonemes;
for (auto &accent_phrase : accent_phrases) {
const auto &accent_phrase_phonemes = accent_phrase.phonemes();
std::copy(accent_phrase_phonemes.begin(), accent_phrase_phonemes.end(), std::back_inserter(phonemes));
}
return phonemes;
}

std::vector<std::string> BreathGroup::labels() {
std::vector<std::string> labels;
for (Phoneme *phoneme : phonemes()) {
labels.push_back(phoneme->label);
for (auto &phoneme : phonemes()) {
labels.push_back(phoneme.label);
}
return labels;
}

Utterance Utterance::from_phonemes(const std::vector<Phoneme *> &phonemes) {
std::vector<BreathGroup *> breath_groups;
std::vector<Phoneme *> group_phonemes;
std::vector<Phoneme *> pauses;
Utterance Utterance::from_phonemes(const std::vector<Phoneme> &phonemes) {
std::vector<BreathGroup> breath_groups;
std::vector<Phoneme> group_phonemes;
std::vector<Phoneme> pauses;

for (Phoneme *phoneme : phonemes) {
if (!phoneme->is_pause()) {
for (auto &phoneme : phonemes) {
if (!phoneme.is_pause()) {
group_phonemes.push_back(phoneme);
} else {
pauses.push_back(phoneme);
Expand All @@ -173,38 +174,39 @@ Utterance Utterance::from_phonemes(const std::vector<Phoneme *> &phonemes) {
}

void Utterance::set_context(const std::string &key, const std::string &value) {
for (BreathGroup *breath_group : breath_groups) breath_group->set_context(key, value);
for (auto &breath_group : breath_groups) breath_group.set_context(key, value);
}

std::vector<Phoneme *> Utterance::phonemes() {
std::vector<AccentPhrase *> accent_phrases;
for (BreathGroup *breath_group : breath_groups) {
std::vector<AccentPhrase *> b_accent_phrases = breath_group->accent_phrases;
std::vector<Phoneme> Utterance::phonemes() {
std::vector<AccentPhrase> accent_phrases;
for (const auto &breath_group : breath_groups) {
const auto &b_accent_phrases = breath_group.accent_phrases;
std::copy(b_accent_phrases.begin(), b_accent_phrases.end(), std::back_inserter(accent_phrases));
}

std::vector<Phoneme *> phonemes;
std::vector<Phoneme> phonemes;
for (size_t i = 0; i < pauses.size(); i++) {
// if (pauses[i])
phonemes.push_back(pauses[i]);
if (i < pauses.size() - 1) {
std::copy(breath_groups[i]->phonemes().begin(), breath_groups[i]->phonemes().end(), std::back_inserter(phonemes));
const auto &p = breath_groups[i].phonemes();
std::copy(p.begin(), p.end(), std::back_inserter(phonemes));
}
}
return phonemes;
}

std::vector<std::string> Utterance::labels() {
std::vector<std::string> labels;
for (Phoneme *phoneme : phonemes()) {
labels.push_back(phoneme->label);
for (const auto &phoneme : phonemes()) {
labels.push_back(phoneme.label);
}
return labels;
}

Utterance extract_full_context_label(OpenJTalk *openjtalk, std::string text) {
std::vector<std::string> labels = openjtalk->extract_fullcontext(text);
std::vector<Phoneme *> phonemes;
Utterance extract_full_context_label(OpenJTalk &openjtalk, std::string text) {
std::vector<std::string> labels = openjtalk.extract_fullcontext(text);
std::vector<Phoneme> phonemes;
for (std::string label : labels) phonemes.push_back(Phoneme::from_label(label));
return Utterance::from_phonemes(phonemes);
}
Expand Down
Loading