Add the ability to generate creature-specific sound files from the API

opsnlops · Jun 2, 2024 · c7977c7 · c7977c7
1 parent b14b093
commit c7977c7
Show file tree

Hide file tree

Showing 21 changed files with 853 additions and 37 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.25)
 
 project(creature-server
-        VERSION "2.1.0"
+        VERSION "2.1.1"
         DESCRIPTION "Server for April's Creatures"
         HOMEPAGE_URL https://github.com/opsnlops/creature-server
         LANGUAGES C CXX)

diff --git a/lib/CreatureVoicesLib/CMakeLists.txt b/lib/CreatureVoicesLib/CMakeLists.txt
@@ -65,21 +65,32 @@ find_package(oatpp           REQUIRED PATHS ${CMAKE_SOURCE_DIR}/../../externals/
 # Define our library
 add_library(CreatureVoicesLib STATIC
 
+        src/model/CreatureSpeechRequest.h
+        src/model/CreatureSpeechRequest.cpp
+        src/model/CreatureSpeechResponse.h
+        src/model/CreatureSpeechResponse.cpp
         src/model/HttpMethod.h
         src/model/HttpMethod.cpp
+        src/model/Subscription.h
+        src/model/Subscription.cpp
         src/model/Voice.h
         src/model/Voice.cpp
 
 
         src/CreatureVoices.h
-        src/CreatureVoices.cpp
         src/VoiceResult.h
         src/CurlBase.h
         src/CurlBase.cpp
         src/CurlHandle.h
         src/CurlHandle.cpp
 
 
+        src/methods/CreatureVoices.cpp
+        src/methods/generateCreatureSpeech.cpp
+        src/methods/getSubscriptionStatus.cpp
+        src/methods/listAllAvailableVoices.cpp
+
+
 )
 
 # Include directories for this library

diff --git a/lib/CreatureVoicesLib/src/CreatureVoices.h b/lib/CreatureVoicesLib/src/CreatureVoices.h
@@ -1,17 +1,21 @@
 
 #pragma once
 
-
-#include <curl/curl.h>
-
+#include <filesystem>
 #include <string>
 #include <vector>
 
+#include <curl/curl.h>
+
 #include "CurlBase.h"
 #include "VoiceResult.h"
+#include "model/CreatureSpeechResponse.h"
+#include "model/CreatureSpeechRequest.h"
 #include "model/HttpMethod.h"
+#include "model/Subscription.h"
 #include "model/Voice.h"
 
+#define VOICES_API_BASE_URL "https://api.elevenlabs.io"
 
 namespace creatures :: voice {
 
@@ -20,9 +24,24 @@ namespace creatures :: voice {
         CreatureVoices(std::string apiKey);
         VoiceResult<std::vector<Voice>> listAllAvailableVoices();
 
+        VoiceResult<Subscription> getSubscriptionStatus();
+
+        /**
+         * Create a new sound file for a creature based on the text given
+         *
+         * @param fileSavePath the location to save the file
+         * @param speechRequest the request to generate the speech
+         * @return A VoiceResult with information about what happened
+         */
+        VoiceResult<CreatureSpeechResponse> generateCreatureSpeech(const std::filesystem::path &fileSavePath,
+                                                                   const CreatureSpeechRequest &speechRequest);
+
     private:
         std::string apiKey;
 
+        std::string makeFileName(const CreatureSpeechRequest &speechRequest);
+        std::string toLowerAndReplaceSpaces(std::string str);
+
     };
 
 }
diff --git a/lib/CreatureVoicesLib/src/CurlBase.cpp b/lib/CreatureVoicesLib/src/CurlBase.cpp
@@ -7,6 +7,7 @@
 #include <fmt/format.h>
 #include <spdlog/spdlog.h>
 
+#include "CreatureVoices.h"
 
 #include "model/HttpMethod.h"
 #include "VoiceResult.h"
@@ -24,7 +25,11 @@ namespace creatures::voice {
 
     CurlHandle CurlBase::createCurlHandle(const std::string& url) {
         debug("Creating a curl handle for URL: {}", url);
-        return CurlHandle(url);
+
+        auto fullUrl = fmt::format("{}{}", VOICES_API_BASE_URL, url);
+        debug("fullUrl: {}", fullUrl);
+
+        return CurlHandle(fullUrl);
     }
 
     VoiceResult<std::string> CurlBase::performRequest(CurlHandle& curlHandle,
@@ -47,7 +52,6 @@ namespace creatures::voice {
         // Set headers
         std::string apiKeyHeader = fmt::format("xi-api-key: {}", apiKey);
         curlHandle.addHeader(apiKeyHeader);
-        curlHandle.addHeader("Content-Type: application/json");
 
         curl_easy_setopt(curlHandle.get(), CURLOPT_WRITEDATA, &response);
         trace("CURL handle set up for writing");

diff --git a/lib/CreatureVoicesLib/src/methods/CreatureVoices.cpp b/lib/CreatureVoicesLib/src/methods/CreatureVoices.cpp
@@ -0,0 +1,25 @@
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <curl/curl.h>
+#include <fmt/format.h>
+#include <nlohmann/json.hpp>
+#include <spdlog/spdlog.h>
+
+
+#include "model/HttpMethod.h"
+#include "VoiceResult.h"
+#include "CreatureVoices.h"
+
+using json = nlohmann::json;
+
+namespace creatures::voice {
+
+
+    /**
+     * Construct a voice client around an API key.
+     *
+     * @param apiKey the key to keep in this object; it is taken by value and moved into the member
+     */
+    CreatureVoices::CreatureVoices(std::string apiKey)
+        : apiKey(std::move(apiKey)) {
+    }
+
+
+}
diff --git a/lib/CreatureVoicesLib/src/methods/generateCreatureSpeech.cpp b/lib/CreatureVoicesLib/src/methods/generateCreatureSpeech.cpp
@@ -0,0 +1,205 @@
+
+#include <chrono>
+#include <filesystem>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <curl/curl.h>
+#include <fmt/format.h>
+#include <nlohmann/json.hpp>
+#include <spdlog/spdlog.h>
+
+
+#include "model/CreatureSpeechResponse.h"
+#include "model/CreatureSpeechRequest.h"
+#include "model/HttpMethod.h"
+#include "VoiceResult.h"
+#include "CreatureVoices.h"
+
+using json = nlohmann::json;
+
+namespace creatures::voice {
+
+    /*
+     * Notes:
+     *
+     * This is defined in: https://elevenlabs.io/docs/api-reference/text-to-speech
+     *
+     */
+
+
+    /**
+     * Generate speech audio for a creature and save it, along with a transcript, in a directory.
+     *
+     * The request is validated before anything is sent to the API. The transcript
+     * (`<base>.txt`) is written first, which doubles as a check that the directory is
+     * writable; the audio returned by the API is then written to `<base>.mp3`. The
+     * transcript is not removed if a later step fails.
+     *
+     * @param fileSavePath an existing directory in which both files are created
+     * @param speechRequest the text, voice, model, and voice settings to use
+     * @return on success, a CreatureSpeechResponse holding both file names and the audio size
+     *         in bytes; otherwise a VoiceError (InvalidData for bad input or an unwritable
+     *         transcript, the request's own error code if the API call fails, InternalError
+     *         if the audio cannot be saved)
+     */
+    VoiceResult<CreatureSpeechResponse> CreatureVoices::generateCreatureSpeech(const std::filesystem::path &fileSavePath,
+                                                                               const CreatureSpeechRequest &speechRequest) {
+
+        // Before we get going, let's validate our inputs. Since the API charges per character, we should make sure
+        // that everything is good to go.
+        if(fileSavePath.empty()) {
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, "File save path cannot be empty")};
+        }
+
+        if(speechRequest.text.empty()) {
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, "Text cannot be empty")};
+        }
+
+        if(speechRequest.voice_id.empty()) {
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, "Voice ID cannot be empty")};
+        }
+
+        if(speechRequest.model_id.empty()) {
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, "Model ID cannot be empty")};
+        }
+
+        // The range checks are written as a negated "in range" test so that a NaN is rejected too
+        if(!(speechRequest.stability >= 0 && speechRequest.stability <= 1)) {
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, "Stability must be between 0 and 1")};
+        }
+
+        if(!(speechRequest.similarity_boost >= 0 && speechRequest.similarity_boost <= 1)) {
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, "Similarity boost must be between 0 and 1")};
+        }
+
+
+        // Make sure that the file path exists and can be written to
+        if (!std::filesystem::exists(fileSavePath)) {
+            std::string errorMessage = fmt::format("File path does not exist: {}", fileSavePath.string());
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, errorMessage)};
+        }
+
+        if (!std::filesystem::is_directory(fileSavePath)) {
+            std::string errorMessage = fmt::format("File path is not a directory: {}", fileSavePath.string());
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, errorMessage)};
+        }
+
+        // Let's test that it's writable by writing out the transcript
+        auto fileBaseName = makeFileName(speechRequest);
+        auto transcriptPath = fileSavePath / fmt::format("{}.txt", fileBaseName);
+
+        info("Transcript path: {}", transcriptPath.string());
+
+        std::ofstream transcriptFile(transcriptPath);
+        if (!transcriptFile.is_open()) {
+            std::string errorMessage = fmt::format("Failed to open transcript file for writing: {}", transcriptPath.string());
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, errorMessage)};
+        }
+
+        // Flush before checking the stream state so that a failed write to disk is actually seen
+        transcriptFile << speechRequest.text;
+        transcriptFile.flush();
+        if(!transcriptFile.good()) {
+            std::string errorMessage = fmt::format("Failed to write to transcript file: {}", transcriptPath.string());
+            transcriptFile.close();
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InvalidData, errorMessage)};
+        }
+        transcriptFile.close();
+
+
+        // If we've made it this far, we're good to go. Let's generate the sound file.
+        auto soundFilePath = fileSavePath / fmt::format("{}.mp3", fileBaseName);
+        debug("Generating speech for creature: {} to file {}", speechRequest.creature_name, soundFilePath.string());
+
+
+
+        const std::string url = fmt::format("/v1/text-to-speech/{}", speechRequest.voice_id);
+        auto curlHandle = createCurlHandle(url);
+        curlHandle.addHeader("Content-Type: application/json");
+        curlHandle.addHeader("Accept: audio/mpeg");
+
+        // Create the JSON request according to the API
+        json requestJson = {
+                {"text", speechRequest.text},
+                {"model_id", speechRequest.model_id},
+                {"voice_settings", {
+                    {"stability", speechRequest.stability},
+                    {"similarity_boost", speechRequest.similarity_boost}
+                }}
+        };
+        std::string requestBody = requestJson.dump();
+        debug("Request body: {}", requestBody);
+
+
+        auto result = performRequest(curlHandle, apiKey, HttpMethod::POST, requestBody);
+        if(!result.isSuccess()) {
+            // Named requestError (not error) so the logging function is not shadowed in this scope
+            auto requestError = result.getError();
+            std::string errorMessage = fmt::format("Unable to generate audio file: {}", requestError->getMessage());
+            error(errorMessage);
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(requestError->getCode(), errorMessage)};
+        }
+
+        // Load the sound file into memory
+        auto httpResponse = result.getValue().value();
+        debug("loaded {} bytes of audio data", httpResponse.size());
+
+        debug("attempting to open file: {}", soundFilePath.string());
+        std::ofstream soundFile(soundFilePath, std::ios::binary);
+        if (!soundFile.is_open()) {
+            std::string errorMessage = fmt::format("Failed to open sound file for writing: {}", soundFilePath.string());
+            error(errorMessage);
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InternalError, errorMessage)};
+        }
+
+        // Flush before checking the stream state so that a failed write to disk is actually seen
+        debug("writing sound data...");
+        soundFile << httpResponse;
+        soundFile.flush();
+        if(!soundFile.good()) {
+            std::string errorMessage = fmt::format("Failed to write to sound file: {}", soundFilePath.string());
+            error(errorMessage);
+            soundFile.close();
+            return VoiceResult<CreatureSpeechResponse>{VoiceError(VoiceError::InternalError, errorMessage)};
+        }
+        soundFile.close();
+        debug("done! sound file written to {}", soundFilePath.string());
+
+        CreatureSpeechResponse response;
+        response.sound_file_name = soundFilePath.string();
+        response.transcript_file_name = transcriptPath.string();
+        response.sound_file_size = httpResponse.size();
+        response.success = true;
+
+        debug("woo! all done!");
+        return VoiceResult<CreatureSpeechResponse>{response};
+    }
+
+
+    /**
+     * Build the base file name (without extension) used for a request's output files.
+     *
+     * The name has three parts joined by underscores:
+     *   <creature name, or the voice ID if there is none>
+     *   <current local time as YYYY-MM-DD_HH-MM-SS>
+     *   <title, or the model ID if there is none>
+     *
+     * The creature name and title are lower-cased with spaces turned into dashes. Any space,
+     * '/' or '\' left in the finished name is then replaced with an underscore.
+     *
+     * @param speechRequest the request to name the files after
+     * @return the generated base file name
+     */
+    std::string CreatureVoices::makeFileName(const CreatureSpeechRequest &speechRequest) {
+
+        // Leading part: who is speaking
+        std::string baseName = speechRequest.creature_name.empty()
+                               ? std::string{speechRequest.voice_id}
+                               : toLowerAndReplaceSpaces(speechRequest.creature_name);
+
+        // Middle part: a timestamp that is safe in a file name and sorts chronologically.
+        // NOTE(review): std::localtime returns a pointer to shared static storage, so this may
+        // not be safe if called from several threads at once - confirm how this is invoked.
+        const auto stamp = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+        std::ostringstream formattedStamp;
+        formattedStamp << std::put_time(std::localtime(&stamp), "%Y-%m-%d_%H-%M-%S");
+        baseName += '_';
+        baseName += formattedStamp.str();
+
+        // Trailing part: what is being said
+        const std::string tail = speechRequest.title.empty()
+                                 ? fmt::format("{}", speechRequest.model_id)
+                                 : toLowerAndReplaceSpaces(speechRequest.title);
+        baseName += '_';
+        baseName += tail;
+
+        // Swap out the characters that would be a problem in a file name
+        for (char &ch : baseName) {
+            if (ch == ' ' || ch == '/' || ch == '\\') {
+                ch = '_';
+            }
+        }
+
+        spdlog::debug("Generated file name: {}", baseName);
+
+        return baseName;
+    }
+
+
+    /**
+     * Convert a string to lower case and replace every space with a dash.
+     *
+     * @param str the text to convert (taken by value; the copy is modified and returned)
+     * @return the converted string
+     */
+    std::string CreatureVoices::toLowerAndReplaceSpaces(std::string str) {
+        // tolower() is undefined for negative values other than EOF, which is what a plain char
+        // holding a non-ASCII byte becomes where char is signed, so go through unsigned char.
+        std::transform(str.begin(), str.end(), str.begin(),
+                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+        std::replace(str.begin(), str.end(), ' ', '-');
+        return str;
+    }
+}