Streaming audio/mp3 response from POST request #687

simon-archer · 2024-02-21T20:07:28Z

simon-archer
Feb 21, 2024

Hi

I'm trying to stream the audio/mp3 response (Transfer-Encoding: chunked) from a TTS API I've made, but without success.
My attempts so far have used connecttohost() with the text appended to the URL, but the limit on URL length makes this a poor solution, and it also gives me limited control over when the stream is finished. The decoding and streaming of the response itself works fine.

I have a hunch I could just use the MP3 decoder used by the library somehow, but lack the understanding to figure out how to get started.

My dream scenario:

Make a POST request with a header that includes the text for the TTS audio/mp3 I want to generate.
The mp3 audio is received using Transfer-Encoding: chunked, decoded from mp3 and played through I2S as soon as the first chunk is received, and only played once.
The connection is kept alive, for further POST requests.

This is my current code, which uses a different API for receiving the text to generate the TTS. It's a bit of a mess at the moment.
#include <driver/i2s.h>
#include <WiFi.h>
#include <HTTPClient.h>
#include <Audio.h>
#include <LittleFS.h>
#include <AudioGeneratorMP3.h>
#include <AudioFileSourcePROGMEM.h>
#include <AudioOutputI2S.h>

// WiFi credentials (placeholders) passed to WiFi.begin() in setup_wifi()
const char* ssid = "SSID";
const char* password = "PASSWORD";

// Recording format: SAMPLE_RATE is used for the microphone (RX) I2S
// configuration and for the WAV header; SAMPLE_BITS feeds the buffer-size
// and byte-rate calculations.
#define SAMPLE_RATE 8000U
#define SAMPLE_BITS 16
#define MAX_RECORD_TIME 60 // Maximum record time in seconds
#define BUTTON_PIN 4 // Button connected to pin 4
// Bytes reserved at the start of audioBuffer for the WAV header
#define WAV_HEADER_SIZE 44

// Speaker-side I2S pins, used by audio.setPinout() and the TX pin configuration
#define I2S_DOUT 9
#define I2S_BCLK 8
#define I2S_LRC 7

// Player object driven from loop(); send_audio_data() points it at the TTS URL
Audio audio;
// mp3 and file are declared but not referenced by any code visible in this sketch
AudioGeneratorMP3 *mp3;
AudioFileSourcePROGMEM *file;
// Created and started in setup(); nothing else in the visible code uses it
AudioOutputI2S *out;

// Size in bytes of the recording buffer: MAX_RECORD_TIME seconds of samples
// at SAMPLE_RATE / SAMPLE_BITS, plus room for the WAV header
#define MAX_AUDIO_BUFFER_SIZE (SAMPLE_RATE * SAMPLE_BITS / 8 * MAX_RECORD_TIME + WAV_HEADER_SIZE)

// HTTP client used by send_audio_data() for the POST request
HTTPClient http;
// Recording buffer (WAV header followed by samples), allocated with ps_malloc() in setup()
uint8_t *audioBuffer = nullptr;
// Button state as last accepted by button_isr_handler() (true while pressed)
bool isRecording = false;
// Set by record_audio_task() when a finished recording is waiting to be sent
bool sendPostFlag = false;
// Set in button_isr_handler(), cleared in record_audio_task() after it switches I2S to RX.
// NOTE(review): shared between an ISR and a task without volatile/atomic — confirm this is safe.
bool requestSwitchToRxMode = false;
// millis() timestamp of the last button change accepted by the ISR
unsigned long lastDebounceTime = 0;
// Minimum gap in milliseconds between two accepted button changes
const unsigned long debounceDelay = 100;
// Write position in audioBuffer; reset to WAV_HEADER_SIZE for each new recording
size_t audioBufferIndex = 0;
// Carries the bool recording state from button_isr_handler() to record_audio_task()
QueueHandle_t xQueue;

// Function prototypes (definitions follow further down in this sketch)

// Blocks until WiFi reports WL_CONNECTED
void setup_wifi();
// Configures BUTTON_PIN and attaches button_isr_handler to it
void setup_button();
// (Re)installs the I2S driver on port 0 for the given mode (TX speaker / RX microphone)
void setup_i2s(i2s_mode_t mode);
// NOTE(review): declared here but no definition is visible in this sketch — confirm it is still needed
void switch_i2s_mode(i2s_mode_t mode);
// Button interrupt handler: debounces and posts the new recording state to xQueue
void IRAM_ATTR button_isr_handler();
// FreeRTOS task body that records audio and triggers the upload
void record_audio_task(void *param);
// POSTs `length` bytes starting at `data` to the server and starts playback of the reply
void send_audio_data(uint8_t *data, size_t length);
// Writes a WAV header for `wav_size` bytes of PCM data into `wav_header`
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate);

void setup() {
Serial.begin(115200);
while (!Serial);

audioBuffer = (uint8_t *)ps_malloc(MAX_AUDIO_BUFFER_SIZE);
if (audioBuffer == nullptr) {
Serial.println("Failed to allocate memory for audio buffer");
return;
}

setup_wifi();
setup_button();
setup_i2s(I2S_MODE_RX);
audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
audio.setVolume(200);

out = new AudioOutputI2S();
out->begin();

if (!LittleFS.begin(true)) {
Serial.println("An error has occurred while mounting LittleFS");
return;
}

xQueue = xQueueCreate(10, sizeof(bool));
xTaskCreate(record_audio_task, "RecordAudioTask", 16384, NULL, 1, NULL);
}

// Arduino main loop: each pass only calls audio.loop() on the global player.
void loop() {
    audio.loop();
}

// Starts the WiFi connection with the global credentials and blocks until
// the status becomes WL_CONNECTED, printing a progress line every 500 ms.
void setup_wifi() {
    WiFi.begin(ssid, password);

    for (;;) {
        if (WiFi.status() == WL_CONNECTED) {
            break;
        }
        delay(500);
        Serial.println("Connecting to WiFi...");
    }

    Serial.println("Connected to WiFi");
}

// Configures the button pin with the internal pull-up and registers
// button_isr_handler for both edges (CHANGE), i.e. press and release.
void setup_button() {
    pinMode(BUTTON_PIN, INPUT_PULLUP);

    const auto buttonInterrupt = digitalPinToInterrupt(BUTTON_PIN);
    attachInterrupt(buttonInterrupt, button_isr_handler, CHANGE);
}

/**
 * (Re)installs the I2S driver on port 0 for either playback or capture.
 *
 * @param mode  I2S_MODE_TX selects the speaker setup (24 kHz, 32-bit, pins
 *              I2S_BCLK / I2S_LRC / I2S_DOUT). I2S_MODE_RX selects the PDM
 *              microphone setup (SAMPLE_RATE, 16-bit, clock on GPIO 42, data
 *              on GPIO 41). If both bits are set, TX wins.
 *
 * A mode with neither bit set is rejected with a log message and the current
 * driver is left untouched; previously the driver was installed from
 * uninitialised configuration structs in that case. Failures reported by
 * i2s_driver_install() / i2s_set_pin() are logged instead of being ignored.
 */
void setup_i2s(i2s_mode_t mode) {
    const i2s_port_t port = static_cast<i2s_port_t>(0);

    // Value-initialise so no field is ever read uninitialised.
    i2s_config_t i2s_config = {};
    i2s_pin_config_t pin_config = {};

    if (mode & I2S_MODE_TX) {
        // Speaker driver configuration
        i2s_config = {
            .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | mode),
            .sample_rate          = 24000,
            .bits_per_sample      = I2S_BITS_PER_SAMPLE_32BIT,
            .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
            .communication_format = I2S_COMM_FORMAT_STAND_I2S,
            .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
            .dma_buf_count        = 8,
            .dma_buf_len          = 512,
            .use_apll             = false,
            .tx_desc_auto_clear   = false,  // Only applicable in TX mode
            .fixed_mclk           = 0
        };
        // Speaker pin configuration
        pin_config = {
            .bck_io_num = I2S_BCLK,
            .ws_io_num = I2S_LRC,
            .data_out_num = I2S_DOUT,
            .data_in_num = -1  // Not used
        };
    } else if (mode & I2S_MODE_RX) {
        // Microphone (PDM) driver configuration
        i2s_config = {
            .mode                 = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_PDM | mode),
            .sample_rate          = SAMPLE_RATE,
            .bits_per_sample      = I2S_BITS_PER_SAMPLE_16BIT,
            .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
            .communication_format = I2S_COMM_FORMAT_STAND_I2S,
            .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1,
            .dma_buf_count        = 8,
            .dma_buf_len          = 512,
            .use_apll             = false,
            .tx_desc_auto_clear   = false,  // Only applicable in TX mode
            .fixed_mclk           = 0
        };
        // Microphone pin configuration
        pin_config = {
            .bck_io_num = -1,  // Not used
            .ws_io_num = 42,  // IIS_LCLK for microphone
            .data_out_num = -1,  // Not used
            .data_in_num = 41   // IIS_DOUT for microphone
        };
    } else {
        Serial.println("setup_i2s: mode has neither I2S_MODE_TX nor I2S_MODE_RX set");
        return;
    }

    // Uninstall the existing driver before setting a new configuration. The
    // result is deliberately not checked: on the very first call there is no
    // driver to remove yet.
    i2s_driver_uninstall(port);

    esp_err_t err = i2s_driver_install(port, &i2s_config, 0, NULL);
    if (err != ESP_OK) {
        Serial.print("i2s_driver_install failed: ");
        Serial.println(static_cast<int>(err));
        return;
    }

    err = i2s_set_pin(port, &pin_config);
    if (err != ESP_OK) {
        Serial.print("i2s_set_pin failed: ");
        Serial.println(static_cast<int>(err));
        return;
    }

    i2s_zero_dma_buffer(port);
}

// Button interrupt handler (fires on every edge of BUTTON_PIN).
// Ignores edges that arrive within debounceDelay ms of the last accepted one,
// then treats a LOW level as "pressed". When the pressed state differs from
// isRecording it stores the new state, requests the switch to RX mode on a
// press, and posts the new state to xQueue for record_audio_task().
void IRAM_ATTR button_isr_handler() {
    const unsigned long now = millis();
    if (now - lastDebounceTime <= debounceDelay) {
        return;  // still inside the debounce window
    }

    const bool pressed = (digitalRead(BUTTON_PIN) == LOW);
    if (pressed == isRecording) {
        return;  // no state change to report
    }

    isRecording = pressed;
    lastDebounceTime = now;

    if (pressed) {
        requestSwitchToRxMode = true;  // the task performs the actual switch
    }
    xQueueSendFromISR(xQueue, &isRecording, NULL);
}

void record_audio_task(void *param) {
bool shouldRecord = false;
bool currentlyRecording = false;
Serial.println("Record audio task started.");

while (true) {
    // Handle mode switching request outside ISR
    if (requestSwitchToRxMode) {
        setup_i2s(I2S_MODE_RX); // Perform the mode switching
        LittleFS.remove("/response.mp3"); 
        requestSwitchToRxMode = false; // Reset the request flag
    }

    // Check for recording state updates
    while (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE) {
        if (shouldRecord && !currentlyRecording) {
            currentlyRecording = true;
            Serial.println("Starting recording...");
            audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for new recording
        } else if (!shouldRecord && currentlyRecording) {
            currentlyRecording = false;
            Serial.println("Stopping recording.");
            setup_i2s(I2S_MODE_TX); // Switch back to TX mode after recording stops

            // Update WAV header and prepare to send data
            generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
            sendPostFlag = true;
        }
    }

    if (currentlyRecording) {
        size_t bytesRead = 0;
        TickType_t i2sReadTimeoutTicks = 1;  // 1 tick timeout for minimal blocking

        // Attempt to read audio data from I2S with minimal blocking
        esp_err_t result = i2s_read((i2s_port_t)0, audioBuffer + audioBufferIndex, MAX_AUDIO_BUFFER_SIZE - audioBufferIndex, &bytesRead, i2sReadTimeoutTicks);

        if (result == ESP_OK && bytesRead > 0) {
            audioBufferIndex += bytesRead;
            // Check for buffer overflow
            if (audioBufferIndex >= MAX_AUDIO_BUFFER_SIZE) {
                currentlyRecording = false;
                Serial.println("Max recording length reached, stopping recording.");
                // Update WAV header with actual data siz e and prepare to send data
                generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
                sendPostFlag = true;  // Set flag to indicate data is ready to be sent
            }
        }

        // Immediately check the queue again to see if recording should stop
        if (xQueueReceive(xQueue, &shouldRecord, 0) == pdTRUE && !shouldRecord) {
            currentlyRecording = false;
            Serial.println("Stopping recording via queue message.");
            generate_wav_header(audioBuffer, audioBufferIndex - WAV_HEADER_SIZE, SAMPLE_RATE);
            sendPostFlag = true;  // Prepare to send data
        }

        // Use a short delay to yield to other tasks
        vTaskDelay(1 / portTICK_PERIOD_MS);
    } else {
        // If not recording, check less frequently
        vTaskDelay(10 / portTICK_PERIOD_MS);
    }

    // Check if the audio data is ready to be sent
    if (sendPostFlag) {
        send_audio_data(audioBuffer, audioBufferIndex);  // Send the recorded audio data
        audioBufferIndex = WAV_HEADER_SIZE;  // Reset index for the next recording
        sendPostFlag = false;  // Reset the flag
    }
}

}

/**
 * Percent-encodes a string for use as a URL query value.
 *
 * ASCII letters and digits are copied unchanged; every other byte, including
 * a space (which becomes "%20"), is written as '%' followed by two uppercase
 * hex digits.
 *
 * Each character is handled as an unsigned byte and classified with explicit
 * ASCII ranges. The previous isalnum(char) call is undefined for negative
 * values where `char` is signed, and its result depends on the locale.
 *
 * @param str  text to encode
 * @return     the encoded text
 */
String urlEncode(const String& str) {
    static const char kHexDigits[] = "0123456789ABCDEF";

    String encodedString = "";
    for (unsigned int i = 0; i < str.length(); i++) {
        const unsigned char octet = static_cast<unsigned char>(str.charAt(i));

        const bool isAsciiAlnum = (octet >= '0' && octet <= '9') ||
                                  (octet >= 'A' && octet <= 'Z') ||
                                  (octet >= 'a' && octet <= 'z');
        if (isAsciiAlnum) {
            encodedString += static_cast<char>(octet);
        } else {
            encodedString += '%';
            encodedString += kHexDigits[octet >> 4];    // high nibble
            encodedString += kHexDigits[octet & 0x0F];  // low nibble
        }
    }
    return encodedString;
}

/**
 * POSTs a recorded WAV buffer to the /api/text endpoint and, when the server
 * answers with a 2xx status and a non-empty body, starts streaming the
 * matching TTS audio from /api/live-audio.
 *
 * @param data    start of the WAV data (header included)
 * @param length  number of bytes to send
 *
 * Nothing is sent while WiFi is disconnected. A transport error (POST
 * returns <= 0), a non-2xx HTTP status or an empty reply is logged and no
 * playback is started. Previously any positive status code, including
 * 4xx/5xx, had its body forwarded to the TTS endpoint.
 */
void send_audio_data(uint8_t *data, size_t length) {
    if (WiFi.status() != WL_CONNECTED) {
        Serial.println("WiFi not connected, audio data not sent");
        return;
    }

    if (!http.connected()) { // Only begin a new connection if not already connected
        setup_i2s(I2S_MODE_TX);
        http.begin("http://192.168.1.137:8000/api/text"); // Adjust this to your server's endpoint
        http.addHeader("Content-Type", "audio/wav");
    }

    http.setTimeout(30000); // Set timeout for the request
    Serial.println("Sending audio data...");
    const int httpResponseCode = http.POST(data, length);

    if (httpResponseCode <= 0) {
        Serial.print("Error on sending POST: ");
        Serial.println(httpResponseCode);
        return;
    }

    // Read the body in every case so it can be shown for diagnostics.
    String response = http.getString();
    Serial.println("Response received:");
    Serial.println(response);

    if (httpResponseCode < 200 || httpResponseCode >= 300) {
        Serial.print("Server returned HTTP status: ");
        Serial.println(httpResponseCode);
        return;
    }
    if (response.length() == 0) {
        Serial.println("Empty response, nothing to stream");
        return;
    }

    // URL-encode the received plain text response
    String encodedResponse = urlEncode(response);
    Serial.println("Encoded response: " + encodedResponse);

    // Append the encoded text as a query parameter to the api/live-audio endpoint URL
    String liveAudioUrl = "http://192.168.1.137:8000/api/live-audio?text=" + encodedResponse;
    Serial.println("Live Audio Url" + liveAudioUrl);

    // Use the modified URL to stream the audio response
    audio.connecttohost(liveAudioUrl.c_str());
    Serial.println("Streaming audio...");
}

/**
 * Writes a 44-byte PCM WAV (RIFF) header for mono, 16-bit audio.
 *
 * @param wav_header   destination; must have room for at least 44 bytes.
 *                     A null pointer is ignored.
 * @param wav_size     size of the sample data in bytes (Subchunk2Size)
 * @param sample_rate  sample rate in Hz; also determines the ByteRate field
 *
 * All multi-byte fields are stored little-endian. ByteRate is derived from
 * the `sample_rate` argument so it always agrees with the SampleRate field.
 * Each byte is extracted with an explicit cast, because implicit
 * uint32_t -> uint8_t narrowing inside a braced initializer is ill-formed.
 */
void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate)
{
    constexpr uint32_t kHeaderSize    = 44;  // size of the header written below
    constexpr uint32_t kNumChannels   = 1;
    constexpr uint32_t kBitsPerSample = 16;
    constexpr uint32_t kBlockAlign    = kNumChannels * kBitsPerSample / 8;

    if (wav_header == nullptr) {
        return;
    }

    // RIFF ChunkSize counts everything after the 8-byte "RIFF"+size prefix.
    const uint32_t file_size = wav_size + kHeaderSize - 8;
    const uint32_t byte_rate = sample_rate * kBlockAlign;

    // Byte `index` (0 = least significant) of a little-endian field
    const auto le = [](uint32_t value, unsigned index) {
        return static_cast<uint8_t>((value >> (8 * index)) & 0xFF);
    };

    const uint8_t set_wav_header[kHeaderSize] = {
        'R', 'I', 'F', 'F', // ChunkID
        le(file_size, 0), le(file_size, 1), le(file_size, 2), le(file_size, 3), // ChunkSize
        'W', 'A', 'V', 'E', // Format
        'f', 'm', 't', ' ', // Subchunk1ID
        0x10, 0x00, 0x00, 0x00, // Subchunk1Size (16 for PCM)
        0x01, 0x00, // AudioFormat (1 for PCM)
        le(kNumChannels, 0), le(kNumChannels, 1), // NumChannels
        le(sample_rate, 0), le(sample_rate, 1), le(sample_rate, 2), le(sample_rate, 3), // SampleRate
        le(byte_rate, 0), le(byte_rate, 1), le(byte_rate, 2), le(byte_rate, 3), // ByteRate
        le(kBlockAlign, 0), le(kBlockAlign, 1), // BlockAlign
        le(kBitsPerSample, 0), le(kBitsPerSample, 1), // BitsPerSample
        'd', 'a', 't', 'a', // Subchunk2ID
        le(wav_size, 0), le(wav_size, 1), le(wav_size, 2), le(wav_size, 3), // Subchunk2Size
    };
    memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Streaming audio/mp3 response from POST request #687

{{title}}

{{editor}}'s edit

{{editor}}'s edit

Replies: 1 comment

Select a reply

Streaming audio/mp3 response from POST request #687

simon-archer Feb 21, 2024

Replies: 1 comment

simon-archer
Feb 21, 2024