Skip to content

Commit

Permalink
fix write file error
Browse files Browse the repository at this point in the history
  • Loading branch information
litongmacos committed Nov 25, 2023
1 parent 089924c commit 1ad886d
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 42 deletions.
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ else ()
target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE libuv::uv)
endif ()

target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE whisper ZLIB::ZLIB ${USOCKETS_LIBRARY} SampleRate::samplerate)


# Link the server target against whisper, zlib, the uSockets library,
# libsamplerate and the SpeexDSP library.
target_link_libraries(whisper_server_base_on_uwebsockets PRIVATE whisper ZLIB::ZLIB ${USOCKETS_LIBRARY}
SampleRate::samplerate ${SPEEXDSP_LIBRARY})
# Add the SpeexDSP include directories to the target's header search path.
target_include_directories(whisper_server_base_on_uwebsockets PRIVATE ${SPEEXDSP_INCLUDE_DIRS})

1 change: 0 additions & 1 deletion examples/audio_vad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ int main() {
std::vector<float> audio_buffer;
// Frame length, in samples, passed to the Speex preprocessor. 160 samples is
// 10 ms of audio at 16 kHz (the earlier note said 100 ms, which would be 1600
// samples) -- assumes WHISPER_SAMPLE_RATE is 16000; TODO confirm.
int chunk_size = 160;
SpeexPreprocessState *st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);

// Switch on voice-activity detection in the preprocessor state.
int vad = 1;
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);

Expand Down
2 changes: 1 addition & 1 deletion examples/simplest.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "../common/common.h"

#include "whisper.h"
#include "common/utils.h"
#include "../common/utils.h"

#include <cmath>
#include <cstdio>
Expand Down
2 changes: 1 addition & 1 deletion handler/inference_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "../params/whisper_params.h"
#include "../nlohmann/json.hpp"
#include "../common/utils.h"
#include "common/common-m4a.h"
#include "../common/common-m4a.h"

using json = nlohmann::json;

Expand Down
82 changes: 46 additions & 36 deletions whisper_server_base_on_uwebsockets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
#include <string>
#include <whisper.h>
#include <sstream>

#include <speex/speex_preprocess.h>
using namespace stream_components;

bool processAudio(WhisperService service, std::vector<float> pcm32, const whisper_local_stream_params &params);

int main(int argc, char **argv) {
// Read parameters...
whisper_local_stream_params params;
Expand All @@ -39,6 +37,8 @@ int main(int argc, char **argv) {
stream_components::WhisperService whisperService(params.service, params.audio, cparams);

const int port = 8090;
std::mutex whisper_mutex;


// started handler
auto started_handler = [](auto *token) {
Expand All @@ -65,6 +65,7 @@ int main(int argc, char **argv) {
thread_local wav_writer wavWriter;
thread_local std::string filename;


nlohmann::json response;
if (opCode == uWS::OpCode::TEXT) {
// printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),std::string(message).c_str());
Expand Down Expand Up @@ -93,7 +94,7 @@ int main(int argc, char **argv) {
// process binary message(PCM16 data)
auto size = message.size();
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
// printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
// add received PCM16 to audio cache
std::vector<int16_t> pcm16(size / 2);
std::memcpy(pcm16.data(), data, size);
Expand All @@ -104,14 +105,17 @@ int main(int argc, char **argv) {
};

// WebSocket /paddlespeech/asr/streaming handler
auto ws_streaming_handler = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
auto ws_streaming_handler = [&whisperService, &params, &whisper_mutex](auto *ws, std::string_view message, uWS::OpCode opCode) {
// The state below is thread_local: one instance per thread that runs this
// handler, preserved between calls rather than recreated per message.
// Float samples appended while handling binary messages and passed to whisperService.process().
thread_local std::vector<float> audioBuffer; //thread-localized variable
// WAV writer: opened on the "start" signal, fed the received PCM16 samples, closed on "end".
thread_local wav_writer wavWriter;
// Output file name: taken from the "start" message, or a timestamp-based ".wav" name.
thread_local std::string filename;
// Previous result of the vad_simple() check.
thread_local bool is_last_active = false;
// Previous per-frame result of speex_preprocess_run().
thread_local bool last_is_speech = false;
// Frame length in samples for the Speex preprocessor. 160 samples is 10 ms at
// 16 kHz (the earlier note said 100 ms, which would be 1600 samples) --
// assumes WHISPER_SAMPLE_RATE is 16000; TODO confirm.
thread_local int chunk_size = 160;
// Speex preprocessor state; created on "start" and destroyed on "end".
thread_local SpeexPreprocessState *st;

//std::unique_ptr<nlohmann::json> results(new nlohmann::json(nlohmann::json::array()));
// Most recent recognition result, filled from get_result(whisperService.ctx).
thread_local nlohmann::json final_results;
// NOTE(review): in the lines shown, thread_id is only referenced by the commented-out log below.
auto thread_id = std::this_thread::get_id();
// auto thread_id = std::this_thread::get_id();
// std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
// JSON reply assembled for the current message.
nlohmann::json response;
if (opCode == uWS::OpCode::TEXT) {
Expand All @@ -122,45 +126,50 @@ int main(int argc, char **argv) {
auto jsonMsg = nlohmann::json::parse(message);
std::string signal = jsonMsg["signal"];
// "start": pick the output file name, reset the results, acknowledge the
// client, then open the WAV file and create the Speex VAD state.
if (signal == "start") {
printf("%s start\n",get_current_time().c_str());

// Use the client-supplied name when it is a string; otherwise derive one from the current time.
if (jsonMsg["name"].is_string()) {
filename = jsonMsg["name"];
} else {
filename = std::to_string(get_current_time_millis()) + ".wav";
}
final_results = nlohmann::json(nlohmann::json::array());
// Tell the client that the server is ready.
response = {{"status", "ok"},
{"signal", "server_ready"}};
ws->send(response.dump(), uWS::OpCode::TEXT);
// presumably 16 = bits per sample and 1 = channel count -- verify against wav_writer::open
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);
// Enable voice-activity detection on the new preprocessor state.
int vad = 1;
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);

}
// "end": run inference on the buffered audio, send the reply, then close
// the WAV file and destroy the Speex state.
if (signal == "end") {
// NOTE(review): this printf has a %s conversion but no matching argument;
// the call two lines below passes the timestamp.
printf("%s end\n");
wavWriter.close();
printf("%s end\n",get_current_time().c_str());
// nlohmann::json response = {{"name",filename},{"signal", signal}};
response = {{"name", filename},
{"signal", signal}};
// NOTE(review): audioBuffer.size() is a size_t; %d does not match it and %lu
// only matches where size_t is unsigned long -- %zu is the portable conversion.
printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
printf("%s:buffer size:%lu\n",get_current_time().c_str(),audioBuffer.size());
// NOTE(review): unlike the binary-message path, this process() call is not
// wrapped in whisper_mutex lock/unlock, and audioBuffer is not cleared in
// the lines shown -- confirm both are intended.
bool isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
if (isOk) {
final_results = get_result(whisperService.ctx);
response["result"] = final_results;
}
ws->send(response.dump(), uWS::OpCode::TEXT);
wavWriter.close();
speex_preprocess_state_destroy(st);
}
// other process logic...
} catch (const std::exception &e) {
std::cerr << "JSON parse error: " << e.what() << std::endl;
auto size = message.size();
}
} else if (opCode == uWS::OpCode::BINARY) {
int size = message.size();
// process binary message(PCM16 data)
auto size = message.size();
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
// add received PCM16 to audio cache
std::vector<int16_t> pcm16(size / 2);

std::memcpy(pcm16.data(), data, size);
//write to file
wavWriter.write(pcm16.data(), size / 2);
Expand All @@ -172,28 +181,41 @@ int main(int argc, char **argv) {
//insert to audio_buffer
audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());

printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
// printf("%s:buffer size:% ld\n",get_current_time().c_str(),audioBuffer.size());
// When VAD is enabled, inference only runs once speech is detected to have ended.
// NOTE(review): isOk has no initializer. In the VAD branch it is assigned only
// when process() is called, yet it is read by the printf and `if (isOk)` after
// the branch -- consider initializing it to false.
bool isOk;
// printf("%s: use_vad: %d\n", get_current_time().c_str(), params.audio.use_vad);
if (params.audio.use_vad) {
// NOTE(review): manual lock()/unlock(); an exception thrown in between would
// leave whisper_mutex locked. A scoped lock (std::lock_guard) would avoid that.
whisper_mutex.lock();
// Walk the received PCM16 samples one chunk_size-sample frame at a time.
for (size_t i = 0; i < pcm16.size(); i += chunk_size) {
// NOTE(review): chunk_size is a runtime int, so this is a variable-length
// array, which is a compiler extension rather than standard C++.
spx_int16_t frame[chunk_size];
for (int j = 0; j < chunk_size; ++j) {
if (i + j < pcm16.size()) {
frame[j] = (spx_int16_t)(pcm16[i + j]);
} else {
frame[j] = 0; // zero-pad the part of the frame past the end of the input
}
}
// With VAD enabled on st, the return value is treated as "frame contains speech".
int is_speech = speex_preprocess_run(st, frame);

// printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_speech, last_is_speech);
// Speech -> silence transition: transcribe what has been buffered and reset the buffer.
// NOTE(review): the break skips the last_is_speech update below, so the flag
// stays true after a transition -- confirm this is intended.
if (!is_speech && last_is_speech) {
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
audioBuffer.clear();
break;
}
last_is_speech = is_speech != 0;

// Energy-based activity check over the buffered audio (vad_simple).
bool is_active = ::vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold,
params.audio.freq_thold, false);
printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_active, is_last_active);
// Active -> inactive transition: transcribe the buffered audio and reset the buffer.
if (!is_active && is_last_active) {
is_last_active = false;
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
audioBuffer.clear();
} else {
is_last_active = is_active;
}
whisper_mutex.unlock();
} else {
// VAD disabled: transcribe the buffered audio on every binary message.
whisper_mutex.lock();
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
audioBuffer.clear();
whisper_mutex.unlock();
}
printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
// printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
// On success, attach the recognition result to the reply.
if (isOk) {
final_results = get_result(whisperService.ctx);
response["result"] = final_results;
Expand Down Expand Up @@ -221,18 +243,6 @@ int main(int argc, char **argv) {
.listen(port, started_handler).run();
}

/**
 * Runs Whisper inference over a buffer of float PCM samples.
 *
 * @param whisperService service whose process() is invoked (received by value).
 * @param pcm32          samples to transcribe (received by value).
 * @param params         stream parameters; only params.audio.use_vad is read.
 * @return the result of whisperService.process() for the buffer.
 */
bool processAudio(WhisperService whisperService, std::vector<float> pcm32, const whisper_local_stream_params &params) {
    auto *samples = pcm32.data();
    const auto sample_count = pcm32.size();

    if (!params.audio.use_vad) {
        // Plain ASR: transcribe the whole buffer.
        return whisperService.process(samples, sample_count);
    }

    // printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
    // TODO: implement VAD handling,
    //bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
    // Until that exists, the VAD path transcribes the buffer exactly like the non-VAD path.
    return whisperService.process(samples, sample_count);
}




0 comments on commit 1ad886d

Please sign in to comment.