Skip to content

Commit

Permalink
set params.audio.use_vad true
Browse files Browse the repository at this point in the history
  • Loading branch information
litongmacos committed Nov 25, 2023
1 parent 71f12d3 commit d7a3292
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 11 deletions.
2 changes: 1 addition & 1 deletion common/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ nlohmann::json get_result(whisper_context *ctx) {
int64_t t1 = whisper_full_get_segment_t1(ctx, i);
const char *sentence = whisper_full_get_segment_text(ctx, i);
auto result = std::to_string(t0) + "-->" + std::to_string(t1) + ":" + sentence + "\n";
//printf("%s: result:%s\n", get_current_time().c_str(), result.c_str());
// printf("%s: result:%s\n", get_current_time().c_str(), result.c_str());
segment["t0"] = to_timestamp(t0);
segment["t1"] = to_timestamp(t1);
segment["sentence"] = sentence;
Expand Down
2 changes: 1 addition & 1 deletion stream/stream_components_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace stream_components {
n_samples_keep = int32_t(1e-3 * keep_ms * WHISPER_SAMPLE_RATE);
n_samples_len = int32_t(1e-3 * length_ms * WHISPER_SAMPLE_RATE);
n_samples_30s = int32_t(1e-3 * 30000.0 * WHISPER_SAMPLE_RATE);
//use_vad = n_samples_step <= 0;
// use_vad = n_samples_step <= 0;
}
};

Expand Down
24 changes: 15 additions & 9 deletions whisper_server_base_on_uwebsockets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,9 @@ int main(int argc, char **argv) {
// std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
nlohmann::json response;
if (opCode == uWS::OpCode::TEXT) {
// printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),std::string(message).c_str());
// process text message
printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),std::string(message).c_str());

try {
auto jsonMsg = nlohmann::json::parse(message);
std::string signal = jsonMsg["signal"];
Expand All @@ -134,12 +135,13 @@ int main(int argc, char **argv) {
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
}
if (signal == "end") {
printf("%s end\n");
wavWriter.close();
// nlohmann::json response = {{"name",filename},{"signal", signal}};
response = {{"name", filename},
{"signal", signal}};
printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
bool isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
audioBuffer.clear();
if (isOk) {
final_results = get_result(whisperService.ctx);
response["result"] = final_results;
Expand All @@ -163,18 +165,22 @@ int main(int argc, char **argv) {
//write to file
wavWriter.write(pcm16.data(), size / 2);
//convert to float
for (int16_t sample: pcm16) {
float floatSample = static_cast<float>(sample);
audioBuffer.push_back(floatSample);
}
std::vector<float> temp(size / 2);
std::transform(pcm16.begin(), pcm16.end(), temp.begin(), [](int16_t sample) {
return static_cast<float>(sample) / 32768.0f;
});
//insert to audio_buffer
audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());

printf("%s:buffer size:%d\n",get_current_time().c_str(),audioBuffer.size());
// If VAD is enabled
bool isOk;
printf("%s: use_vad: %d\n", get_current_time().c_str(), params.audio.use_vad);
// printf("%s: use_vad: %d\n", get_current_time().c_str(), params.audio.use_vad);
if (params.audio.use_vad) {

bool is_active = ::vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold,
params.audio.freq_thold, false);
printf("%s: is_active: %d,is_last_active \n", get_current_time().c_str(), is_active, is_last_active);
printf("%s: is_active: %d,is_last_active %d\n", get_current_time().c_str(), is_active, is_last_active);
if (!is_active && is_last_active) {
is_last_active = false;
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
Expand All @@ -185,7 +191,7 @@ int main(int argc, char **argv) {
} else {
// asr
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
audioBuffer.clear();
// audioBuffer.clear();
}
printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
if (isOk) {
Expand Down

0 comments on commit d7a3292

Please sign in to comment.