Skip to content

Commit

Permalink
add audio_vad on macos
Browse files Browse the repository at this point in the history
  • Loading branch information
litongmacos committed Nov 25, 2023
1 parent d7a3292 commit 089924c
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 26 deletions.
23 changes: 11 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,10 @@ include_directories(${SDL2_INCLUDE_DIRS})

find_package(SampleRate CONFIG REQUIRED)
find_package(FFMPEG REQUIRED)
# webrtc
include_directories(webrtc)
include_directories(.)
# find cpp files
file(GLOB VAD_FILES simplevad/*.c simplevad/*.h
webrtc/common_audio/*/*.c webrtc/rtc_base/*.c*)

add_executable(audio_vad examples/audio_vad.cpp ${VAD_FILES})
target_link_libraries(audio_vad pthread)

# Locate the SpeexDSP library.
find_library(SPEEXDSP_LIBRARY NAMES speexdsp)
# Locate the directory that contains the SpeexDSP headers.
find_path(SPEEXDSP_INCLUDE_DIRS "speex/speex_preprocess.h")
# Both results are consumed unconditionally by the audio_vad target, so stop
# at configure time with a readable message instead of failing later with a
# "-NOTFOUND" link or include error.
if (NOT SPEEXDSP_LIBRARY OR NOT SPEEXDSP_INCLUDE_DIRS)
    message(FATAL_ERROR
            "SpeexDSP not found (library: ${SPEEXDSP_LIBRARY}, "
            "headers: ${SPEEXDSP_INCLUDE_DIRS}). "
            "Install speexdsp (it is listed in vcpkg.json).")
endif ()

# Detecting Operating Systems
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
Expand All @@ -48,12 +42,17 @@ elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
link_directories(E:\\code\\cpp\\project-ping\\whisper.cpp\\cmake-build-release\\bin)
endif ()

# audio_vad example: built from the example source plus the shared
# common/stream sources it depends on.
add_executable(audio_vad
        examples/audio_vad.cpp
        common/common.cpp
        stream/stream_components_service.cpp
        common/utils.cpp)
target_link_libraries(audio_vad
        PRIVATE
        whisper
        SampleRate::samplerate
        ${SPEEXDSP_LIBRARY})
# Make the SpeexDSP headers visible to this target only.
target_include_directories(audio_vad
        PRIVATE
        ${SPEEXDSP_INCLUDE_DIRS})

add_executable(sdl_version examples/sdl_version.cpp)
target_link_libraries(sdl_version ${SDL2_LIBRARIES})
target_link_libraries(sdl_version PRIVATE ${SDL2_LIBRARIES})

add_executable(simplest examples/simplest.cpp common/common.cpp common/utils.cpp)
target_link_libraries(simplest whisper SampleRate::samplerate)
target_link_libraries(simplest PRIVATE whisper SampleRate::samplerate)

add_executable(stream_local examples/stream_local.cpp common/common.cpp common/common-sdl.cpp common/utils.cpp
stream/stream_components_service.cpp stream/stream_components_audio.cpp
Expand Down
2 changes: 1 addition & 1 deletion common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#define DR_MP3_IMPLEMENTATION

#include "dr_libs/dr_mp3.h"
#include "../dr_libs/dr_mp3.h"
#include <samplerate.h>
#include <cmath>
#include <cstring>
Expand Down
77 changes: 65 additions & 12 deletions examples/audio_vad.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,68 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <filesystem>

int main(int argc, char **argv) {
//default cmake-build-debug/main
const char filename[] = "../pcm/16k_1.pcm";
const char output_dir[] = "output_pcm";
const char output_filename_prefix[] = "16k_1.pcm";
if (!std::filesystem::exists(output_dir)) {
std::filesystem::create_directories(output_dir);
#include <vector>
#include <cstdint>
#include <whisper.h>

#include "../stream/stream_components_service.h"
#include "../stream/stream_components.h"
#include "../common/utils.h"
#include "../common/common.h"
#include <speex/speex_preprocess.h>

using namespace stream_components;


int main() {
std::string wav_file_path = "../samples/jfk.wav"; // 替换为您的 WAV 文件路径
// audio arrays
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
::read_wav(wav_file_path, pcmf32, pcmf32s, false);

printf("size of samples %lu\n", pcmf32.size());


whisper_local_stream_params params;
struct whisper_context_params cparams{};
cparams.use_gpu = params.service.use_gpu;
//Instantiate the service
stream_components::WhisperService whisperService(params.service, params.audio, cparams);

//Simulate websokcet by adding 1500 data each time.
std::vector<float> audio_buffer;
int chunk_size = 160; // 适用于 16 kHz 采样率的 100 毫秒帧
SpeexPreprocessState *st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);

int vad = 1;
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);

bool last_is_speech = false;
// 处理音频帧
for (size_t i = 0; i < pcmf32.size(); i += chunk_size) {
spx_int16_t frame[chunk_size];
for (int j = 0; j < chunk_size; ++j) {
if (i + j < pcmf32.size()) {
frame[j] = (spx_int16_t)(pcmf32[i + j] * 32768);
} else {
frame[j] = 0; // 对于超出范围的部分填充 0
}
}
int is_speech = speex_preprocess_run(st, frame);

// 将当前帧添加到 audio_buffer
audio_buffer.insert(audio_buffer.end(), pcmf32.begin() + i, pcmf32.begin() + std::min(i + chunk_size, pcmf32.size()));
printf("is_speech %d \n",is_speech);
if (!is_speech && last_is_speech) {
bool b = whisperService.process(pcmf32.data(), pcmf32.size());
const nlohmann::json &json_array = get_result(whisperService.ctx);
const std::basic_string<char, std::char_traits<char>, std::allocator<char>> &string = json_array.dump();
printf("%s\n",string.c_str());
return 0;
audio_buffer.clear();
}

last_is_speech = is_speech != 0;
}

speex_preprocess_state_destroy(st);
}
3 changes: 3 additions & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,8 @@
}, {
"name" : "ffmpeg",
"version>=" : "6.1"
}, {
"name" : "speexdsp",
"version>=" : "1.2.1#1"
} ]
}
2 changes: 1 addition & 1 deletion whisper_server_base_on_uwebsockets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ int main(int argc, char **argv) {
} else {
// asr
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
// audioBuffer.clear();
audioBuffer.clear();
}
printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
if (isOk) {
Expand Down

0 comments on commit 089924c

Please sign in to comment.