-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
litongmacos
committed
Nov 25, 2023
1 parent
d7a3292
commit 089924c
Showing
5 changed files
with
81 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,68 @@ | ||
#include <cstdio> | ||
#include <cstdlib> | ||
#include <iostream> | ||
#include <memory> | ||
#include <filesystem> | ||
|
||
int main(int argc, char **argv) { | ||
//default cmake-build-debug/main | ||
const char filename[] = "../pcm/16k_1.pcm"; | ||
const char output_dir[] = "output_pcm"; | ||
const char output_filename_prefix[] = "16k_1.pcm"; | ||
if (!std::filesystem::exists(output_dir)) { | ||
std::filesystem::create_directories(output_dir); | ||
#include <vector> | ||
#include <cstdint> | ||
#include <whisper.h> | ||
|
||
#include "../stream/stream_components_service.h" | ||
#include "../stream/stream_components.h" | ||
#include "../common/utils.h" | ||
#include "../common/common.h" | ||
#include <speex/speex_preprocess.h> | ||
|
||
using namespace stream_components; | ||
|
||
|
||
int main() { | ||
std::string wav_file_path = "../samples/jfk.wav"; // 替换为您的 WAV 文件路径 | ||
// audio arrays | ||
std::vector<float> pcmf32; // mono-channel F32 PCM | ||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM | ||
::read_wav(wav_file_path, pcmf32, pcmf32s, false); | ||
|
||
printf("size of samples %lu\n", pcmf32.size()); | ||
|
||
|
||
whisper_local_stream_params params; | ||
struct whisper_context_params cparams{}; | ||
cparams.use_gpu = params.service.use_gpu; | ||
//Instantiate the service | ||
stream_components::WhisperService whisperService(params.service, params.audio, cparams); | ||
|
||
//Simulate websokcet by adding 1500 data each time. | ||
std::vector<float> audio_buffer; | ||
int chunk_size = 160; // 适用于 16 kHz 采样率的 100 毫秒帧 | ||
SpeexPreprocessState *st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE); | ||
|
||
int vad = 1; | ||
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad); | ||
|
||
bool last_is_speech = false; | ||
// 处理音频帧 | ||
for (size_t i = 0; i < pcmf32.size(); i += chunk_size) { | ||
spx_int16_t frame[chunk_size]; | ||
for (int j = 0; j < chunk_size; ++j) { | ||
if (i + j < pcmf32.size()) { | ||
frame[j] = (spx_int16_t)(pcmf32[i + j] * 32768); | ||
} else { | ||
frame[j] = 0; // 对于超出范围的部分填充 0 | ||
} | ||
} | ||
int is_speech = speex_preprocess_run(st, frame); | ||
|
||
// 将当前帧添加到 audio_buffer | ||
audio_buffer.insert(audio_buffer.end(), pcmf32.begin() + i, pcmf32.begin() + std::min(i + chunk_size, pcmf32.size())); | ||
printf("is_speech %d \n",is_speech); | ||
if (!is_speech && last_is_speech) { | ||
bool b = whisperService.process(pcmf32.data(), pcmf32.size()); | ||
const nlohmann::json &json_array = get_result(whisperService.ctx); | ||
const std::basic_string<char, std::char_traits<char>, std::allocator<char>> &string = json_array.dump(); | ||
printf("%s\n",string.c_str()); | ||
return 0; | ||
audio_buffer.clear(); | ||
} | ||
|
||
last_is_speech = is_speech != 0; | ||
} | ||
|
||
speex_preprocess_state_destroy(st); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,5 +16,8 @@ | |
}, { | ||
"name" : "ffmpeg", | ||
"version>=" : "6.1" | ||
}, { | ||
"name" : "speexdsp", | ||
"version>=" : "1.2.1#1" | ||
} ] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters