mirror of
https://github.com/ggml-org/whisper.cpp.git
synced 2025-09-15 13:28:35 +08:00
Some checks failed
Bindings Tests (Ruby) / ubuntu-22 (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main-cuda.Dockerfile platform:linux/amd64 tag:main-cuda]) (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main-intel.Dockerfile platform:linux/amd64 tag:main-intel]) (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main-musa.Dockerfile platform:linux/amd64 tag:main-musa]) (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main.Dockerfile platform:linux/amd64 tag:main]) (push) Has been cancelled
Examples WASM / deploy-wasm-github-pages (push) Has been cancelled
This commit modifies the test-vad and test-vad-full tests to use CMake definitions for the model and sample paths. The motivation for this is that currently the tests use relative paths which might not always be correct depending on the working directory. With the changes in this commit the tests can be run using ctest: ```console $ ctest -R ^test-vad$ --test-dir build ``` Or directly (which is not currently possible without this fix): ``` ./build/bin/test-vad ``` Resolves: https://github.com/ggml-org/whisper.cpp/issues/3404
84 lines
2.6 KiB
C++
84 lines
2.6 KiB
C++
#include "whisper.h"
|
|
#include "common-whisper.h"
|
|
|
|
#include <cstdio>
|
|
#include <string>
|
|
|
|
#ifdef NDEBUG
|
|
#undef NDEBUG
|
|
#endif
|
|
#include <cassert>
|
|
|
|
// Checks that `params` holds the VAD parameter values this test treats as the
// defaults. Every field gets its own assert so that a failure identifies the
// exact field that differs.
void assert_default_params(const struct whisper_vad_params & params) {
    const auto threshold       = params.threshold;
    const auto min_speech_ms   = params.min_speech_duration_ms;
    const auto min_silence_ms  = params.min_silence_duration_ms;
    const auto samples_overlap = params.samples_overlap;

    assert(threshold == 0.5);
    assert(min_speech_ms == 250);
    assert(min_silence_ms == 100);
    assert(samples_overlap == 0.1f);
}
|
|
|
|
// Checks that `params` holds the VAD context parameter values this test treats
// as the defaults: 4 threads, GPU disabled, GPU device index 0.
void assert_default_context_params(const struct whisper_vad_context_params & params) {
    const auto n_threads  = params.n_threads;
    const auto use_gpu    = params.use_gpu;
    const auto gpu_device = params.gpu_device;

    assert(n_threads == 4);
    assert(!use_gpu);
    assert(gpu_device == 0);
}
|
|
|
|
void test_detect_speech(
|
|
struct whisper_vad_context * vctx,
|
|
struct whisper_vad_params params,
|
|
const float * pcmf32,
|
|
int n_samples) {
|
|
assert(whisper_vad_detect_speech(vctx, pcmf32, n_samples));
|
|
assert(whisper_vad_n_probs(vctx) == 344);
|
|
assert(whisper_vad_probs(vctx) != nullptr);
|
|
}
|
|
|
|
struct whisper_vad_segments * test_detect_timestamps(
|
|
struct whisper_vad_context * vctx,
|
|
struct whisper_vad_params params) {
|
|
struct whisper_vad_segments * timestamps = whisper_vad_segments_from_probs(vctx, params);
|
|
assert(whisper_vad_segments_n_segments(timestamps) == 5);
|
|
|
|
for (int i = 0; i < whisper_vad_segments_n_segments(timestamps); ++i) {
|
|
printf("VAD segment %d: start = %.2f, end = %.2f\n", i,
|
|
whisper_vad_segments_get_segment_t0(timestamps, i),
|
|
whisper_vad_segments_get_segment_t1(timestamps, i));
|
|
}
|
|
|
|
return timestamps;
|
|
}
|
|
|
|
int main() {
|
|
std::string vad_model_path = VAD_MODEL_PATH;
|
|
std::string sample_path = SAMPLE_PATH;
|
|
|
|
// Load the sample audio file
|
|
std::vector<float> pcmf32;
|
|
std::vector<std::vector<float>> pcmf32s;
|
|
assert(read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false));
|
|
assert(pcmf32.size() > 0);
|
|
assert(pcmf32s.size() == 0); // no stereo vector
|
|
|
|
// Load the VAD model
|
|
struct whisper_vad_context_params ctx_params = whisper_vad_default_context_params();
|
|
assert_default_context_params(ctx_params);
|
|
|
|
struct whisper_vad_context * vctx = whisper_vad_init_from_file_with_params(
|
|
vad_model_path.c_str(),
|
|
ctx_params);
|
|
assert(vctx != nullptr);
|
|
|
|
struct whisper_vad_params params = whisper_vad_default_params();
|
|
assert_default_params(params);
|
|
|
|
// Test speech probabilites
|
|
test_detect_speech(vctx, params, pcmf32.data(), pcmf32.size());
|
|
|
|
// Test speech timestamps (uses speech probabilities from above)
|
|
struct whisper_vad_segments * timestamps = test_detect_timestamps(vctx, params);
|
|
|
|
whisper_vad_free_segments(timestamps);
|
|
whisper_vad_free(vctx);
|
|
|
|
return 0;
|
|
}
|