fix fsmn-vad

This commit is contained in:
雾聪 2023-09-01 16:58:22 +08:00
parent de6979c31d
commit beceb14fec
2 changed files with 33 additions and 17 deletions

View File

@@ -279,12 +279,15 @@ std::vector<std::vector<int>>
FsmnVad::Infer(std::vector<float> &waves, bool input_finished) { FsmnVad::Infer(std::vector<float> &waves, bool input_finished) {
std::vector<std::vector<float>> vad_feats; std::vector<std::vector<float>> vad_feats;
std::vector<std::vector<float>> vad_probs; std::vector<std::vector<float>> vad_probs;
std::vector<std::vector<int>> vad_segments;
FbankKaldi(vad_sample_rate_, vad_feats, waves); FbankKaldi(vad_sample_rate_, vad_feats, waves);
if(vad_feats.size() == 0){
return vad_segments;
}
LfrCmvn(vad_feats); LfrCmvn(vad_feats);
Forward(vad_feats, &vad_probs, &in_cache_, input_finished); Forward(vad_feats, &vad_probs, &in_cache_, input_finished);
E2EVadModel vad_scorer = E2EVadModel(); E2EVadModel vad_scorer = E2EVadModel();
std::vector<std::vector<int>> vad_segments;
vad_segments = vad_scorer(vad_probs, waves, true, false, vad_silence_duration_, vad_max_len_, vad_segments = vad_scorer(vad_probs, waves, true, false, vad_silence_duration_, vad_max_len_,
vad_speech_noise_thres_, vad_sample_rate_); vad_speech_noise_thres_, vad_sample_rate_);
return vad_segments; return vad_segments;

View File

@@ -224,12 +224,18 @@ extern "C" {
return nullptr; return nullptr;
funasr::Audio audio(1); funasr::Audio audio(1);
if(wav_format == "pcm" || wav_format == "PCM"){ try{
if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate)) if(wav_format == "pcm" || wav_format == "PCM"){
return nullptr; if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
}else{ return nullptr;
if (!audio.FfmpegLoad(sz_buf, n_len)) }else{
return nullptr; if (!audio.FfmpegLoad(sz_buf, n_len))
return nullptr;
}
}catch (std::exception const &e)
{
LOG(ERROR)<<e.what();
return nullptr;
} }
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
@@ -288,17 +294,24 @@ extern "C" {
return nullptr; return nullptr;
funasr::Audio audio(1); funasr::Audio audio(1);
if(funasr::is_target_file(sz_filename, "wav")){ try{
int32_t sampling_rate_ = -1; if(funasr::is_target_file(sz_filename, "wav")){
if(!audio.LoadWav(sz_filename, &sampling_rate_)) int32_t sampling_rate_ = -1;
return nullptr; if(!audio.LoadWav(sz_filename, &sampling_rate_))
}else if(funasr::is_target_file(sz_filename, "pcm")){ return nullptr;
if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) }else if(funasr::is_target_file(sz_filename, "pcm")){
return nullptr; if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
}else{ return nullptr;
if (!audio.FfmpegLoad(sz_filename)) }else{
return nullptr; if (!audio.FfmpegLoad(sz_filename))
return nullptr;
}
}catch (std::exception const &e)
{
LOG(ERROR)<<e.what();
return nullptr;
} }
funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
p_result->snippet_time = audio.GetTimeLen(); p_result->snippet_time = audio.GetTimeLen();
if(p_result->snippet_time == 0){ if(p_result->snippet_time == 0){