mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
rename src/e2e_vad.h
This commit is contained in:
parent
0535db1c65
commit
fa0356b81d
@ -4,7 +4,7 @@
|
||||
//#include "glog/logging.h"
|
||||
|
||||
|
||||
void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
|
||||
void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
|
||||
float vad_speech_noise_thres) {
|
||||
session_options_.SetIntraOpNumThreads(1);
|
||||
session_options_.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
|
||||
@ -14,9 +14,9 @@ void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn
|
||||
this->vad_max_len_=vad_max_len;
|
||||
this->vad_speech_noise_thres_=vad_speech_noise_thres;
|
||||
|
||||
read_model(vad_model);
|
||||
load_cmvn(vad_cmvn.c_str());
|
||||
init_cache();
|
||||
ReadModel(vad_model);
|
||||
LoadCmvn(vad_cmvn.c_str());
|
||||
InitCache();
|
||||
|
||||
fbank_opts.frame_opts.dither = 0;
|
||||
fbank_opts.mel_opts.num_bins = 80;
|
||||
@ -29,7 +29,7 @@ void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn
|
||||
|
||||
}
|
||||
|
||||
void FsmnVad::read_model(const std::string &vad_model) {
|
||||
void FsmnVad::ReadModel(const std::string &vad_model) {
|
||||
try {
|
||||
vad_session_ = std::make_shared<Ort::Session>(
|
||||
env_, vad_model.c_str(), session_options_);
|
||||
@ -148,7 +148,6 @@ void FsmnVad::Forward(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
|
||||
const std::vector<float> &waves) {
|
||||
knf::OnlineFbank fbank(fbank_opts);
|
||||
@ -162,7 +161,7 @@ void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad
|
||||
}
|
||||
}
|
||||
|
||||
void FsmnVad::load_cmvn(const char *filename)
|
||||
void FsmnVad::LoadCmvn(const char *filename)
|
||||
{
|
||||
using namespace std;
|
||||
ifstream cmvn_stream(filename);
|
||||
@ -240,7 +239,7 @@ std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>
|
||||
}
|
||||
|
||||
std::vector<std::vector<int>>
|
||||
FsmnVad::infer(const std::vector<float> &waves) {
|
||||
FsmnVad::Infer(const std::vector<float> &waves) {
|
||||
std::vector<std::vector<float>> vad_feats;
|
||||
std::vector<std::vector<float>> vad_probs;
|
||||
FbankKaldi(vad_sample_rate_, vad_feats, waves);
|
||||
@ -255,7 +254,7 @@ FsmnVad::infer(const std::vector<float> &waves) {
|
||||
|
||||
}
|
||||
|
||||
void FsmnVad::init_cache(){
|
||||
void FsmnVad::InitCache(){
|
||||
std::vector<float> cache_feats(128 * 19 * 1, 0);
|
||||
for (int i=0;i<4;i++){
|
||||
in_cache_.emplace_back(cache_feats);
|
||||
@ -264,13 +263,11 @@ void FsmnVad::init_cache(){
|
||||
|
||||
void FsmnVad::Reset(){
|
||||
in_cache_.clear();
|
||||
init_cache();
|
||||
InitCache();
|
||||
};
|
||||
|
||||
void FsmnVad::test() {
|
||||
|
||||
void FsmnVad::Test() {
|
||||
}
|
||||
|
||||
FsmnVad::FsmnVad():env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options_{} {
|
||||
|
||||
}
|
||||
|
||||
@ -7,16 +7,16 @@
|
||||
class FsmnVad {
|
||||
public:
|
||||
FsmnVad();
|
||||
void test();
|
||||
void init_vad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
|
||||
void Test();
|
||||
void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
|
||||
float vad_speech_noise_thres);
|
||||
|
||||
std::vector<std::vector<int>> infer(const std::vector<float> &waves);
|
||||
std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
|
||||
void Reset();
|
||||
|
||||
private:
|
||||
|
||||
void read_model(const std::string &vad_model);
|
||||
void ReadModel(const std::string &vad_model);
|
||||
|
||||
static void GetInputOutputInfo(
|
||||
const std::shared_ptr<Ort::Session> &session,
|
||||
@ -31,8 +31,8 @@ private:
|
||||
const std::vector<std::vector<float>> &chunk_feats,
|
||||
std::vector<std::vector<float>> *out_prob);
|
||||
|
||||
void load_cmvn(const char *filename);
|
||||
void init_cache();
|
||||
void LoadCmvn(const char *filename);
|
||||
void InitCache();
|
||||
|
||||
std::shared_ptr<Ort::Session> vad_session_ = nullptr;
|
||||
Ort::Env env_;
|
||||
|
||||
@ -14,7 +14,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad,
|
||||
string vad_path = pathAppend(path, "vad_model.onnx");
|
||||
string mvn_path = pathAppend(path, "vad.mvn");
|
||||
vadHandle = make_unique<FsmnVad>();
|
||||
vadHandle->init_vad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
|
||||
vadHandle->InitVad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
|
||||
}
|
||||
|
||||
// PUNC model
|
||||
@ -85,7 +85,7 @@ void ModelImp::reset()
|
||||
}
|
||||
|
||||
vector<std::vector<int>> ModelImp::vad_seg(std::vector<float>& pcm_data){
|
||||
return vadHandle->infer(pcm_data);
|
||||
return vadHandle->Infer(pcm_data);
|
||||
}
|
||||
|
||||
string ModelImp::AddPunc(const char* szInput){
|
||||
|
||||
@ -31,7 +31,7 @@ using namespace std;
|
||||
#include "tokenizer.h"
|
||||
#include "CT-transformer.h"
|
||||
#include "FsmnVad.h"
|
||||
#include "e2e_vad.h"
|
||||
#include "E2EVad.h"
|
||||
#include "Vocab.h"
|
||||
#include "CommonStruct.h"
|
||||
#include "Audio.h"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user