diff --git a/funasr/runtime/onnxruntime/src/e2e_vad.h b/funasr/runtime/onnxruntime/src/E2EVad.h similarity index 100% rename from funasr/runtime/onnxruntime/src/e2e_vad.h rename to funasr/runtime/onnxruntime/src/E2EVad.h diff --git a/funasr/runtime/onnxruntime/src/FsmnVad.cc b/funasr/runtime/onnxruntime/src/FsmnVad.cc index fb6b1680d..0f87cb2f1 100644 --- a/funasr/runtime/onnxruntime/src/FsmnVad.cc +++ b/funasr/runtime/onnxruntime/src/FsmnVad.cc @@ -4,7 +4,7 @@ //#include "glog/logging.h" -void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len, +void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len, float vad_speech_noise_thres) { session_options_.SetIntraOpNumThreads(1); session_options_.SetGraphOptimizationLevel(ORT_ENABLE_ALL); @@ -14,9 +14,9 @@ void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn this->vad_max_len_=vad_max_len; this->vad_speech_noise_thres_=vad_speech_noise_thres; - read_model(vad_model); - load_cmvn(vad_cmvn.c_str()); - init_cache(); + ReadModel(vad_model); + LoadCmvn(vad_cmvn.c_str()); + InitCache(); fbank_opts.frame_opts.dither = 0; fbank_opts.mel_opts.num_bins = 80; @@ -29,7 +29,7 @@ void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn } -void FsmnVad::read_model(const std::string &vad_model) { +void FsmnVad::ReadModel(const std::string &vad_model) { try { vad_session_ = std::make_shared( env_, vad_model.c_str(), session_options_); @@ -148,7 +148,6 @@ void FsmnVad::Forward( } } - void FsmnVad::FbankKaldi(float sample_rate, std::vector> &vad_feats, const std::vector &waves) { knf::OnlineFbank fbank(fbank_opts); @@ -162,7 +161,7 @@ void FsmnVad::FbankKaldi(float sample_rate, std::vector> &vad } } -void FsmnVad::load_cmvn(const char *filename) +void FsmnVad::LoadCmvn(const char *filename) { using namespace std; ifstream cmvn_stream(filename); @@ -240,7 +239,7 @@ std::vector> &FsmnVad::LfrCmvn(std::vector } std::vector> -FsmnVad::infer(const std::vector &waves) { +FsmnVad::Infer(const std::vector &waves) { std::vector> vad_feats; std::vector> vad_probs; FbankKaldi(vad_sample_rate_, vad_feats, waves); @@ -255,7 +254,7 @@ FsmnVad::infer(const std::vector &waves) { } -void FsmnVad::init_cache(){ +void FsmnVad::InitCache(){ std::vector cache_feats(128 * 19 * 1, 0); for (int i=0;i<4;i++){ in_cache_.emplace_back(cache_feats); @@ -264,13 +263,11 @@ void FsmnVad::init_cache(){ void FsmnVad::Reset(){ in_cache_.clear(); - init_cache(); + InitCache(); }; -void FsmnVad::test() { - +void FsmnVad::Test() { } FsmnVad::FsmnVad():env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options_{} { - } diff --git a/funasr/runtime/onnxruntime/src/FsmnVad.h b/funasr/runtime/onnxruntime/src/FsmnVad.h index 1fd6a2129..874460418 100644 --- a/funasr/runtime/onnxruntime/src/FsmnVad.h +++ b/funasr/runtime/onnxruntime/src/FsmnVad.h @@ -7,16 +7,16 @@ class FsmnVad { public: FsmnVad(); - void test(); - void init_vad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len, + void Test(); + void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len, float vad_speech_noise_thres); - std::vector> infer(const std::vector &waves); + std::vector> Infer(const std::vector &waves); void Reset(); private: - void read_model(const std::string &vad_model); + void ReadModel(const std::string &vad_model); static void GetInputOutputInfo( const std::shared_ptr &session, @@ -31,8 +31,8 @@ private: const std::vector> &chunk_feats, std::vector> *out_prob); - void load_cmvn(const char *filename); - void init_cache(); + void LoadCmvn(const char *filename); + void InitCache(); std::shared_ptr vad_session_ = nullptr; Ort::Env env_; diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp index 289eab1c0..2dd7439e7 100644 --- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp +++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp @@ -14,7 +14,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad, string vad_path = pathAppend(path, "vad_model.onnx"); string mvn_path = pathAppend(path, "vad.mvn"); vadHandle = make_unique(); - vadHandle->init_vad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES); + vadHandle->InitVad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES); } // PUNC model @@ -85,7 +85,7 @@ void ModelImp::reset() } vector> ModelImp::vad_seg(std::vector& pcm_data){ - return vadHandle->infer(pcm_data); + return vadHandle->Infer(pcm_data); } string ModelImp::AddPunc(const char* szInput){ diff --git a/funasr/runtime/onnxruntime/src/precomp.h b/funasr/runtime/onnxruntime/src/precomp.h index 7bfa1a695..fbef174cf 100644 --- a/funasr/runtime/onnxruntime/src/precomp.h +++ b/funasr/runtime/onnxruntime/src/precomp.h @@ -31,7 +31,7 @@ using namespace std; #include "tokenizer.h" #include "CT-transformer.h" #include "FsmnVad.h" -#include "e2e_vad.h" +#include "E2EVad.h" #include "Vocab.h" #include "CommonStruct.h" #include "Audio.h"