rename src/e2e_vad.h

This commit is contained in:
lyblsgo 2023-04-24 11:23:40 +08:00
parent 0535db1c65
commit fa0356b81d
5 changed files with 19 additions and 22 deletions

View File

@ -4,7 +4,7 @@
//#include "glog/logging.h"
void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
float vad_speech_noise_thres) {
session_options_.SetIntraOpNumThreads(1);
session_options_.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
@ -14,9 +14,9 @@ void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn
this->vad_max_len_=vad_max_len;
this->vad_speech_noise_thres_=vad_speech_noise_thres;
read_model(vad_model);
load_cmvn(vad_cmvn.c_str());
init_cache();
ReadModel(vad_model);
LoadCmvn(vad_cmvn.c_str());
InitCache();
fbank_opts.frame_opts.dither = 0;
fbank_opts.mel_opts.num_bins = 80;
@ -29,7 +29,7 @@ void FsmnVad::init_vad(const std::string &vad_model, const std::string &vad_cmvn
}
void FsmnVad::read_model(const std::string &vad_model) {
void FsmnVad::ReadModel(const std::string &vad_model) {
try {
vad_session_ = std::make_shared<Ort::Session>(
env_, vad_model.c_str(), session_options_);
@ -148,7 +148,6 @@ void FsmnVad::Forward(
}
}
void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
const std::vector<float> &waves) {
knf::OnlineFbank fbank(fbank_opts);
@ -162,7 +161,7 @@ void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad
}
}
void FsmnVad::load_cmvn(const char *filename)
void FsmnVad::LoadCmvn(const char *filename)
{
using namespace std;
ifstream cmvn_stream(filename);
@ -240,7 +239,7 @@ std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>
}
std::vector<std::vector<int>>
FsmnVad::infer(const std::vector<float> &waves) {
FsmnVad::Infer(const std::vector<float> &waves) {
std::vector<std::vector<float>> vad_feats;
std::vector<std::vector<float>> vad_probs;
FbankKaldi(vad_sample_rate_, vad_feats, waves);
@ -255,7 +254,7 @@ FsmnVad::infer(const std::vector<float> &waves) {
}
void FsmnVad::init_cache(){
void FsmnVad::InitCache(){
std::vector<float> cache_feats(128 * 19 * 1, 0);
for (int i=0;i<4;i++){
in_cache_.emplace_back(cache_feats);
@ -264,13 +263,11 @@ void FsmnVad::init_cache(){
// Empties in_cache_ and then re-runs the cache initialization so the
// object starts again from a freshly built cache.
// NOTE(review): both init_cache() and InitCache() are listed below; they look
// like the pre-rename and post-rename spelling of the same single call shown
// together — confirm that only one of them is meant to remain.
void FsmnVad::Reset(){
in_cache_.clear();
init_cache();
InitCache();
};
// Member function with an empty body; it performs no work.
// NOTE(review): the two signature lines below appear to be the old (test) and
// new (Test) spelling of one definition — confirm which one is current.
void FsmnVad::test() {
void FsmnVad::Test() {
}
// Default constructor: builds env_ from ORT_LOGGING_LEVEL_ERROR and an empty
// string, and value-initializes session_options_. The body does nothing else;
// no model is loaded here.
FsmnVad::FsmnVad():env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options_{} {
}

View File

@ -7,16 +7,16 @@
class FsmnVad {
public:
FsmnVad();
void test();
void init_vad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
void Test();
void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
float vad_speech_noise_thres);
std::vector<std::vector<int>> infer(const std::vector<float> &waves);
std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
void Reset();
private:
void read_model(const std::string &vad_model);
void ReadModel(const std::string &vad_model);
static void GetInputOutputInfo(
const std::shared_ptr<Ort::Session> &session,
@ -31,8 +31,8 @@ private:
const std::vector<std::vector<float>> &chunk_feats,
std::vector<std::vector<float>> *out_prob);
void load_cmvn(const char *filename);
void init_cache();
void LoadCmvn(const char *filename);
void InitCache();
std::shared_ptr<Ort::Session> vad_session_ = nullptr;
Ort::Env env_;

View File

@ -14,7 +14,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad,
// Build the VAD model and CMVN file paths under `path`, then create and
// initialize the FsmnVad instance held by vadHandle.
string vad_path = pathAppend(path, "vad_model.onnx");
string mvn_path = pathAppend(path, "vad.mvn");
vadHandle = make_unique<FsmnVad>();
// NOTE(review): InitVad is declared with the parameter order
// (..., vad_sample_rate, vad_silence_duration, vad_max_len, ...), but this
// call passes VAD_MAX_LEN before VAD_SILENCE_DYRATION — the two arguments
// look swapped relative to the parameter names; confirm against the macros.
vadHandle->init_vad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
vadHandle->InitVad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
}
// PUNC model
@ -85,7 +85,7 @@ void ModelImp::reset()
}
// Hands pcm_data to the FsmnVad object behind vadHandle and returns the
// vector<vector<int>> it produces, unchanged.
// NOTE(review): the infer/Infer lines below appear to be the old and new name
// of the same call shown together — confirm that only one return remains.
vector<std::vector<int>> ModelImp::vad_seg(std::vector<float>& pcm_data){
return vadHandle->infer(pcm_data);
return vadHandle->Infer(pcm_data);
}
string ModelImp::AddPunc(const char* szInput){

View File

@ -31,7 +31,7 @@ using namespace std;
#include "tokenizer.h"
#include "CT-transformer.h"
#include "FsmnVad.h"
#include "e2e_vad.h"
#include "E2EVad.h"
#include "Vocab.h"
#include "CommonStruct.h"
#include "Audio.h"