This commit is contained in:
lyblsgo 2023-04-26 16:48:23 +08:00
parent 6ef5ccc784
commit 3c4227466e
10 changed files with 131 additions and 92 deletions

View File

@ -15,6 +15,7 @@
// model path
#define VAD_MODEL_PATH "vad-model"
#define VAD_CMVN_PATH "vad-cmvn"
#define VAD_CONFIG_PATH "vad-config"
#define AM_MODEL_PATH "am-model"
#define AM_CMVN_PATH "am-cmvn"
#define AM_CONFIG_PATH "am-config"
@ -25,29 +26,37 @@
#define THREAD_NUM "thread-num"
// vad
#ifndef VAD_SILENCE_DYRATION
#define VAD_SILENCE_DYRATION 15000
#ifndef VAD_SILENCE_DURATION
#define VAD_SILENCE_DURATION 800
#endif
#ifndef VAD_MAX_LEN
#define VAD_MAX_LEN 800
#define VAD_MAX_LEN 15000
#endif
#ifndef VAD_SPEECH_NOISE_THRES
#define VAD_SPEECH_NOISE_THRES 0.9
#endif
#ifndef VAD_LFR_M
#define VAD_LFR_M 5
#endif
#ifndef VAD_LFR_N
#define VAD_LFR_N 1
#endif
// punc
#define UNK_CHAR "<unk>"
#define TOKEN_LEN 20
#define TOKEN_LEN 20
#define CANDIDATE_NUM 6
#define CANDIDATE_NUM 6
#define UNKNOW_INDEX 0
#define NOTPUNC_INDEX 1
#define COMMA_INDEX 2
#define PERIOD_INDEX 3
#define QUESTION_INDEX 4
#define DUN_INDEX 5
#define CACHE_POP_TRIGGER_LIMIT 200
#define CACHE_POP_TRIGGER_LIMIT 200
#endif

View File

@ -153,8 +153,7 @@ int AudioFrame::GetLen()
int AudioFrame::Disp()
{
printf("not imp!!!!\n");
LOG(ERROR) << "Not imp!!!!";
return 0;
};
@ -187,8 +186,7 @@ Audio::~Audio()
void Audio::Disp()
{
printf("Audio time is %f s. len is %d\n", (float)speech_len / MODEL_SAMPLE_RATE,
speech_len);
LOG(INFO) << "Audio time is " << (float)speech_len / MODEL_SAMPLE_RATE << " s. len is " << speech_len;
}
float Audio::GetTimeLen()
@ -199,19 +197,15 @@ float Audio::GetTimeLen()
void Audio::WavResample(int32_t sampling_rate, const float *waveform,
int32_t n)
{
printf(
"Creating a resampler:\n"
" in_sample_rate: %d\n"
" output_sample_rate: %d\n",
sampling_rate, static_cast<int32_t>(MODEL_SAMPLE_RATE));
LOG(INFO) << "Creating a resampler:\n"
<< " in_sample_rate: "<< sampling_rate << "\n"
<< " output_sample_rate: " << static_cast<int32_t>(MODEL_SAMPLE_RATE);
float min_freq =
std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
//FIXME
//auto resampler = new LinearResample(
// sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
auto resampler = std::make_unique<LinearResample>(
sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
std::vector<float> samples;
@ -240,7 +234,7 @@ bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
std::ifstream is(filename, std::ifstream::binary);
is.read(reinterpret_cast<char *>(&header), sizeof(header));
if(!is){
fprintf(stderr, "Failed to read %s\n", filename);
LOG(ERROR) << "Failed to read " << filename;
return false;
}
@ -255,7 +249,7 @@ bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
memset(speech_buff, 0, sizeof(int16_t) * speech_len);
is.read(reinterpret_cast<char *>(speech_buff), header.subchunk2_size);
if (!is) {
fprintf(stderr, "Failed to read %s\n", filename);
LOG(ERROR) << "Failed to read " << filename;
return false;
}
speech_data = (float*)malloc(sizeof(float) * speech_len);
@ -386,6 +380,7 @@ bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
FILE* fp;
fp = fopen(filename, "rb");
if (fp == nullptr)
LOG(ERROR) << "Failed to read " << filename;
return false;
fseek(fp, 0, SEEK_END);
uint32_t n_file_len = ftell(fp);

View File

@ -40,7 +40,6 @@ void CTTransformer::InitPunc(const std::string &punc_model, const std::string &p
m_tokenizer.OpenYaml(punc_config.c_str());
}
CTTransformer::~CTTransformer()
{
}
@ -180,10 +179,9 @@ vector<int> CTTransformer::Infer(vector<int64_t> input_data)
}
catch (std::exception const &e)
{
printf(e.what());
LOG(ERROR) << "Error when run punc onnx forword: " << (e.what());
exit(0);
}
return punction;
}

View File

@ -6,40 +6,58 @@
#include <fstream>
#include "precomp.h"
void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
float vad_speech_noise_thres) {
void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, const std::string &vad_config) {
session_options_.SetIntraOpNumThreads(1);
session_options_.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
session_options_.DisableCpuMemArena();
this->vad_sample_rate_ = vad_sample_rate;
this->vad_silence_duration_=vad_silence_duration;
this->vad_max_len_=vad_max_len;
this->vad_speech_noise_thres_=vad_speech_noise_thres;
ReadModel(vad_model);
ReadModel(vad_model.c_str());
LoadCmvn(vad_cmvn.c_str());
LoadConfigFromYaml(vad_config.c_str());
InitCache();
fbank_opts.frame_opts.dither = 0;
fbank_opts.mel_opts.num_bins = 80;
fbank_opts.frame_opts.samp_freq = vad_sample_rate;
fbank_opts.frame_opts.window_type = "hamming";
fbank_opts.frame_opts.frame_shift_ms = 10;
fbank_opts.frame_opts.frame_length_ms = 25;
fbank_opts.energy_floor = 0;
fbank_opts.mel_opts.debug_mel = false;
}
void FsmnVad::ReadModel(const std::string &vad_model) {
void FsmnVad::LoadConfigFromYaml(const char* filename){
YAML::Node config;
try{
config = YAML::LoadFile(filename);
}catch(exception const &e){
LOG(ERROR) << "Error loading file, yaml file error or not exist.";
exit(-1);
}
try{
YAML::Node frontend_conf = config["frontend_conf"];
YAML::Node post_conf = config["vad_post_conf"];
this->vad_sample_rate_ = frontend_conf["fs"].as<int>();
this->vad_silence_duration_ = post_conf["max_end_silence_time"].as<int>();
this->vad_max_len_ = post_conf["max_single_segment_time"].as<int>();
this->vad_speech_noise_thres_ = post_conf["speech_noise_thres"].as<double>();
fbank_opts.frame_opts.dither = frontend_conf["dither"].as<float>();
fbank_opts.mel_opts.num_bins = frontend_conf["n_mels"].as<int>();
fbank_opts.frame_opts.samp_freq = (float)vad_sample_rate_;
fbank_opts.frame_opts.window_type = frontend_conf["window"].as<string>();
fbank_opts.frame_opts.frame_shift_ms = frontend_conf["frame_shift"].as<float>();
fbank_opts.frame_opts.frame_length_ms = frontend_conf["frame_length"].as<float>();
fbank_opts.energy_floor = 0;
fbank_opts.mel_opts.debug_mel = false;
}catch(exception const &e){
LOG(ERROR) << "Error when load argument from vad config YAML.";
exit(-1);
}
}
void FsmnVad::ReadModel(const char* vad_model) {
try {
vad_session_ = std::make_shared<Ort::Session>(
env_, vad_model.c_str(), session_options_);
env_, vad_model, session_options_);
} catch (std::exception const &e) {
LOG(ERROR) << "Error when load vad onnx model: " << e.what();
exit(0);
}
LOG(INFO) << "vad onnx:";
GetInputOutputInfo(vad_session_, &vad_in_names_, &vad_out_names_);
}
@ -61,8 +79,8 @@ void FsmnVad::GetInputOutputInfo(
shape << j;
shape << " ";
}
LOG(INFO) << "\tInput " << i << " : name=" << name.get() << " type=" << type
<< " dims=" << shape.str();
// LOG(INFO) << "\tInput " << i << " : name=" << name.get() << " type=" << type
// << " dims=" << shape.str();
(*in_names)[i] = name.get();
name.release();
}
@ -80,8 +98,8 @@ void FsmnVad::GetInputOutputInfo(
shape << j;
shape << " ";
}
LOG(INFO) << "\tOutput " << i << " : name=" << name.get() << " type=" << type
<< " dims=" << shape.str();
// LOG(INFO) << "\tOutput " << i << " : name=" << name.get() << " type=" << type
// << " dims=" << shape.str();
(*out_names)[i] = name.get();
name.release();
}
@ -121,13 +139,12 @@ void FsmnVad::Forward(
// 4. Onnx infer
std::vector<Ort::Value> vad_ort_outputs;
try {
VLOG(3) << "Start infer";
vad_ort_outputs = vad_session_->Run(
Ort::RunOptions{nullptr}, vad_in_names_.data(), vad_inputs.data(),
vad_inputs.size(), vad_out_names_.data(), vad_out_names_.size());
} catch (std::exception const &e) {
LOG(ERROR) << e.what();
return;
LOG(ERROR) << "Error when run vad onnx forword: " << (e.what());
exit(0);
}
// 5. Change infer result to output shapes
@ -168,6 +185,10 @@ void FsmnVad::LoadCmvn(const char *filename)
try{
using namespace std;
ifstream cmvn_stream(filename);
if (!cmvn_stream.is_open()) {
    LOG(ERROR) << "Failed to open file: " << filename;
    // Not being able to open the CMVN file is fatal. Exit with a non-zero
    // status so the failure is not reported to the caller as success.
    exit(-1);
}
string line;
while (getline(cmvn_stream, line)) {
@ -203,7 +224,7 @@ void FsmnVad::LoadCmvn(const char *filename)
}
}
std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n) {
std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) {
std::vector<std::vector<float>> out_feats;
int T = vad_feats.size();
@ -250,7 +271,7 @@ FsmnVad::Infer(const std::vector<float> &waves) {
std::vector<std::vector<float>> vad_feats;
std::vector<std::vector<float>> vad_probs;
FbankKaldi(vad_sample_rate_, vad_feats, waves);
vad_feats = LfrCmvn(vad_feats, 5, 1);
vad_feats = LfrCmvn(vad_feats);
Forward(vad_feats, &vad_probs);
E2EVadModel vad_scorer = E2EVadModel();
@ -258,7 +279,6 @@ FsmnVad::Infer(const std::vector<float> &waves) {
vad_segments = vad_scorer(vad_probs, waves, true, false, vad_silence_duration_, vad_max_len_,
vad_speech_noise_thres_, vad_sample_rate_);
return vad_segments;
}
void FsmnVad::InitCache(){

View File

@ -18,15 +18,15 @@ class FsmnVad {
public:
FsmnVad();
void Test();
void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
float vad_speech_noise_thres);
void InitVad(const std::string &vad_model, const std::string &vad_cmvn, const std::string &vad_config);
std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
void Reset();
private:
void ReadModel(const std::string &vad_model);
void ReadModel(const char* vad_model);
void LoadConfigFromYaml(const char* filename);
static void GetInputOutputInfo(
const std::shared_ptr<Ort::Session> &session,
@ -35,7 +35,7 @@ private:
void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
const std::vector<float> &waves);
std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n);
std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats);
void Forward(
const std::vector<std::vector<float>> &chunk_feats,
@ -54,10 +54,13 @@ private:
knf::FbankOptions fbank_opts;
std::vector<float> means_list;
std::vector<float> vars_list;
int vad_sample_rate_ = 16000;
int vad_silence_duration_ = 800;
int vad_max_len_ = 15000;
double vad_speech_noise_thres_ = 0.9;
int vad_sample_rate_ = MODEL_SAMPLE_RATE;
int vad_silence_duration_ = VAD_SILENCE_DURATION;
int vad_max_len_ = VAD_MAX_LEN;
double vad_speech_noise_thres_ = VAD_SPEECH_NOISE_THRES;
int lfr_m = VAD_LFR_M;
int lfr_n = VAD_LFR_N;
};

View File

@ -59,13 +59,13 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list,
if(result){
string msg = FunASRGetResult(result, 0);
printf("Thread: %d Result: %s \n", this_thread::get_id(), msg.c_str());
LOG(INFO) << "Thread: " << this_thread::get_id() <<" Result: " << msg.c_str();
float snippet_time = FunASRGetRetSnippetTime(result);
n_total_length += snippet_time;
FunASRFreeResult(result);
}else{
cout <<"No return data!";
LOG(ERROR) << ("No return data!\n");
}
}
{
@ -87,11 +87,13 @@ void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std:
int main(int argc, char *argv[])
{
//google::InitGoogleLogging(argv[0]);
google::InitGoogleLogging(argv[0]);
FLAGS_logtostderr = true;
TCLAP::CmdLine cmd("funasr-onnx-offline", ' ', "1.0");
TCLAP::CmdLine cmd("funasr-onnx-offline-rtf", ' ', "1.0");
TCLAP::ValueArg<std::string> vad_model("", VAD_MODEL_PATH, "vad model path", false, "", "string");
TCLAP::ValueArg<std::string> vad_cmvn("", VAD_CMVN_PATH, "vad cmvn path", false, "", "string");
TCLAP::ValueArg<std::string> vad_config("", VAD_CONFIG_PATH, "vad config path", false, "", "string");
TCLAP::ValueArg<std::string> am_model("", AM_MODEL_PATH, "am model path", false, "", "string");
TCLAP::ValueArg<std::string> am_cmvn("", AM_CMVN_PATH, "am cmvn path", false, "", "string");
@ -105,6 +107,7 @@ int main(int argc, char *argv[])
cmd.add(vad_model);
cmd.add(vad_cmvn);
cmd.add(vad_config);
cmd.add(am_model);
cmd.add(am_cmvn);
cmd.add(am_config);
@ -117,6 +120,7 @@ int main(int argc, char *argv[])
std::map<std::string, std::string> model_path;
GetValue(vad_model, VAD_MODEL_PATH, model_path);
GetValue(vad_cmvn, VAD_CMVN_PATH, model_path);
GetValue(vad_config, VAD_CONFIG_PATH, model_path);
GetValue(am_model, AM_MODEL_PATH, model_path);
GetValue(am_cmvn, AM_CMVN_PATH, model_path);
GetValue(am_config, AM_CONFIG_PATH, model_path);
@ -130,14 +134,14 @@ int main(int argc, char *argv[])
if (!asr_handle)
{
LOG(ERROR) << ("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
LOG(ERROR) << "FunASR init failed";
exit(-1);
}
gettimeofday(&end, NULL);
long seconds = (end.tv_sec - start.tv_sec);
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
printf("Model initialization takes %lfs.", (double)modle_init_micros / 1000000);
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
// read wav_scp
vector<string> wav_list;
@ -174,10 +178,10 @@ int main(int argc, char *argv[])
thread.join();
}
printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
printf("total_time_comput %ld ms.\n", total_time / 1000);
printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000));
printf("speedup %05lf .\n", 1.0/((double)total_time/ (total_length*1000000)));
LOG(INFO) << "total_time_wav " << (long)(total_length * 1000) << " ms";
LOG(INFO) << "total_time_comput " << total_time / 1000 << " ms";
LOG(INFO) << "total_rtf " << (double)total_time/ (total_length*1000000);
LOG(INFO) << "speedup " << 1.0/((double)total_time/ (total_length*1000000));
FunASRUninit(asr_handle);
return 0;

View File

@ -30,11 +30,13 @@ void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std:
int main(int argc, char *argv[])
{
//google::InitGoogleLogging(argv[0]);
google::InitGoogleLogging(argv[0]);
FLAGS_logtostderr = true;
TCLAP::CmdLine cmd("funasr-onnx-offline", ' ', "1.0");
TCLAP::ValueArg<std::string> vad_model("", VAD_MODEL_PATH, "vad model path", false, "", "string");
TCLAP::ValueArg<std::string> vad_cmvn("", VAD_CMVN_PATH, "vad cmvn path", false, "", "string");
TCLAP::ValueArg<std::string> vad_config("", VAD_CONFIG_PATH, "vad config path", false, "", "string");
TCLAP::ValueArg<std::string> am_model("", AM_MODEL_PATH, "am model path", false, "", "string");
TCLAP::ValueArg<std::string> am_cmvn("", AM_CMVN_PATH, "am cmvn path", false, "", "string");
@ -48,6 +50,7 @@ int main(int argc, char *argv[])
cmd.add(vad_model);
cmd.add(vad_cmvn);
cmd.add(vad_config);
cmd.add(am_model);
cmd.add(am_cmvn);
cmd.add(am_config);
@ -60,6 +63,7 @@ int main(int argc, char *argv[])
std::map<std::string, std::string> model_path;
GetValue(vad_model, VAD_MODEL_PATH, model_path);
GetValue(vad_cmvn, VAD_CMVN_PATH, model_path);
GetValue(vad_config, VAD_CONFIG_PATH, model_path);
GetValue(am_model, AM_MODEL_PATH, model_path);
GetValue(am_cmvn, AM_CMVN_PATH, model_path);
GetValue(am_config, AM_CONFIG_PATH, model_path);
@ -76,14 +80,14 @@ int main(int argc, char *argv[])
if (!asr_hanlde)
{
LOG(ERROR) << ("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
LOG(ERROR) << "FunASR init failed";
exit(-1);
}
gettimeofday(&end, NULL);
long seconds = (end.tv_sec - start.tv_sec);
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
printf("Model initialization takes %lfs.", (double)modle_init_micros / 1000000);
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
// read wav_path and wav_scp
vector<string> wav_list;
@ -94,7 +98,7 @@ int main(int argc, char *argv[])
if(model_path.find(WAV_SCP)!=model_path.end()){
ifstream in(model_path.at(WAV_SCP));
if (!in.is_open()) {
LOG(ERROR) << ("Failed to open file: %s", model_path.at(WAV_SCP));
LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ;
return 0;
}
string line;
@ -127,16 +131,14 @@ int main(int argc, char *argv[])
}
else
{
LOG(ERROR) << ("no return data!\n");
LOG(ERROR) << ("No return data!\n");
}
}
printf("Audio length %lfs.\n", (double)snippet_time);
printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
LOG(INFO) << "Audio length: " << (double)snippet_time << " s";
LOG(INFO) << "Model inference takes: " << (double)taking_micros / 1000000 <<" s";
LOG(INFO) << "Model inference RTF: " << (double)taking_micros/ (snippet_time*1000000);
FunASRUninit(asr_hanlde);
return 0;
}

View File

@ -16,16 +16,18 @@ Paraformer::Paraformer(std::map<std::string, std::string>& model_path,int thread
use_vad = true;
string vad_model_path;
string vad_cmvn_path;
string vad_config_path;
try{
vad_model_path = model_path.at(VAD_MODEL_PATH);
vad_cmvn_path = model_path.at(VAD_CMVN_PATH);
vad_config_path = model_path.at(VAD_CONFIG_PATH);
}catch(const out_of_range& e){
LOG(ERROR) << "Error when read "<< VAD_CMVN_PATH <<" :" << e.what();
LOG(ERROR) << "Error when read "<< VAD_CMVN_PATH << " or " << VAD_CONFIG_PATH <<" :" << e.what();
exit(0);
}
vad_handle = make_unique<FsmnVad>();
vad_handle->InitVad(vad_model_path, vad_cmvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
vad_handle->InitVad(vad_model_path, vad_cmvn_path, vad_config_path);
}
// AM model

View File

@ -29,15 +29,20 @@ void CTokenizer::ReadYaml(const YAML::Node& node)
}
}
if (node.IsScalar()) {// scalar node: output its string value
cout << node.as<string>() << endl;
LOG(INFO) << node.as<string>();
}
}
bool CTokenizer::OpenYaml(const char* sz_yamlfile)
{
YAML::Node m_Config = YAML::LoadFile(sz_yamlfile);
if (m_Config.IsNull())
return false;
// Parse the YAML file; a missing or malformed file is fatal.
YAML::Node m_Config;
try{
    m_Config = YAML::LoadFile(sz_yamlfile);
}catch(std::exception const &e){
    // The process exits right after, so log at ERROR severity and keep the
    // exception text so the cause is visible.
    LOG(ERROR) << "Error loading file, yaml file error or not exist." << " " << e.what();
    exit(-1);
}
try
{
auto Tokens = m_Config["token_list"];
@ -66,7 +71,7 @@ bool CTokenizer::OpenYaml(const char* sz_yamlfile)
}
}
catch (YAML::BadFile& e) {
std::cout << "read error!" << std::endl;
LOG(ERROR) << "Read error!";
return false;
}
m_ready = true;

View File

@ -1,5 +1,6 @@
#include "vocab.h"
#include "yaml-cpp/yaml.h"
#include <yaml-cpp/yaml.h>
#include <glog/logging.h>
#include <fstream>
#include <iostream>
@ -22,8 +23,8 @@ void Vocab::LoadVocabFromYaml(const char* filename){
YAML::Node config;
try{
config = YAML::LoadFile(filename);
}catch(...){
printf("error loading file, yaml file error or not exist.\n");
}catch(std::exception const &e){
    // Fatal load failure: log at ERROR severity and include the exception text.
    LOG(ERROR) << "Error loading file, yaml file error or not exist." << " " << e.what();
exit(-1);
}
YAML::Node myList = config["token_list"];