Rename executable files; remove some unnecessary dependencies

This commit is contained in:
lyblsgo 2023-04-21 17:12:10 +08:00
parent 73e410afb9
commit 716e3fe512
16 changed files with 51 additions and 346 deletions

View File

@ -30,4 +30,3 @@ include_directories(${PROJECT_SOURCE_DIR}/kaldi-native-fbank)
add_subdirectory("./third_party/yaml-cpp")
add_subdirectory(kaldi-native-fbank/kaldi-native-fbank/csrc)
add_subdirectory(src)
add_subdirectory(tester)

View File

@ -2,14 +2,10 @@
#ifndef AUDIO_H
#define AUDIO_H
#include <ComDefine.h>
#include <queue>
#include <stdint.h>
#include "Model.h"
#ifndef model_sample_rate
#define model_sample_rate 16000
#endif
#ifndef WAV_HEADER_SIZE
#define WAV_HEADER_SIZE 44
#endif

View File

@ -8,4 +8,21 @@
#define S_ALL 3
#define S_ERR 4
#ifndef MODEL_SAMPLE_RATE
#define MODEL_SAMPLE_RATE 16000
#endif
#ifndef VAD_SILENCE_DYRATION
#define VAD_SILENCE_DYRATION 15000
#endif
#ifndef VAD_MAX_LEN
#define VAD_MAX_LEN 800
#endif
#ifndef VAD_SPEECH_NOISE_THRES
#define VAD_SPEECH_NOISE_THRES 0.9
#endif
#endif

View File

@ -187,13 +187,13 @@ Audio::~Audio()
void Audio::disp()
{
printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
printf("Audio time is %f s. len is %d\n", (float)speech_len / MODEL_SAMPLE_RATE,
speech_len);
}
float Audio::get_time_len()
{
return (float)speech_len / model_sample_rate;
return (float)speech_len / MODEL_SAMPLE_RATE;
}
void Audio::wavResample(int32_t sampling_rate, const float *waveform,
@ -203,9 +203,9 @@ void Audio::wavResample(int32_t sampling_rate, const float *waveform,
"Creating a resampler:\n"
" in_sample_rate: %d\n"
" output_sample_rate: %d\n",
sampling_rate, static_cast<int32_t>(model_sample_rate));
sampling_rate, static_cast<int32_t>(MODEL_SAMPLE_RATE));
float min_freq =
std::min<int32_t>(sampling_rate, model_sample_rate);
std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
@ -213,7 +213,7 @@ void Audio::wavResample(int32_t sampling_rate, const float *waveform,
//auto resampler = new LinearResample(
// sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
auto resampler = std::make_unique<LinearResample>(
sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
std::vector<float> samples;
resampler->Resample(waveform, n, true, &samples);
//reset speech_data
@ -270,7 +270,7 @@ bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
}
//resample
if(*sampling_rate != model_sample_rate){
if(*sampling_rate != MODEL_SAMPLE_RATE){
wavResample(*sampling_rate, speech_data, speech_len);
}
@ -317,7 +317,7 @@ bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
}
//resample
if(*sampling_rate != model_sample_rate){
if(*sampling_rate != MODEL_SAMPLE_RATE){
wavResample(*sampling_rate, speech_data, speech_len);
}
@ -360,7 +360,7 @@ bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
}
//resample
if(*sampling_rate != model_sample_rate){
if(*sampling_rate != MODEL_SAMPLE_RATE){
wavResample(*sampling_rate, speech_data, speech_len);
}
@ -411,7 +411,7 @@ bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
}
//resample
if(*sampling_rate != model_sample_rate){
if(*sampling_rate != MODEL_SAMPLE_RATE){
wavResample(*sampling_rate, speech_data, speech_len);
}
@ -511,7 +511,7 @@ void Audio::split(Model* pRecogObj)
std::vector<float> pcm_data(speech_data, speech_data+sp_len);
vector<std::vector<int>> vad_segments = pRecogObj->vad_seg(pcm_data);
int seg_sample = model_sample_rate/1000;
int seg_sample = MODEL_SAMPLE_RATE/1000;
for(vector<int> segment:vad_segments)
{
frame = new AudioFrame();

View File

@ -1,25 +1,22 @@
file(GLOB files1 "*.cpp")
file(GLOB files2 "*.cc")
file(GLOB files4 "paraformer/*.cpp")
set(files ${files1} ${files2} ${files3} ${files4})
# message("${files}")
set(files ${files1} ${files2})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
add_library(funasr ${files})
if(WIN32)
set(EXTRA_LIBS pthread yaml-cpp csrc)
if(CMAKE_CL_64)
target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
else()
target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
endif()
target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
set(EXTRA_LIBS pthread yaml-cpp csrc)
if(CMAKE_CL_64)
target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
else()
target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
endif()
target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
else()
set(EXTRA_LIBS pthread yaml-cpp csrc)
@ -38,4 +35,8 @@ endif()
include_directories(${CMAKE_SOURCE_DIR}/include)
target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS})
add_executable(funasr-onnx-offline "funasr-onnx-offline.cpp")
add_executable(funasr-onnx-offline-rtf "funasr-onnx-offline-rtf.cpp")
target_link_libraries(funasr-onnx-offline PUBLIC funasr)
target_link_libraries(funasr-onnx-offline-rtf PUBLIC funasr)

View File

@ -1,59 +0,0 @@
#include "precomp.h"
// Creates the queue with a 67 x 80 working tensor and an empty write cursor.
FeatureQueue::FeatureQueue()
    : buff(new Tensor<float>(67, 80)),
      buff_idx(0),
      window_size(67)
{
}
// Releases the working buffer and every tensor that is still queued.
FeatureQueue::~FeatureQueue()
{
    // push() hands ownership of heap-allocated tensors to feature_queue and
    // pop() hands it on to the caller; anything never popped would otherwise
    // leak when the queue object is destroyed.
    while (!feature_queue.empty()) {
        delete feature_queue.front();
        feature_queue.pop();
    }
    delete buff;
}
void FeatureQueue::reinit(int size)
{
delete buff;
buff = new Tensor<float>(size, 80);
buff_idx = 0;
window_size = size;
}
// Rewinds the write cursor so the next push() starts at row 0 of the working
// buffer. The buffer itself and the queued tensors are not modified.
void FeatureQueue::reset()
{
    buff_idx = 0;
}
void FeatureQueue::push(float *din, int flag)
{
int offset = buff_idx * 80;
memcpy(buff->buff + offset, din, 80 * sizeof(float));
buff_idx++;
if (flag == S_END) {
Tensor<float> *tmp = new Tensor<float>(buff_idx, 80);
memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
feature_queue.push(tmp);
buff_idx = 0;
} else if (buff_idx == window_size) {
feature_queue.push(buff);
Tensor<float> *tmp = new Tensor<float>(window_size, 80);
memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
3 * 80 * sizeof(float));
buff_idx = 3;
buff = tmp;
}
}
// Removes the oldest queued tensor and returns it; the caller takes over the
// pointer. Returns nullptr when nothing is queued, because calling front() /
// pop() on an empty std::queue is undefined behaviour.
Tensor<float> *FeatureQueue::pop()
{
    if (feature_queue.empty()) {
        return nullptr;
    }
    Tensor<float> *oldest = feature_queue.front();
    feature_queue.pop();
    return oldest;
}
// Number of tensors currently waiting in the output queue, narrowed to int.
int FeatureQueue::size()
{
    const int queued = static_cast<int>(feature_queue.size());
    return queued;
}

View File

@ -1,28 +0,0 @@
#ifndef FEATUREQUEUE_H
#define FEATUREQUEUE_H
#include "Tensor.h"
#include <queue>
#include <stdint.h>
using namespace std;
// Accumulates 80-float feature rows into fixed-size windows and exposes the
// completed windows through a FIFO of heap-allocated Tensor<float> pointers.
class FeatureQueue {
private:
// Completed windows, oldest first.
queue<Tensor<float> *> feature_queue;
// Working buffer that push() is currently filling.
Tensor<float> *buff;
// Index of the next row of `buff` to be written.
int buff_idx;
// Number of rows that make up one full window.
int window_size;
public:
// Allocates a 67 x 80 working buffer.
FeatureQueue();
// Deletes the working buffer.
~FeatureQueue();
// Replaces the working buffer with a `size` x 80 one and rewinds the cursor.
void reinit(int size);
// Rewinds the cursor to row 0 without touching the buffer or the queue.
void reset();
// Copies 80 floats from `din` into the next row; queues a window when the
// buffer fills up or when `flag` equals S_END.
void push(float *din, int flag);
// Removes and returns the oldest queued tensor.
Tensor<float> *pop();
// Number of queued tensors.
int size();
};
#endif

View File

@ -1,39 +0,0 @@
#include "precomp.h"
// Starts with an empty cache and no attached input block.
SpeechWrap::SpeechWrap()
{
    cache_size = 0;
    // The remaining members used to be left indeterminate, so size() or
    // operator[] called before the first load() read uninitialised memory.
    in = nullptr;
    in_size = 0;
    total_size = 0;
    next_cache_size = 0;
}
// Nothing to release: the input block is only borrowed and the cache is an
// in-object array.
SpeechWrap::~SpeechWrap() = default;
// Forgets the cached samples by setting the cache length to zero. The
// attached input pointer and the recorded sizes are left as they are.
void SpeechWrap::reset()
{
    cache_size = 0;
}
// Attaches a new input block behind the cached samples.
//
// din: first sample of the block; only the pointer is stored, not a copy.
// len: number of samples in the block.
//
// Afterwards size() reports the cached samples plus `len`.
void SpeechWrap::load(float *din, int len)
{
    in_size = len;
    in = din;
    total_size = in_size + cache_size;
}
// Combined length (cache + attached input) as recorded by the last load().
int SpeechWrap::size()
{
    const int combined = total_size;
    return combined;
}
void SpeechWrap::update(int offset)
{
int in_offset = offset - cache_size;
cache_size = (total_size - offset);
memcpy(cache, in + in_offset, cache_size * sizeof(float));
}
// Indexes the logical sequence "cached samples followed by the attached
// input block" and returns a mutable reference to sample `i`.
float &SpeechWrap::operator[](int i)
{
    if (i < cache_size) {
        return cache[i];
    }
    return in[i - cache_size];
}

View File

@ -1,26 +0,0 @@
#ifndef SPEECHWRAP_H
#define SPEECHWRAP_H
#include <stdint.h>
// Presents a small cache of earlier samples followed by a borrowed input
// block as one indexable sequence of floats.
class SpeechWrap {
private:
// Samples carried over from earlier blocks (at most 400).
float cache[400];
// Number of valid samples in `cache`.
int cache_size;
// Currently attached input block; only the pointer is stored.
float *in;
// Number of samples in `in`.
int in_size;
// cache_size + in_size as computed by the last load().
int total_size;
// NOTE(review): not referenced by any method visible here — confirm whether
// it is still needed.
int next_cache_size;
public:
// Starts with an empty cache.
SpeechWrap();
~SpeechWrap();
// Attaches `len` samples starting at `din` behind the cached samples.
void load(float *din, int len);
// Makes the samples from logical index `offset` onward the new cache.
void update(int offset);
// Empties the cache.
void reset();
// Length recorded by the last load().
int size();
// Sample `i` of the logical sequence: cache first, then the input block.
float &operator[](int i);
};
#endif

View File

@ -1,6 +1,5 @@
#pragma once
typedef struct
{
std::string msg;
@ -11,8 +10,6 @@ typedef struct
#ifdef _WIN32
#include <codecvt>
inline std::wstring string2wstring(const std::string& str, const std::string& locale)
{
typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
@ -29,8 +26,6 @@ inline std::wstring strToWstr(std::string str) {
#endif
inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) {
size_t numInputNodes = session->GetInputCount();
if (numInputNodes > 0) {

View File

@ -6,9 +6,6 @@
#endif
#include "libfunasrapi.h"
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
@ -41,12 +38,10 @@ int main(int argc, char *argv[])
printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
gettimeofday(&start, NULL);
float snippet_time = 0.0f;
FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL, use_vad);
gettimeofday(&end, NULL);
float snippet_time = 0.0f;
if (Result)
{
string msg = FunASRGetResult(Result, 0);
@ -57,7 +52,7 @@ int main(int argc, char *argv[])
}
else
{
cout <<"no return data!";
printf("no return data!");
}
printf("Audio length %lfs.\n", (double)snippet_time);

View File

@ -14,7 +14,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad)
string vad_path = pathAppend(path, "vad_model.onnx");
string mvn_path = pathAppend(path, "vad.mvn");
vadHandle = make_unique<FsmnVad>();
vadHandle->init_vad(vad_path, mvn_path, model_sample_rate, 800, 15000, 0.9);
vadHandle->init_vad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
}
if(quantize)
@ -29,7 +29,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad)
// knf options
fbank_opts.frame_opts.dither = 0;
fbank_opts.mel_opts.num_bins = 80;
fbank_opts.frame_opts.samp_freq = model_sample_rate;
fbank_opts.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
fbank_opts.frame_opts.window_type = "hamming";
fbank_opts.frame_opts.frame_shift_ms = 10;
fbank_opts.frame_opts.frame_length_ms = 25;
@ -191,7 +191,7 @@ string ModelImp::forward(float* din, int len, int flag)
{
int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
std::vector<float> wav_feats = FbankKaldi(model_sample_rate, din, len);
std::vector<float> wav_feats = FbankKaldi(MODEL_SAMPLE_RATE, din, len);
wav_feats = ApplyLFR(wav_feats);
ApplyCMVN(&wav_feats);

View File

@ -1,6 +1,5 @@
#pragma once
// system
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@ -16,8 +15,6 @@
#include <string>
#include <math.h>
#include <numeric>
#include <cstring>
using namespace std;
@ -27,27 +24,19 @@ using namespace std;
#include "kaldi-native-fbank/csrc/feature-fbank.h"
#include "kaldi-native-fbank/csrc/online-feature.h"
// mine
#include "ComDefine.h"
#include "commonfunc.h"
#include <ComDefine.h>
#include "predefine_coe.h"
#include "FsmnVad.h"
#include <ComDefine.h>
//#include "alignedmem.h"
#include "Vocab.h"
#include "CommonStruct.h"
#include "Audio.h"
#include "Tensor.h"
#include "util.h"
#include "CommonStruct.h"
#include "FeatureQueue.h"
#include "SpeechWrap.h"
#include <Audio.h>
#include "resample.h"
#include "Model.h"
#include "paraformer_onnx.h"
#include "libfunasrapi.h"
using namespace paraformer;

View File

@ -1,112 +0,0 @@
#ifndef WENETPARAMS_H
#define WENETPARAMS_H
// #pragma pack(1)
#define vocab_size 5538
// Flat float storage for the layers named conv0, conv1 and out0; each weight
// array is paired with a 512-element bias.
// NOTE(review): presumably the encoder input embedding; the dimension order
// inside each weight array is not shown here — confirm against the loader.
typedef struct {
float conv0_weight[512 * 9];
float conv0_bias[512];
float conv1_weight[512 * 512 * 9];
float conv1_bias[512];
float out0_weight[9728 * 512];
float out0_bias[512];
} EncEmbedParams;
// Four 512x512 weight arrays (q, k, v, out), each with a 512-element bias.
// NOTE(review): presumably the query/key/value/output projections of one
// attention layer — confirm.
typedef struct {
float linear_q_weight[512 * 512];
float linear_q_bias[512];
float linear_k_weight[512 * 512];
float linear_k_bias[512];
float linear_v_weight[512 * 512];
float linear_v_bias[512];
float linear_out_weight[512 * 512];
float linear_out_bias[512];
} SelfAttnParams;
// A SelfAttnParams block plus a 512x512 "linear_pos" weight and two
// 512-element vectors (pos_bias_u, pos_bias_v).
// NOTE(review): the extra fields look like relative-position attention
// parameters — confirm.
typedef struct {
SelfAttnParams linear0;
float linear_pos_weight[512 * 512];
float pos_bias_u[512];
float pos_bias_v[512];
} EncSelfAttnParams;
// Two weight/bias pairs: w1 (512*2048 weights, 2048 biases) and
// w2 (2048*512 weights, 512 biases).
typedef struct {
float w1_weight[512 * 2048];
float w1_bias[2048];
float w2_weight[2048 * 512];
float w2_bias[512];
} FeedForwardParams;
// One 512-element weight vector and one 512-element bias vector; used as the
// parameter block of every "norm" field in the structs of this header.
typedef struct {
float weight[512];
float bias[512];
} NormParams;
// Parameters for three convolutions (pointwise_conv1, depthwise_conv,
// pointwise_conv2) followed by one NormParams block.
// NOTE(review): the 15 in depthwise_conv_weight is presumably the kernel
// width — confirm.
typedef struct {
float pointwise_conv1_weight[1024 * 512];
float pointwise_conv1_bias[1024];
float depthwise_conv_weight[512 * 15];
float depthwise_conv_bias[512];
float pointwise_conv2_weight[512 * 512];
float pointwise_conv2_bias[512];
NormParams norm;
} EncConvParams;
// Parameters of a single encoder layer: one attention block, two
// feed-forward blocks, one convolution block and five NormParams blocks.
// The concat_* arrays are commented out and therefore not part of the layout.
typedef struct {
EncSelfAttnParams self_attn;
FeedForwardParams feedforward;
FeedForwardParams feedforward_macaron;
EncConvParams conv_module;
NormParams norm_ff;
NormParams norm_mha;
NormParams norm_macaron;
NormParams norm_conv;
NormParams norm_final;
// float concat_weight[1024 * 512];
// float concat_bias[512];
} SubEncoderParams;
// Whole encoder: the embedding block, 12 SubEncoderParams layers and a
// trailing NormParams block.
typedef struct {
EncEmbedParams embed;
SubEncoderParams sub_encoder[12];
NormParams after_norm;
} EncoderParams;
// Parameters of a single decoder layer: two SelfAttnParams blocks
// (self_attn, src_attn), one feed-forward block and three NormParams blocks.
// The concat_* arrays are commented out and therefore not part of the layout.
typedef struct {
SelfAttnParams self_attn;
SelfAttnParams src_attn;
FeedForwardParams feedward;
NormParams norm1;
NormParams norm2;
NormParams norm3;
// float concat_weight1[1024 * 512];
// float concat_bias1[512];
// float concat_weight2[1024 * 512];
// float concat_bias2[512];
} SubDecoderParams;
// Whole decoder: an embedding table and an output layer, both sized by the
// vocab_size macro times 512, around 6 SubDecoderParams layers and a
// NormParams block.
typedef struct {
float embed_weight[vocab_size * 512];
SubDecoderParams sub_decoder[6];
NormParams after_norm;
float output_weight[vocab_size * 512];
float output_bias[vocab_size];
} DecoderParams;
// Top-level parameter layout: the encoder, a "ctc" weight/bias pair sized by
// the vocab_size macro, and the decoder, in that order.
// NOTE(review): presumably mapped directly onto a binary weight file, which
// would make field order significant — confirm before reordering.
typedef struct {
EncoderParams encoder;
float ctc_weight[512 * vocab_size];
float ctc_bias[vocab_size];
DecoderParams decoder;
} WenetParams;
// #pragma pack()
#endif

View File

@ -1,23 +0,0 @@
# Builds the two test executables ("tester" and "tester_rtf") and links both
# against the funasr library.

# On Windows, add the bundled library directory matching the target
# architecture (x64 when CMAKE_CL_64 is set, x86 otherwise).
if(WIN32)
if(CMAKE_CL_64)
link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 )
else()
link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 )
endif()
endif()
# Libraries linked into every executable defined below.
set(EXTRA_LIBS funasr)
include_directories(${CMAKE_SOURCE_DIR}/include)
# Target names for the two executables.
set(EXECNAME "tester")
set(EXECNAMERTF "tester_rtf")
add_executable(${EXECNAME} "tester.cpp")
target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
add_executable(${EXECNAMERTF} "tester_rtf.cpp")
target_link_libraries(${EXECNAMERTF} PUBLIC ${EXTRA_LIBS})