Rename executable files; remove some unnecessary dependencies

2025-09-15 14:48:36 +08:00 · 2023-04-21 17:12:10 +08:00 · 2023-04-21 17:12:10 +08:00 · 716e3fe512
commit 716e3fe512
parent 73e410afb9
16 changed files with 51 additions and 346 deletions
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@ -30,4 +30,3 @@ include_directories(${PROJECT_SOURCE_DIR}/kaldi-native-fbank)
 add_subdirectory("./third_party/yaml-cpp")
 add_subdirectory(kaldi-native-fbank/kaldi-native-fbank/csrc)
 add_subdirectory(src)
-add_subdirectory(tester)
--- a/funasr/runtime/onnxruntime/include/Audio.h
+++ b/funasr/runtime/onnxruntime/include/Audio.h
@ -2,14 +2,10 @@
 #ifndef AUDIO_H
 #define AUDIO_H

-#include <ComDefine.h>
 #include <queue>
 #include <stdint.h>
 #include "Model.h"

-#ifndef model_sample_rate
-#define model_sample_rate 16000
-#endif
 #ifndef WAV_HEADER_SIZE
 #define WAV_HEADER_SIZE 44
 #endif
--- a/funasr/runtime/onnxruntime/include/ComDefine.h
+++ b/funasr/runtime/onnxruntime/include/ComDefine.h
@ -8,4 +8,21 @@
 #define S_ALL    3
 #define S_ERR    4

+#ifndef MODEL_SAMPLE_RATE
+#define MODEL_SAMPLE_RATE 16000
+#endif
+
+#ifndef VAD_SILENCE_DYRATION
+#define VAD_SILENCE_DYRATION 15000
+#endif
+
+#ifndef VAD_MAX_LEN
+#define VAD_MAX_LEN 800
+#endif
+
+#ifndef VAD_SPEECH_NOISE_THRES
+#define VAD_SPEECH_NOISE_THRES 0.9
+#endif
+
+
 #endif
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/Audio.cpp
@ -187,13 +187,13 @@ Audio::~Audio()

 void Audio::disp()
 {
-    printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
+    printf("Audio time is %f s. len is %d\n", (float)speech_len / MODEL_SAMPLE_RATE,
           speech_len);
 }

 float Audio::get_time_len()
 {
-    return (float)speech_len / model_sample_rate;
+    return (float)speech_len / MODEL_SAMPLE_RATE;
 }

 void Audio::wavResample(int32_t sampling_rate, const float *waveform,
@ -203,9 +203,9 @@ void Audio::wavResample(int32_t sampling_rate, const float *waveform,
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
-          sampling_rate, static_cast<int32_t>(model_sample_rate));
+          sampling_rate, static_cast<int32_t>(MODEL_SAMPLE_RATE));
    float min_freq =
-        std::min<int32_t>(sampling_rate, model_sample_rate);
+        std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
@ -213,7 +213,7 @@ void Audio::wavResample(int32_t sampling_rate, const float *waveform,
    //auto resampler = new LinearResample(
    //      sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
    auto resampler = std::make_unique<LinearResample>(
-          sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
+          sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
    std::vector<float> samples;
    resampler->Resample(waveform, n, true, &samples);
    //reset speech_data
@ -270,7 +270,7 @@ bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
        }

        //resample
-        if(*sampling_rate != model_sample_rate){
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

@ -317,7 +317,7 @@ bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
        }
        
        //resample
-        if(*sampling_rate != model_sample_rate){
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

@ -360,7 +360,7 @@ bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
        }
        
        //resample
-        if(*sampling_rate != model_sample_rate){
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

@ -411,7 +411,7 @@ bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
        }

        //resample
-        if(*sampling_rate != model_sample_rate){
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

@ -511,7 +511,7 @@ void Audio::split(Model* pRecogObj)

    std::vector<float> pcm_data(speech_data, speech_data+sp_len);
    vector<std::vector<int>> vad_segments = pRecogObj->vad_seg(pcm_data);
-    int seg_sample = model_sample_rate/1000;
+    int seg_sample = MODEL_SAMPLE_RATE/1000;
    for(vector<int> segment:vad_segments)
    {
        frame = new AudioFrame();
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@ -1,25 +1,22 @@

 file(GLOB files1 "*.cpp")
 file(GLOB files2 "*.cc")
-file(GLOB files4 "paraformer/*.cpp")

-set(files ${files1} ${files2} ${files3} ${files4})
-
-# message("${files}")
+set(files ${files1} ${files2})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

 add_library(funasr ${files})

 if(WIN32)
-
-        set(EXTRA_LIBS pthread yaml-cpp csrc)
-        if(CMAKE_CL_64)
-            target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
-        else()
-            target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
-        endif()
-        target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
-        
-        target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
+    set(EXTRA_LIBS pthread yaml-cpp csrc)
+    if(CMAKE_CL_64)
+        target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
+    else()
+        target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
+    endif()
+    target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
+    
+    target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
 else()

    set(EXTRA_LIBS pthread yaml-cpp csrc)
@ -38,4 +35,8 @@ endif()
 include_directories(${CMAKE_SOURCE_DIR}/include)
 target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS})

+add_executable(funasr-onnx-offline "funasr-onnx-offline.cpp")
+add_executable(funasr-onnx-offline-rtf "funasr-onnx-offline-rtf.cpp")
+target_link_libraries(funasr-onnx-offline PUBLIC funasr)
+target_link_libraries(funasr-onnx-offline-rtf PUBLIC funasr)

--- a/funasr/runtime/onnxruntime/src/FeatureQueue.cpp
+++ b/funasr/runtime/onnxruntime/src/FeatureQueue.cpp
@ -1,59 +0,0 @@
-#include "precomp.h"
-FeatureQueue::FeatureQueue()
-{
-    buff = new Tensor<float>(67, 80);
-    window_size = 67;
-    buff_idx = 0;
-}
-
-FeatureQueue::~FeatureQueue()
-{
-    delete buff;
-}
-
-void FeatureQueue::reinit(int size)
-{
-    delete buff;
-    buff = new Tensor<float>(size, 80);
-    buff_idx = 0;
-    window_size = size;
-}
-
-void FeatureQueue::reset()
-{
-    buff_idx = 0;
-}
-
-void FeatureQueue::push(float *din, int flag)
-{
-    int offset = buff_idx * 80;
-    memcpy(buff->buff + offset, din, 80 * sizeof(float));
-    buff_idx++;
-
-    if (flag == S_END) {
-        Tensor<float> *tmp = new Tensor<float>(buff_idx, 80);
-        memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
-        feature_queue.push(tmp);
-        buff_idx = 0;
-    } else if (buff_idx == window_size) {
-        feature_queue.push(buff);
-        Tensor<float> *tmp = new Tensor<float>(window_size, 80);
-        memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
-               3 * 80 * sizeof(float));
-        buff_idx = 3;
-        buff = tmp;
-    }
-}
-
-Tensor<float> *FeatureQueue::pop()
-{
-
-    Tensor<float> *tmp = feature_queue.front();
-    feature_queue.pop();
-    return tmp;
-}
-
-int FeatureQueue::size()
-{
-    return feature_queue.size();
-}
--- a/funasr/runtime/onnxruntime/src/FeatureQueue.h
+++ b/funasr/runtime/onnxruntime/src/FeatureQueue.h
@ -1,28 +0,0 @@
-
-#ifndef FEATUREQUEUE_H
-#define FEATUREQUEUE_H
-
-#include "Tensor.h"
-#include <queue>
-#include <stdint.h>
-using namespace std;
-
-
-class FeatureQueue {
-  private:
-    queue<Tensor<float> *> feature_queue;
-    Tensor<float> *buff;
-    int buff_idx;
-    int window_size;
-
-  public:
-    FeatureQueue();
-    ~FeatureQueue();
-    void reinit(int size);
-    void reset();
-    void push(float *din, int flag);
-    Tensor<float> *pop();
-    int size();
-};
-
-#endif
--- a/funasr/runtime/onnxruntime/src/SpeechWrap.cpp
+++ b/funasr/runtime/onnxruntime/src/SpeechWrap.cpp
@ -1,39 +0,0 @@
-#include "precomp.h"
-
-SpeechWrap::SpeechWrap()
-{
-    cache_size = 0;
-}
-
-SpeechWrap::~SpeechWrap()
-{
-}
-
-void SpeechWrap::reset()
-{
-    cache_size = 0;
-}
-
-void SpeechWrap::load(float *din, int len)
-{
-    in = din;
-    in_size = len;
-    total_size = cache_size + in_size;
-}
-
-int SpeechWrap::size()
-{
-    return total_size;
-}
-
-void SpeechWrap::update(int offset)
-{
-    int in_offset = offset - cache_size;
-    cache_size = (total_size - offset);
-    memcpy(cache, in + in_offset, cache_size * sizeof(float));
-}
-
-float &SpeechWrap::operator[](int i)
-{
-    return i < cache_size ? cache[i] : in[i - cache_size];
-}
--- a/funasr/runtime/onnxruntime/src/SpeechWrap.h
+++ b/funasr/runtime/onnxruntime/src/SpeechWrap.h
@ -1,26 +0,0 @@
-
-#ifndef SPEECHWRAP_H
-#define SPEECHWRAP_H
-
-#include <stdint.h>
-
-class SpeechWrap {
-  private:
-    float cache[400];
-    int cache_size;
-    float *in;
-    int in_size;
-    int total_size;
-    int next_cache_size;
-
-  public:
-    SpeechWrap();
-    ~SpeechWrap();
-    void load(float *din, int len);
-    void update(int offset);
-    void reset();
-    int size();
-    float &operator[](int i);
-};
-
-#endif
--- a/funasr/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr/runtime/onnxruntime/src/commonfunc.h
@ -1,6 +1,5 @@
 #pragma once 

-
 typedef struct
 {
    std::string msg;
@ -11,8 +10,6 @@ typedef struct
 #ifdef _WIN32
 #include <codecvt>

-
-
 inline std::wstring string2wstring(const std::string& str, const std::string& locale)
 {
    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
@ -29,8 +26,6 @@ inline std::wstring  strToWstr(std::string str) {

 #endif

-
-
 inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) {
    size_t numInputNodes = session->GetInputCount();
    if (numInputNodes > 0) {
--- a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
--- a/funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
@ -6,9 +6,6 @@
 #endif

 #include "libfunasrapi.h"
-
-#include <iostream>
-#include <fstream>
 #include <sstream>
 using namespace std;

@ -41,12 +38,10 @@ int main(int argc, char *argv[])
    printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);

    gettimeofday(&start, NULL);
-    float snippet_time = 0.0f;
-
    FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL, use_vad);
-
    gettimeofday(&end, NULL);
-   
+
+    float snippet_time = 0.0f;
    if (Result)
    {
        string msg = FunASRGetResult(Result, 0);
@ -57,7 +52,7 @@ int main(int argc, char *argv[])
    }
    else
    {
-        cout <<"no return data!";
+        printf("no return data!");
    }
 
    printf("Audio length %lfs.\n", (double)snippet_time);
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
@ -14,7 +14,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad)
        string vad_path = pathAppend(path, "vad_model.onnx");
        string mvn_path = pathAppend(path, "vad.mvn");
        vadHandle = make_unique<FsmnVad>();
-        vadHandle->init_vad(vad_path, mvn_path, model_sample_rate, 800, 15000, 0.9);
+        vadHandle->init_vad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
    }

    if(quantize)
@ -29,7 +29,7 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad)
    // knf options
    fbank_opts.frame_opts.dither = 0;
    fbank_opts.mel_opts.num_bins = 80;
-    fbank_opts.frame_opts.samp_freq = model_sample_rate;
+    fbank_opts.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
    fbank_opts.frame_opts.window_type = "hamming";
    fbank_opts.frame_opts.frame_shift_ms = 10;
    fbank_opts.frame_opts.frame_length_ms = 25;
@ -191,7 +191,7 @@ string ModelImp::forward(float* din, int len, int flag)
 {

    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
-    std::vector<float> wav_feats = FbankKaldi(model_sample_rate, din, len);
+    std::vector<float> wav_feats = FbankKaldi(MODEL_SAMPLE_RATE, din, len);
    wav_feats = ApplyLFR(wav_feats);
    ApplyCMVN(&wav_feats);

--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@ -1,6 +1,5 @@
 #pragma once 
 // system 
-
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@ -16,8 +15,6 @@
 #include <string>
 #include <math.h>
 #include <numeric>
-
-
 #include <cstring>

 using namespace std;
@ -27,27 +24,19 @@ using namespace std;
 #include "kaldi-native-fbank/csrc/feature-fbank.h"
 #include "kaldi-native-fbank/csrc/online-feature.h"

-
 // mine
-
+#include "ComDefine.h"
 #include "commonfunc.h"
-#include <ComDefine.h>
 #include "predefine_coe.h"
 #include "FsmnVad.h"
-
-#include <ComDefine.h>
-//#include "alignedmem.h"
 #include "Vocab.h"
+#include "CommonStruct.h"
+#include "Audio.h"
 #include "Tensor.h"
 #include "util.h"
-#include "CommonStruct.h"
-#include "FeatureQueue.h"
-#include "SpeechWrap.h"
-#include <Audio.h>
 #include "resample.h"
 #include "Model.h"
 #include "paraformer_onnx.h"
 #include "libfunasrapi.h"

-
 using namespace paraformer;
--- a/funasr/runtime/onnxruntime/src/tmp.h
+++ b/funasr/runtime/onnxruntime/src/tmp.h
@ -1,112 +0,0 @@
-
-#ifndef WENETPARAMS_H
-#define WENETPARAMS_H
-// #pragma pack(1)
-
-#define vocab_size 5538
-
-typedef struct {
-    float conv0_weight[512 * 9];
-    float conv0_bias[512];
-
-    float conv1_weight[512 * 512 * 9];
-    float conv1_bias[512];
-
-    float out0_weight[9728 * 512];
-    float out0_bias[512];
-
-} EncEmbedParams;
-
-typedef struct {
-    float linear_q_weight[512 * 512];
-    float linear_q_bias[512];
-    float linear_k_weight[512 * 512];
-    float linear_k_bias[512];
-    float linear_v_weight[512 * 512];
-    float linear_v_bias[512];
-    float linear_out_weight[512 * 512];
-    float linear_out_bias[512];
-} SelfAttnParams;
-
-typedef struct {
-    SelfAttnParams linear0;
-    float linear_pos_weight[512 * 512];
-    float pos_bias_u[512];
-    float pos_bias_v[512];
-
-} EncSelfAttnParams;
-
-typedef struct {
-    float w1_weight[512 * 2048];
-    float w1_bias[2048];
-    float w2_weight[2048 * 512];
-    float w2_bias[512];
-} FeedForwardParams;
-
-typedef struct {
-    float weight[512];
-    float bias[512];
-} NormParams;
-
-typedef struct {
-    float pointwise_conv1_weight[1024 * 512];
-    float pointwise_conv1_bias[1024];
-
-    float depthwise_conv_weight[512 * 15];
-    float depthwise_conv_bias[512];
-
-    float pointwise_conv2_weight[512 * 512];
-    float pointwise_conv2_bias[512];
-    NormParams norm;
-} EncConvParams;
-
-typedef struct {
-    EncSelfAttnParams self_attn;
-    FeedForwardParams feedforward;
-    FeedForwardParams feedforward_macaron;
-    EncConvParams conv_module;
-    NormParams norm_ff;
-    NormParams norm_mha;
-    NormParams norm_macaron;
-    NormParams norm_conv;
-    NormParams norm_final;
-    // float concat_weight[1024 * 512];
-    // float concat_bias[512];
-} SubEncoderParams;
-
-typedef struct {
-    EncEmbedParams embed;
-    SubEncoderParams sub_encoder[12];
-    NormParams after_norm;
-} EncoderParams;
-
-typedef struct {
-    SelfAttnParams self_attn;
-    SelfAttnParams src_attn;
-    FeedForwardParams feedward;
-    NormParams norm1;
-    NormParams norm2;
-    NormParams norm3;
-    // float concat_weight1[1024 * 512];
-    // float concat_bias1[512];
-    // float concat_weight2[1024 * 512];
-    // float concat_bias2[512];
-} SubDecoderParams;
-
-typedef struct {
-    float embed_weight[vocab_size * 512];
-    SubDecoderParams sub_decoder[6];
-    NormParams after_norm;
-    float output_weight[vocab_size * 512];
-    float output_bias[vocab_size];
-} DecoderParams;
-
-typedef struct {
-    EncoderParams encoder;
-    float ctc_weight[512 * vocab_size];
-    float ctc_bias[vocab_size];
-    DecoderParams decoder;
-} WenetParams;
-
-// #pragma pack()
-#endif
--- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
@ -1,23 +0,0 @@
-
-
-if(WIN32)
-    if(CMAKE_CL_64)
-        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 )
-    else()
-        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 )
-    endif()
-endif()
-
-set(EXTRA_LIBS funasr)
-
-
-include_directories(${CMAKE_SOURCE_DIR}/include)
-set(EXECNAME "tester")
-set(EXECNAMERTF "tester_rtf")
-
-add_executable(${EXECNAME} "tester.cpp")
-target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
-
-add_executable(${EXECNAMERTF} "tester_rtf.cpp")
-target_link_libraries(${EXECNAMERTF} PUBLIC ${EXTRA_LIBS})
-