FunASR/funasr/runtime/onnxruntime/include/audio.h
Yabin Li b454a1054f
update online runtime, including vad-online, paraformer-online, punc-online,2pass (#815)
* init

* update

* add LoadConfigFromYaml

* update

* update

* update

* del time stat

* update

* update

* update

* update

* update

* update

* update

* add cpp websocket online 2pass srv

* [feature] multithread grpc server

* update

* update

* update

* [feature] support 2pass grpc cpp server and python client, can change mode to use offline, online or 2pass decoding

* update

* update

* update

* update

* add paraformer online onnx model export

* add paraformer online onnx model export

* add paraformer online onnx model export

* add paraformer online onnxruntime

* add paraformer online onnxruntime

* add paraformer online onnxruntime

* fix export paraformer online onnx model bug

* for client closed earlier and core dump

* support GRPC two pass decoding (#813)

* [refactor] optimize grpc server pipeline and instruction

* [refactor] rm useless file

* [refactor] optimize grpc client pipeline and instruction

* [debug] handle coredump when client terminated

* [refactor] rm useless log

* [refactor] modify grpc cmake

* Create run_server_2pass.sh

* Update SDK_tutorial_online_zh.md

* Update SDK_tutorial_online.md

* Update SDK_advanced_guide_online.md

* Update SDK_advanced_guide_online_zh.md

* Update SDK_tutorial_online_zh.md

* Update SDK_tutorial_online.md

* update

---------

Co-authored-by: zhaoming <zhaomingwork@qq.com>
Co-authored-by: boji123 <boji123@aliyun.com>
Co-authored-by: haoneng.lhn <haoneng.lhn@alibaba-inc.com>
2023-08-08 11:17:43 +08:00

100 lines
2.7 KiB
C++

#ifndef AUDIO_H
#define AUDIO_H
#include <queue>
#include <stdint.h>
#include "vad-model.h"
#include "offline-stream.h"
#include "com-define.h"
#ifndef WAV_HEADER_SIZE
#define WAV_HEADER_SIZE 44
#endif
using namespace std;
namespace funasr {
/**
 * @brief Descriptor for one piece of audio: a private start/end pair plus a
 *        public float buffer pointer, a length and a "final" flag.
 *
 * Only declarations live in this header; every member function is defined
 * out of line.
 *
 * NOTE(review): the units of start/end/len (samples vs. frames) and who owns
 * the memory behind `data` are not visible here — confirm against the
 * implementation file before relying on either.
 */
class AudioFrame {
  private:
    // Defaulted to zero so a constructor that does not assign them cannot
    // leave indeterminate values behind.
    int start = 0;
    int end = 0;

  public:
    AudioFrame();
    AudioFrame(int len);
    AudioFrame(const AudioFrame &other);
    AudioFrame(int start, int end, bool is_final);
    ~AudioFrame();

    // Accessors for the private range. All return int; the meaning of the
    // setters' return value is defined by the implementation.
    int SetStart(int val);
    int SetEnd(int val);
    int GetStart();
    int GetLen();
    int Disp();

    // 2pass
    bool is_final = false;   // presumably marks the last frame of a stream — confirm
    float* data = nullptr;   // buffer associated with this frame, if any
    int len = 0;             // zero until a constructor or caller sets it
};
class Audio {
private:
float *speech_data=nullptr;
int16_t *speech_buff=nullptr;
char* speech_char=nullptr;
int speech_len;
int speech_align_len;
float align_size;
int data_type;
queue<AudioFrame *> frame_queue;
queue<AudioFrame *> asr_online_queue;
queue<AudioFrame *> asr_offline_queue;
public:
Audio(int data_type);
Audio(int data_type, int size);
~Audio();
void Disp();
void WavResample(int32_t sampling_rate, const float *waveform, int32_t n);
bool LoadWav(const char* buf, int n_len, int32_t* sampling_rate);
bool LoadWav(const char* filename, int32_t* sampling_rate);
bool LoadWav2Char(const char* filename, int32_t* sampling_rate);
bool LoadPcmwav(const char* buf, int n_file_len, int32_t* sampling_rate);
bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
bool LoadPcmwav2Char(const char* filename, int32_t* sampling_rate);
bool LoadOthers2Char(const char* filename);
bool FfmpegLoad(const char *filename, bool copy2char=false);
bool FfmpegLoad(const char* buf, int n_file_len);
int FetchChunck(AudioFrame *&frame);
int FetchTpass(AudioFrame *&frame);
int Fetch(float *&dout, int &len, int &flag);
void Padding();
void Split(OfflineStream* offline_streamj);
void Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished=true);
void Split(VadModel* vad_obj, int chunk_len, bool input_finished=true, ASR_TYPE asr_mode=ASR_TWO_PASS);
float GetTimeLen();
int GetQueueSize() { return (int)frame_queue.size(); }
char* GetSpeechChar(){return speech_char;}
int GetSpeechLen(){return speech_len;}
// 2pass
vector<float> all_samples;
int offset = 0;
int speech_start=-1, speech_end=0;
int speech_offline_start=-1;
int seg_sample = MODEL_SAMPLE_RATE/1000;
bool LoadPcmwavOnline(const char* buf, int n_file_len, int32_t* sampling_rate);
void ResetIndex(){
speech_start=-1;
speech_end=0;
speech_offline_start=-1;
offset = 0;
all_samples.clear();
}
};
} // namespace funasr
#endif