diff --git a/funasr/runtime/onnxruntime/include/funasrruntime.h b/funasr/runtime/onnxruntime/include/funasrruntime.h index 75be80e5c..5cfdb47d3 100644 --- a/funasr/runtime/onnxruntime/include/funasrruntime.h +++ b/funasr/runtime/onnxruntime/include/funasrruntime.h @@ -46,15 +46,20 @@ typedef enum { FUNASR_MODEL_PARAFORMER = 3, }FUNASR_MODEL_TYPE; +typedef enum +{ + FSMN_VAD_OFFLINE=0, + FSMN_VAD_ONLINE = 1, +}FSMN_VAD_MODE; + typedef void (* QM_CALLBACK)(int cur_step, int n_total); // n_total: total steps; cur_step: Current Step. // ASR _FUNASRAPI FUNASR_HANDLE FunASRInit(std::map& model_path, int thread_num); - -_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback); -_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback); -_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback); -_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback); +// buffer +_FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); +// file, support wav & pcm +_FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index); _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result); @@ -63,9 +68,12 @@ _FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle); _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result); // VAD -_FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map& model_path, int thread_num); +_FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map& model_path, int thread_num, FSMN_VAD_MODE mode=FSMN_VAD_OFFLINE); +// buffer +_FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); +// file, support wav & pcm +_FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); -_FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback); _FUNASRAPI std::vector>* FsmnVadGetResult(FUNASR_RESULT result,int n_index); _FUNASRAPI void FsmnVadFreeResult(FUNASR_RESULT result); _FUNASRAPI void FsmnVadUninit(FUNASR_HANDLE handle); @@ -78,8 +86,10 @@ _FUNASRAPI void CTTransformerUninit(FUNASR_HANDLE handle); //OfflineStream _FUNASRAPI FUNASR_HANDLE FunOfflineInit(std::map& model_path, int thread_num); -_FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback); -_FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback); +// buffer +_FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); +// file, support wav & pcm +_FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); _FUNASRAPI void FunOfflineUninit(FUNASR_HANDLE handle); #ifdef __cplusplus diff --git a/funasr/runtime/onnxruntime/src/funasrruntime.cpp b/funasr/runtime/onnxruntime/src/funasrruntime.cpp index 893ba70d7..adef5049e 100644 --- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp +++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp @@ -11,9 +11,9 @@ extern "C" { return mm; } - _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map& model_path, int thread_num) + _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map& model_path, int thread_num, FSMN_VAD_MODE mode) { - funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num); + funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num, mode); return mm; } @@ -30,36 +30,7 @@ extern "C" { } // APIs for ASR Infer - _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback) - { - funasr::Model* recog_obj = (funasr::Model*)handle; - if (!recog_obj) - return nullptr; - - int32_t sampling_rate = -1; - funasr::Audio audio(1); - if (!audio.LoadWav(sz_buf, n_len, &sampling_rate)) - return nullptr; - - float* buff; - int len; - int flag=0; - funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; - p_result->snippet_time = audio.GetTimeLen(); - int n_step = 0; - int n_total = audio.GetQueueSize(); - while (audio.Fetch(buff, len, flag) > 0) { - string msg = recog_obj->Forward(buff, len, flag); - p_result->msg += msg; - n_step++; - if (fn_callback) - fn_callback(n_step, n_total); - } - - return p_result; - } - - _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback) + _FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) { funasr::Model* recog_obj = (funasr::Model*)handle; if (!recog_obj) @@ -87,23 +58,32 @@ extern "C" { return p_result; } - _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback) + _FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) { funasr::Model* recog_obj = (funasr::Model*)handle; if (!recog_obj) return nullptr; funasr::Audio audio(1); - if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) - return nullptr; + if(funasr::is_target_file(sz_filename, "wav")){ + int32_t sampling_rate_ = -1; + if(!audio.LoadWav(sz_filename, &sampling_rate_)) + return nullptr; + }else if(funasr::is_target_file(sz_filename, "pcm")){ + if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) + return nullptr; + }else{ + LOG(ERROR)<<"Wrong wav extension"; + exit(-1); + } float* buff; int len; int flag = 0; - funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; - p_result->snippet_time = audio.GetTimeLen(); int n_step = 0; int n_total = audio.GetQueueSize(); + funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; + p_result->snippet_time = audio.GetTimeLen(); while (audio.Fetch(buff, len, flag) > 0) { string msg = recog_obj->Forward(buff, len, flag); p_result->msg += msg; @@ -115,45 +95,15 @@ extern "C" { return p_result; } - _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback) - { - funasr::Model* recog_obj = (funasr::Model*)handle; - if (!recog_obj) - return nullptr; - - int32_t sampling_rate = -1; - funasr::Audio audio(1); - if(!audio.LoadWav(sz_wavfile, &sampling_rate)) - return nullptr; - - float* buff; - int len; - int flag = 0; - int n_step = 0; - int n_total = audio.GetQueueSize(); - funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; - p_result->snippet_time = audio.GetTimeLen(); - while (audio.Fetch(buff, len, flag) > 0) { - string msg = recog_obj->Forward(buff, len, flag); - p_result->msg+= msg; - n_step++; - if (fn_callback) - fn_callback(n_step, n_total); - } - - return p_result; - } - // APIs for VAD Infer - _FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback) + _FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) { funasr::VadModel* vad_obj = (funasr::VadModel*)handle; if (!vad_obj) return nullptr; - - int32_t sampling_rate = -1; + funasr::Audio audio(1); - if(!audio.LoadWav(sz_wavfile, &sampling_rate)) + if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate)) return nullptr; funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT; @@ -166,6 +116,35 @@ extern "C" { return p_result; } + _FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) + { + funasr::VadModel* vad_obj = (funasr::VadModel*)handle; + if (!vad_obj) + return nullptr; + + funasr::Audio audio(1); + if(funasr::is_target_file(sz_filename, "wav")){ + int32_t sampling_rate_ = -1; + if(!audio.LoadWav(sz_filename, &sampling_rate_)) + return nullptr; + }else if(funasr::is_target_file(sz_filename, "pcm")){ + if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) + return nullptr; + }else{ + LOG(ERROR)<<"Wrong wav extension"; + exit(-1); + } + + funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT; + p_result->snippet_time = audio.GetTimeLen(); + + vector> vad_segments; + audio.Split(vad_obj, vad_segments); + p_result->segments = new vector>(vad_segments); + + return p_result; + } + // APIs for PUNC Infer _FUNASRAPI const std::string CTTransformerInfer(FUNASR_HANDLE handle, const char* sz_sentence, FUNASR_MODE mode, QM_CALLBACK fn_callback) { @@ -178,43 +157,7 @@ extern "C" { } // APIs for Offline-stream Infer - _FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback) - { - funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; - if (!offline_stream) - return nullptr; - - int32_t sampling_rate = -1; - funasr::Audio audio(1); - if(!audio.LoadWav(sz_wavfile, &sampling_rate)) - return nullptr; - if(offline_stream->UseVad()){ - audio.Split(offline_stream); - } - - float* buff; - int len; - int flag = 0; - int n_step = 0; - int n_total = audio.GetQueueSize(); - funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; - p_result->snippet_time = audio.GetTimeLen(); - while (audio.Fetch(buff, len, flag) > 0) { - string msg = (offline_stream->asr_handle)->Forward(buff, len, flag); - p_result->msg+= msg; - n_step++; - if (fn_callback) - fn_callback(n_step, n_total); - } - if(offline_stream->UsePunc()){ - string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str()); - p_result->msg = punc_res; - } - - return p_result; - } - - _FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback) + _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) { funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; if (!offline_stream) @@ -249,6 +192,50 @@ extern "C" { return p_result; } + _FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) + { + funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; + if (!offline_stream) + return nullptr; + + funasr::Audio audio(1); + if(funasr::is_target_file(sz_filename, "wav")){ + int32_t sampling_rate_ = -1; + if(!audio.LoadWav(sz_filename, &sampling_rate_)) + return nullptr; + }else if(funasr::is_target_file(sz_filename, "pcm")){ + if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) + return nullptr; + }else{ + LOG(ERROR)<<"Wrong wav extension"; + exit(-1); + } + if(offline_stream->UseVad()){ + audio.Split(offline_stream); + } + + float* buff; + int len; + int flag = 0; + int n_step = 0; + int n_total = audio.GetQueueSize(); + funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; + p_result->snippet_time = audio.GetTimeLen(); + while (audio.Fetch(buff, len, flag) > 0) { + string msg = (offline_stream->asr_handle)->Forward(buff, len, flag); + p_result->msg+= msg; + n_step++; + if (fn_callback) + fn_callback(n_step, n_total); + } + if(offline_stream->UsePunc()){ + string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str()); + p_result->msg = punc_res; + } + + return p_result; + } + _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result) { if (!result)