From 39529df088e74988257dd4dac9adc818080cb49f Mon Sep 17 00:00:00 2001 From: mayong Date: Tue, 7 Mar 2023 12:15:07 +0800 Subject: [PATCH] update files. --- funasr/runtime/onnxruntime/CMakeSettings.json | 22 +- funasr/runtime/onnxruntime/include/Audio.h | 7 +- .../onnxruntime/include/librapidasrapi.h | 95 ++++++++ funasr/runtime/onnxruntime/src/Audio.cpp | 211 ++++++++++++++++-- funasr/runtime/onnxruntime/src/CMakeLists.txt | 2 +- funasr/runtime/onnxruntime/src/commonfunc.h | 11 + .../onnxruntime/src/librapidasrapi.cpp | 204 +++++++++++++++++ funasr/runtime/onnxruntime/src/precomp.h | 3 +- .../runtime/onnxruntime/tester/CMakeLists.txt | 2 +- funasr/runtime/onnxruntime/tester/tester.cpp | 50 ++--- 10 files changed, 554 insertions(+), 53 deletions(-) create mode 100644 funasr/runtime/onnxruntime/include/librapidasrapi.h create mode 100644 funasr/runtime/onnxruntime/src/librapidasrapi.cpp diff --git a/funasr/runtime/onnxruntime/CMakeSettings.json b/funasr/runtime/onnxruntime/CMakeSettings.json index f515d1fb2..2eb6c5a26 100644 --- a/funasr/runtime/onnxruntime/CMakeSettings.json +++ b/funasr/runtime/onnxruntime/CMakeSettings.json @@ -19,8 +19,26 @@ "cmakeCommandArgs": "", "buildCommandArgs": "", "ctestCommandArgs": "", - "inheritEnvironments": [ "msvc_x64_x64" ], - "variables": [] + "inheritEnvironments": [ "msvc_x64_x64" ] + }, + { + "name": "Linux-GCC-Debug", + "generator": "Unix Makefiles", + "configurationType": "Debug", + "cmakeExecutable": "cmake", + "remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ], + "cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1", + "buildCommandArgs": "", + "ctestCommandArgs": "", + "inheritEnvironments": [ "linux_x64" ], + "remoteMachineName": "${defaultRemoteMachineName}", + "remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src", + "remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}", + "remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}", + "remoteCopySources": true, + "rsyncCommandArgs": "-t --delete", + "remoteCopyBuildOutput": false, + "remoteCopySourcesMethod": "rsync" } ] } \ No newline at end of file diff --git a/funasr/runtime/onnxruntime/include/Audio.h b/funasr/runtime/onnxruntime/include/Audio.h index af8d2a904..8b40c4d7d 100644 --- a/funasr/runtime/onnxruntime/include/Audio.h +++ b/funasr/runtime/onnxruntime/include/Audio.h @@ -43,11 +43,16 @@ class Audio { Audio(int data_type, int size); ~Audio(); void disp(); - bool loadwav(const char *filename); + bool loadwav(const char* filename); + bool loadwav(const char* buf, int nLen); + bool loadpcmwav(const char* buf, int nFileLen); + bool loadpcmwav(const char* filename); int fetch_chunck(float *&dout, int len); int fetch(float *&dout, int &len, int &flag); void padding(); void split(); + + int get_queue_size() { return (int)frame_queue.size(); } }; #endif diff --git a/funasr/runtime/onnxruntime/include/librapidasrapi.h b/funasr/runtime/onnxruntime/include/librapidasrapi.h new file mode 100644 index 000000000..e9ab87a23 --- /dev/null +++ b/funasr/runtime/onnxruntime/include/librapidasrapi.h @@ -0,0 +1,95 @@ +#pragma once + + +#ifdef WIN32 + + +#ifdef _RPASR_API_EXPORT + +#define _RAPIDASRAPI __declspec(dllexport) +#else +#define _RAPIDASRAPI __declspec(dllimport) +#endif + + +#else +#define _RAPIDASRAPI +#endif + + + + + +#ifndef _WIN32 + +#define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__)) + +#else +#define RPASR_CALLBCK_PREFIX __stdcall +#endif + + +#ifdef __cplusplus + +extern "C" { +#endif + +typedef void* RPASR_HANDLE; + +typedef void* RPASR_RESULT; + +typedef unsigned char RPASR_BOOL; + +#define RPASR_TRUE 1 +#define RPASR_FALSE 0 +#define QM_DEFAULT_THREAD_NUM 4 + + +typedef enum +{ + RASR_NONE=-1, + RASRM_CTC_GREEDY_SEARCH=0, + RASRM_CTC_RPEFIX_BEAM_SEARCH = 1, + RASRM_ATTENSION_RESCORING = 2, + +}RPASR_MODE; + +typedef enum { + + RPASR_MODEL_PADDLE = 0, + RPASR_MODEL_PADDLE_2 = 1, + RPASR_MODEL_K2 = 2, + RPASR_MODEL_PARAFORMER = 3, + +}RPASR_MODEL_TYPE; + + +typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step. + + // APIs for qmasr + +_RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread); + + + +// if not give a fnCallback ,it should be NULL +_RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); +_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); + +_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback); + +_RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback); + +_RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex); + +_RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result); +_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result); + + +_RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle); + + +#ifdef __cplusplus + +} +#endif \ No newline at end of file diff --git a/funasr/runtime/onnxruntime/src/Audio.cpp b/funasr/runtime/onnxruntime/src/Audio.cpp index f515a6d0a..d5d07462d 100644 --- a/funasr/runtime/onnxruntime/src/Audio.cpp +++ b/funasr/runtime/onnxruntime/src/Audio.cpp @@ -25,8 +25,7 @@ class AudioWindow { out_idx = 1; sum = 0; }; - ~AudioWindow() - { + ~AudioWindow(){ free(window); }; int put(int val) @@ -102,6 +101,11 @@ Audio::~Audio() { if (speech_buff != NULL) { free(speech_buff); + + } + + if (speech_data != NULL) { + free(speech_data); } } @@ -115,9 +119,11 @@ void Audio::disp() bool Audio::loadwav(const char *filename) { + if (speech_data != NULL) { + free(speech_data); + } if (speech_buff != NULL) { free(speech_buff); - free(speech_data); } offset = 0; @@ -133,28 +139,191 @@ bool Audio::loadwav(const char *filename) speech_len = (nFileLen - 44) / 2; speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size); speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len); - memset(speech_buff, 0, sizeof(int16_t) * speech_align_len); - int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp); - fclose(fp); - speech_data = (float *)malloc(sizeof(float) * speech_align_len); - memset(speech_data, 0, sizeof(float) * speech_align_len); - int i; - float scale = 1; + if (speech_buff) + { + memset(speech_buff, 0, sizeof(int16_t) * speech_align_len); + int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp); + fclose(fp); - if (data_type == 1) { - scale = 32768; + speech_data = (float*)malloc(sizeof(float) * speech_align_len); + memset(speech_data, 0, sizeof(float) * speech_align_len); + int i; + float scale = 1; + + if (data_type == 1) { + scale = 32768; + } + + for (i = 0; i < speech_len; i++) { + speech_data[i] = (float)speech_buff[i] / scale; + } + + AudioFrame* frame = new AudioFrame(speech_len); + frame_queue.push(frame); + + + return true; } - - for (i = 0; i < speech_len; i++) { - speech_data[i] = (float)speech_buff[i] / scale; - } - - AudioFrame *frame = new AudioFrame(speech_len); - frame_queue.push(frame); - return true; + else + return false; } + +bool Audio::loadwav(const char* buf, int nFileLen) +{ + + + + if (speech_data != NULL) { + free(speech_data); + } + if (speech_buff != NULL) { + free(speech_buff); + } + + offset = 0; + + size_t nOffset = 0; + +#define WAV_HEADER_SIZE 44 + + speech_len = (nFileLen - WAV_HEADER_SIZE) / 2; + speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size); + speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len); + if (speech_buff) + { + memset(speech_buff, 0, sizeof(int16_t) * speech_align_len); + memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t)); + + + speech_data = (float*)malloc(sizeof(float) * speech_align_len); + memset(speech_data, 0, sizeof(float) * speech_align_len); + int i; + float scale = 1; + + if (data_type == 1) { + scale = 32768; + } + + for (i = 0; i < speech_len; i++) { + speech_data[i] = (float)speech_buff[i] / scale; + } + + + return true; + } + else + return false; + +} + + +bool Audio::loadpcmwav(const char* buf, int nBufLen) +{ + if (speech_data != NULL) { + free(speech_data); + } + if (speech_buff != NULL) { + free(speech_buff); + } + offset = 0; + + size_t nOffset = 0; + +#define WAV_HEADER_SIZE 44 + + speech_len = nBufLen / 2; + speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size); + speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len); + if (speech_buff) + { + memset(speech_buff, 0, sizeof(int16_t) * speech_align_len); + memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t)); + + + speech_data = (float*)malloc(sizeof(float) * speech_align_len); + memset(speech_data, 0, sizeof(float) * speech_align_len); + + + int i; + float scale = 1; + + if (data_type == 1) { + scale = 32768; + } + + for (i = 0; i < speech_len; i++) { + speech_data[i] = (float)speech_buff[i] / scale; + } + + + return true; + + } + else + return false; + + +} + +bool Audio::loadpcmwav(const char* filename) +{ + + if (speech_data != NULL) { + free(speech_data); + } + if (speech_buff != NULL) { + free(speech_buff); + } + offset = 0; + + FILE* fp; + fp = fopen(filename, "rb"); + if (fp == nullptr) + return false; + fseek(fp, 0, SEEK_END); + uint32_t nFileLen = ftell(fp); + fseek(fp, 0, SEEK_SET); + + speech_len = (nFileLen) / 2; + speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size); + speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len); + if (speech_buff) + { + memset(speech_buff, 0, sizeof(int16_t) * speech_align_len); + int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp); + fclose(fp); + + speech_data = (float*)malloc(sizeof(float) * speech_align_len); + memset(speech_data, 0, sizeof(float) * speech_align_len); + + + + int i; + float scale = 1; + + if (data_type == 1) { + scale = 32768; + } + + for (i = 0; i < speech_len; i++) { + speech_data[i] = (float)speech_buff[i] / scale; + } + + + AudioFrame* frame = new AudioFrame(speech_len); + frame_queue.push(frame); + + + return true; + } + else + return false; + +} + + int Audio::fetch_chunck(float *&dout, int len) { if (offset >= speech_align_len) { @@ -163,7 +332,7 @@ int Audio::fetch_chunck(float *&dout, int len) } else if (offset == speech_align_len - len) { dout = speech_data + offset; offset = speech_align_len; - // 临时解决 + // 临时解决 AudioFrame *frame = frame_queue.front(); frame_queue.pop(); delete frame; diff --git a/funasr/runtime/onnxruntime/src/CMakeLists.txt b/funasr/runtime/onnxruntime/src/CMakeLists.txt index 4842072f3..aea222b92 100644 --- a/funasr/runtime/onnxruntime/src/CMakeLists.txt +++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt @@ -18,7 +18,7 @@ if(WIN32) endif() target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include ) - + target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT) else() set(EXTRA_LIBS fftw3f webrtcvad pthread) diff --git a/funasr/runtime/onnxruntime/src/commonfunc.h b/funasr/runtime/onnxruntime/src/commonfunc.h index 3f3c53a6c..237df86c1 100644 --- a/funasr/runtime/onnxruntime/src/commonfunc.h +++ b/funasr/runtime/onnxruntime/src/commonfunc.h @@ -1,7 +1,18 @@ #pragma once + + +typedef struct +{ + std::string msg; + +}RPASR_RECOG_RESULT; + + #ifdef _WIN32 #include + + inline std::wstring string2wstring(const std::string& str, const std::string& locale) { typedef std::codecvt_byname F; diff --git a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp new file mode 100644 index 000000000..4e1a4575d --- /dev/null +++ b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp @@ -0,0 +1,204 @@ +#include "precomp.h" +#ifdef __cplusplus + + + +// void __attribute__ ((visibility ("default"))) fun(); +extern "C" { +#endif + + + // APIs for qmasr + _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum) + { + +#ifdef NDEBUG + QMLIC_BOOL bMatched = QmLicCheckValid(QLFM_ASR); + if (!bMatched) { + return nullptr; + } +#endif + + Model* mm = create_model(szModelDir, nThreadNum); + + return mm; + } + + + _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback) + { + + + Model* pRecogObj = (Model*)handle; + + if (!pRecogObj) + return nullptr; + + Audio audio(1); + audio.loadwav(szBuf,nLen); + audio.split(); + + float* buff; + int len; + int flag=0; + RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; + int nStep = 0; + int nTotal = audio.get_queue_size(); + while (audio.fetch(buff, len, flag) > 0) { + pRecogObj->reset(); + string msg = pRecogObj->forward(buff, len, flag); + pResult->msg += msg; + nStep++; + if (fnCallback) + fnCallback(nStep, nTotal); + } + + + return pResult; + } + + _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback) + { + + Model* pRecogObj = (Model*)handle; + + if (!pRecogObj) + return nullptr; + + Audio audio(1); + audio.loadpcmwav(szBuf, nLen); + audio.split(); + + float* buff; + int len; + int flag = 0; + RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; + int nStep = 0; + int nTotal = audio.get_queue_size(); + while (audio.fetch(buff, len, flag) > 0) { + pRecogObj->reset(); + string msg = pRecogObj->forward(buff, len, flag); + pResult->msg += msg; + nStep++; + if (fnCallback) + fnCallback(nStep, nTotal); + } + + + return pResult; + + } + + _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback) + { + + Model* pRecogObj = (Model*)handle; + + if (!pRecogObj) + return nullptr; + + Audio audio(1); + audio.loadpcmwav(szFileName); + audio.split(); + + float* buff; + int len; + int flag = 0; + RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; + int nStep = 0; + int nTotal = audio.get_queue_size(); + while (audio.fetch(buff, len, flag) > 0) { + pRecogObj->reset(); + string msg = pRecogObj->forward(buff, len, flag); + pResult->msg += msg; + nStep++; + if (fnCallback) + fnCallback(nStep, nTotal); + } + + + return pResult; + + } + + _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback) + { + Model* pRecogObj = (Model*)handle; + + if (!pRecogObj) + return nullptr; + + Audio audio(1); + if(!audio.loadwav(szWavfile)) + return nullptr; + audio.split(); + + float* buff; + int len; + int flag = 0; + int nStep = 0; + int nTotal = audio.get_queue_size(); + RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; + while (audio.fetch(buff, len, flag) > 0) { + pRecogObj->reset(); + string msg = pRecogObj->forward(buff, len, flag); + pResult->msg+= msg; + nStep++; + if (fnCallback) + fnCallback(nStep, nTotal); + } + + + + + return pResult; + } + + _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result) + { + if (!Result) + return 0; + + return 1; + + } + _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex) + { + RPASR_RECOG_RESULT * pResult = (RPASR_RECOG_RESULT*)Result; + if(!pResult) + return nullptr; + + return pResult->msg.c_str(); + + } + + _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result) + { + + if (Result) + { + delete (RPASR_RECOG_RESULT*)Result; + + } + } + + _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle) + { + + Model* pRecogObj = (Model*)handle; + + + if (!pRecogObj) + return; + + delete pRecogObj; + + } + + + +#ifdef __cplusplus + +} +#endif + diff --git a/funasr/runtime/onnxruntime/src/precomp.h b/funasr/runtime/onnxruntime/src/precomp.h index ec0766dc7..358844baf 100644 --- a/funasr/runtime/onnxruntime/src/precomp.h +++ b/funasr/runtime/onnxruntime/src/precomp.h @@ -41,9 +41,10 @@ using namespace std; #include "FeatureExtract.h" #include "FeatureQueue.h" #include "SpeechWrap.h" +#include #include "Model.h" #include "paraformer_onnx.h" - +#include "librapidasrapi.h" using namespace paraformer; diff --git a/funasr/runtime/onnxruntime/tester/CMakeLists.txt b/funasr/runtime/onnxruntime/tester/CMakeLists.txt index 651b87fde..d79427135 100644 --- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt +++ b/funasr/runtime/onnxruntime/tester/CMakeLists.txt @@ -15,6 +15,6 @@ include_directories(${CMAKE_SOURCE_DIR}/include) set(EXECNAME "tester") add_executable(${EXECNAME} "tester.cpp") -target_link_libraries(${EXECNAME} PUBLIC onnxruntime ${EXTRA_LIBS}) +target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS}) diff --git a/funasr/runtime/onnxruntime/tester/tester.cpp b/funasr/runtime/onnxruntime/tester/tester.cpp index 7bfb4c00f..21b783fbf 100644 --- a/funasr/runtime/onnxruntime/tester/tester.cpp +++ b/funasr/runtime/onnxruntime/tester/tester.cpp @@ -1,12 +1,13 @@ -#include + #ifndef _WIN32 #include #else #include #endif -#include -#include +#include "librapidasrapi.h" + +#include using namespace std; @@ -21,52 +22,49 @@ int main(int argc, char *argv[]) struct timeval start, end; gettimeofday(&start, NULL); int nThreadNum = 4; - Model* mm = create_model(argv[1], nThreadNum); - if (!mm) + RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum); + + if (!AsrHanlde) { printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]); exit(-1); } - - Audio audio(0); - if (!audio.loadwav(argv[2])) - { - printf("cannot load %s\n", argv[2]); - return -1; - } - audio.disp(); - + gettimeofday(&end, NULL); long seconds = (end.tv_sec - start.tv_sec); long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); printf("Model initialization takes %lfs.\n", (double)micros / 1000000); - audio.split(); setbuf(stdout, NULL); cout << "Result: \""; gettimeofday(&start, NULL); - float *buff; - int len; - int flag; - while (audio.fetch(buff, len, flag) > 0) { - mm->reset(); - string msg = mm->forward(buff, len, flag); - cout << msg; - } + RPASR_RESULT Result=RapidAsrRecogPCMFile(AsrHanlde, argv[2], RASR_NONE, NULL); gettimeofday(&end, NULL); - cout << "\"." << endl; - + if (Result) + { + string msg = RapidAsrGetResult(Result, 0); + cout << msg << endl; + cout << "\"." << endl; + RapidAsrFreeResult(Result); + } + else + { + cout <<("no return data!"); + } + seconds = (end.tv_sec - start.tv_sec); long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); printf("Model inference takes %lfs.\n", (double)micros / 1000000); printf("Model inference RTF: %04lf.\n", (double)taking_micros/micros ); - delete mm; + RapidAsrUninit(AsrHanlde); return 0; } + + \ No newline at end of file