diff --git a/funasr/runtime/grpc/Readme.md b/funasr/runtime/grpc/Readme.md index 80e55aab2..2bcad08f9 100644 --- a/funasr/runtime/grpc/Readme.md +++ b/funasr/runtime/grpc/Readme.md @@ -44,8 +44,8 @@ source ~/.bashrc #### Step 4. Start grpc paraformer server ``` -Usage: ./cmake/build/paraformer_server port thread_num /path/to/model_file -./cmake/build/paraformer_server 10108 4 /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch +Usage: ./cmake/build/paraformer_server port thread_num /path/to/model_file quantize(true or false) +./cmake/build/paraformer_server 10108 4 /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch false ``` diff --git a/funasr/runtime/grpc/paraformer_server.cc b/funasr/runtime/grpc/paraformer_server.cc index e5814a56c..69ce9032f 100644 --- a/funasr/runtime/grpc/paraformer_server.cc +++ b/funasr/runtime/grpc/paraformer_server.cc @@ -29,8 +29,8 @@ using paraformer::Request; using paraformer::Response; using paraformer::ASR; -ASRServicer::ASRServicer(const char* model_path, int thread_num) { - AsrHanlde=RapidAsrInit(model_path, thread_num); +ASRServicer::ASRServicer(const char* model_path, int thread_num, bool quantize) { + AsrHanlde=RapidAsrInit(model_path, thread_num, quantize); std::cout << "ASRServicer init" << std::endl; init_flag = 0; } @@ -170,10 +170,10 @@ grpc::Status ASRServicer::Recognize( } -void RunServer(const std::string& port, int thread_num, const char* model_path) { +void RunServer(const std::string& port, int thread_num, const char* model_path, bool quantize) { std::string server_address; server_address = "0.0.0.0:" + port; - ASRServicer service(model_path, thread_num); + ASRServicer service(model_path, thread_num, quantize); ServerBuilder builder; builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); @@ -184,12 +184,15 @@ void RunServer(const std::string& port, int thread_num, const char* model_path) } int main(int argc, char* argv[]) { - if (argc 
< 3) + if (argc < 5) { - printf("Usage: %s port thread_num /path/to/model_file\n", argv[0]); + printf("Usage: %s port thread_num /path/to/model_file quantize(true or false) \n", argv[0]); exit(-1); } - RunServer(argv[1], atoi(argv[2]), argv[3]); + // is quantize + bool quantize = false; + std::istringstream(argv[4]) >> std::boolalpha >> quantize; + RunServer(argv[1], atoi(argv[2]), argv[3], quantize); return 0; } diff --git a/funasr/runtime/grpc/paraformer_server.h b/funasr/runtime/grpc/paraformer_server.h index f356d9413..e42e041d1 100644 --- a/funasr/runtime/grpc/paraformer_server.h +++ b/funasr/runtime/grpc/paraformer_server.h @@ -45,7 +45,7 @@ class ASRServicer final : public ASR::Service { std::unordered_map client_transcription; public: - ASRServicer(const char* model_path, int thread_num); + ASRServicer(const char* model_path, int thread_num, bool quantize); void clear_states(const std::string& user); void clear_buffers(const std::string& user); void clear_transcriptions(const std::string& user); diff --git a/funasr/runtime/onnxruntime/include/Model.h b/funasr/runtime/onnxruntime/include/Model.h index 06267cb30..6f45c3850 100644 --- a/funasr/runtime/onnxruntime/include/Model.h +++ b/funasr/runtime/onnxruntime/include/Model.h @@ -13,5 +13,5 @@ class Model { virtual std::string rescoring() = 0; }; -Model *create_model(const char *path,int nThread=0); +Model *create_model(const char *path,int nThread=0,bool quantize=false); #endif diff --git a/funasr/runtime/onnxruntime/include/librapidasrapi.h b/funasr/runtime/onnxruntime/include/librapidasrapi.h index a83098f93..918e5740f 100644 --- a/funasr/runtime/onnxruntime/include/librapidasrapi.h +++ b/funasr/runtime/onnxruntime/include/librapidasrapi.h @@ -1,33 +1,20 @@ #pragma once - #ifdef WIN32 - - #ifdef _RPASR_API_EXPORT - #define _RAPIDASRAPI __declspec(dllexport) #else #define _RAPIDASRAPI __declspec(dllimport) #endif - - #else -#define _RAPIDASRAPI +#define _RAPIDASRAPI #endif - - - - #ifndef _WIN32 - #define 
RPASR_CALLBCK_PREFIX __attribute__((__stdcall__)) - #else #define RPASR_CALLBCK_PREFIX __stdcall #endif - #ifdef __cplusplus @@ -35,16 +22,13 @@ extern "C" { #endif typedef void* RPASR_HANDLE; - typedef void* RPASR_RESULT; - typedef unsigned char RPASR_BOOL; #define RPASR_TRUE 1 #define RPASR_FALSE 0 #define QM_DEFAULT_THREAD_NUM 4 - typedef enum { RASR_NONE=-1, @@ -55,7 +39,6 @@ typedef enum }RPASR_MODE; typedef enum { - RPASR_MODEL_PADDLE = 0, RPASR_MODEL_PADDLE_2 = 1, RPASR_MODEL_K2 = 2, @@ -63,17 +46,15 @@ typedef enum { }RPASR_MODEL_TYPE; - typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step. - // APIs for qmasr - -_RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread); - +// APIs for qmasr +_RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread, bool quantize); // if not give a fnCallback ,it should be NULL _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); + _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback); @@ -83,8 +64,8 @@ _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szW _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex); _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result); -_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result); +_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result); _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle); diff --git a/funasr/runtime/onnxruntime/readme.md b/funasr/runtime/onnxruntime/readme.md index 41c63c6f1..16d9dc72a 100644 --- a/funasr/runtime/onnxruntime/readme.md +++ b/funasr/runtime/onnxruntime/readme.md @@ -16,9 +16,9 @@ See the bottom of this 
page: Building Guidance ### 运行程序 -tester /path/to/models/dir /path/to/wave/file +tester /path/to/models/dir /path/to/wave/file quantize(true or false) - 例如: tester /data/models /data/test.wav + 例如: tester /data/models /data/test.wav false /data/models 需要包括如下两个文件: model.onnx 和vocab.txt diff --git a/funasr/runtime/onnxruntime/src/Model.cpp b/funasr/runtime/onnxruntime/src/Model.cpp index ddd4fd0b4..7ddb63535 100644 --- a/funasr/runtime/onnxruntime/src/Model.cpp +++ b/funasr/runtime/onnxruntime/src/Model.cpp @@ -1,11 +1,10 @@ #include "precomp.h" -Model *create_model(const char *path,int nThread) +Model *create_model(const char *path, int nThread, bool quantize) { Model *mm; - - mm = new paraformer::ModelImp(path, nThread); + mm = new paraformer::ModelImp(path, nThread, quantize); return mm; } diff --git a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp index f5f9d66be..62f47a5ac 100644 --- a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp +++ b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp @@ -4,24 +4,16 @@ extern "C" { #endif - // APIs for qmasr - _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum) + _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum, bool quantize) { - - - Model* mm = create_model(szModelDir, nThreadNum); - + Model* mm = create_model(szModelDir, nThreadNum, quantize); return mm; } - _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback) { - - Model* pRecogObj = (Model*)handle; - if (!pRecogObj) return nullptr; @@ -46,15 +38,12 @@ extern "C" { fnCallback(nStep, nTotal); } - return pResult; } _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback) { - Model* pRecogObj = (Model*)handle; - if (!pRecogObj) return nullptr; @@ -79,16 +68,12 @@ extern "C" { fnCallback(nStep, 
nTotal); } - return pResult; - } _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback) { - Model* pRecogObj = (Model*)handle; - if (!pRecogObj) return nullptr; @@ -113,15 +98,12 @@ extern "C" { fnCallback(nStep, nTotal); } - return pResult; - } _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback) { Model* pRecogObj = (Model*)handle; - if (!pRecogObj) return nullptr; @@ -146,9 +128,6 @@ extern "C" { fnCallback(nStep, nTotal); } - - - return pResult; } @@ -158,7 +137,6 @@ extern "C" { return 0; return 1; - } @@ -168,7 +146,6 @@ extern "C" { return 0.0f; return ((RPASR_RECOG_RESULT*)Result)->snippet_time; - } _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex) @@ -178,34 +155,26 @@ extern "C" { return nullptr; return pResult->msg.c_str(); - } _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result) { - if (Result) { delete (RPASR_RECOG_RESULT*)Result; - } } _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle) { - Model* pRecogObj = (Model*)handle; - if (!pRecogObj) return; delete pRecogObj; - } - - #ifdef __cplusplus } diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp index 46b521153..8eb0e8916 100644 --- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp +++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp @@ -3,14 +3,22 @@ using namespace std; using namespace paraformer; -ModelImp::ModelImp(const char* path,int nNumThread) +ModelImp::ModelImp(const char* path,int nNumThread, bool quantize) { - string model_path = pathAppend(path, "model.onnx"); - string vocab_path = pathAppend(path, "vocab.txt"); + string model_path; + string vocab_path; + if(quantize) + { + model_path = pathAppend(path, "model_quant.onnx"); + }else{ + model_path = pathAppend(path, "model.onnx"); + } + vocab_path = pathAppend(path, "vocab.txt"); fe = 
new FeatureExtract(3); - sessionOptions.SetInterOpNumThreads(nNumThread); + //sessionOptions.SetInterOpNumThreads(1); + sessionOptions.SetIntraOpNumThreads(nNumThread); sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); #ifdef _WIN32 diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.h b/funasr/runtime/onnxruntime/src/paraformer_onnx.h index ebbbb5152..db0084253 100644 --- a/funasr/runtime/onnxruntime/src/paraformer_onnx.h +++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.h @@ -4,10 +4,6 @@ #ifndef PARAFORMER_MODELIMP_H #define PARAFORMER_MODELIMP_H - - - - namespace paraformer { class ModelImp : public Model { @@ -19,7 +15,6 @@ namespace paraformer { void apply_lfr(Tensor*& din); void apply_cmvn(Tensor* din); - string greedy_search( float* in, int nLen); #ifdef _WIN_X86 @@ -39,7 +34,7 @@ namespace paraformer { //string m_strOutputName, m_strOutputNameLen; public: - ModelImp(const char* path, int nNumThread=0); + ModelImp(const char* path, int nNumThread=0, bool quantize=false); ~ModelImp(); void reset(); string forward_chunk(float* din, int len, int flag); diff --git a/funasr/runtime/onnxruntime/tester/CMakeLists.txt b/funasr/runtime/onnxruntime/tester/CMakeLists.txt index d79427135..f66319dfb 100644 --- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt +++ b/funasr/runtime/onnxruntime/tester/CMakeLists.txt @@ -13,8 +13,11 @@ set(EXTRA_LIBS rapidasr) include_directories(${CMAKE_SOURCE_DIR}/include) set(EXECNAME "tester") +set(EXECNAMERTF "tester_rtf") add_executable(${EXECNAME} "tester.cpp") target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS}) +add_executable(${EXECNAMERTF} "tester_rtf.cpp") +target_link_libraries(${EXECNAMERTF} PUBLIC ${EXTRA_LIBS}) diff --git a/funasr/runtime/onnxruntime/tester/tester.cpp b/funasr/runtime/onnxruntime/tester/tester.cpp index ba5c61ccb..2bba39a8a 100644 --- a/funasr/runtime/onnxruntime/tester/tester.cpp +++ b/funasr/runtime/onnxruntime/tester/tester.cpp @@ -9,41 +9,40 @@ #include 
#include +#include using namespace std; int main(int argc, char *argv[]) { - if (argc < 2) + if (argc < 4) { - printf("Usage: %s /path/to/model_dir /path/to/wav/file", argv[0]); + printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) \n", argv[0]); exit(-1); } struct timeval start, end; gettimeofday(&start, NULL); int nThreadNum = 4; - RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum); + // is quantize + bool quantize = false; + istringstream(argv[3]) >> boolalpha >> quantize; + RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum, quantize); if (!AsrHanlde) { printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]); exit(-1); } - - gettimeofday(&end, NULL); long seconds = (end.tv_sec - start.tv_sec); long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000); - - gettimeofday(&start, NULL); float snippet_time = 0.0f; - - RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL); + RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL); gettimeofday(&end, NULL); @@ -62,7 +61,6 @@ int main(int argc, char *argv[]) cout <<"no return data!"; } - //char* buff = nullptr; //int len = 0; //ifstream ifs(argv[2], std::ios::binary | std::ios::in); @@ -101,13 +99,11 @@ int main(int argc, char *argv[]) // //delete[]buff; //} - printf("Audio length %lfs.\n", (double)snippet_time); seconds = (end.tv_sec - start.tv_sec); long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000); - printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000)); RapidAsrUninit(AsrHanlde); diff --git a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp new file mode 100644 index 000000000..9651900e0 --- /dev/null +++ 
b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
@@ -0,0 +1,126 @@
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+// NOTE(review): the header name here was lost when this patch text was
+// extracted; <win_func.h> (a gettimeofday shim) is presumed -- confirm.
+#include <win_func.h>
+#endif
+
+#include "librapidasrapi.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+using namespace std;
+
+// Benchmark driver: runs recognition over every wav listed in a wav.scp file
+// and prints the total audio length, total recognition time and their ratio.
+//
+//   argv[1]  model directory handed to RapidAsrInit
+//   argv[2]  wav.scp; the second whitespace-separated column of each line is
+//            taken as the wav path
+//   argv[3]  "true" or "false" (parsed with boolalpha) selecting the quantized model
+//
+// Exits with -1 on missing arguments, an unreadable or empty wav.scp, or a
+// model that fails to load; returns 0 otherwise.
+int main(int argc, char *argv[])
+{
+    if (argc < 4)
+    {
+        printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) \n", argv[0]);
+        exit(-1);
+    }
+
+    // read wav.scp
+    vector<string> wav_list;
+    ifstream in(argv[2]);
+    if (!in.is_open()) {
+        // An unreadable list is a failure, so report it with a non-zero status.
+        printf("Failed to open file: %s\n", argv[2]);
+        exit(-1);
+    }
+    string line;
+    while (getline(in, line))
+    {
+        istringstream iss(line);
+        string column1, column2;
+        // Blank or single-column lines carry no wav path; skip them rather than
+        // queueing an empty file name.
+        if (iss >> column1 >> column2) {
+            wav_list.push_back(column2);
+        }
+    }
+    in.close();
+    if (wav_list.empty()) {
+        // The warm-up below indexes wav_list[0], which needs at least one entry.
+        printf("No wav entries found in: %s\n", argv[2]);
+        exit(-1);
+    }
+
+    // model init
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+    int nThreadNum = 1;
+    // is quantize
+    bool quantize = false;
+    istringstream(argv[3]) >> boolalpha >> quantize;
+
+    RPASR_HANDLE AsrHanlde = RapidAsrInit(argv[1], nThreadNum, quantize);
+    if (!AsrHanlde)
+    {
+        // Name the model file that is actually looked up for the chosen mode.
+        printf("Cannot load ASR Model from: %s, there must be files %s and vocab.txt\n",
+               argv[1], quantize ? "model_quant.onnx" : "model.onnx");
+        exit(-1);
+    }
+    gettimeofday(&end, NULL);
+    long seconds = (end.tv_sec - start.tv_sec);
+    long model_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+    printf("Model initialization takes %lfs.\n", (double)model_init_micros / 1000000);
+
+    // warm up
+    for (size_t i = 0; i < 30; i++)
+    {
+        RPASR_RESULT Result = RapidAsrRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
+        // Warm-up output is discarded; release it so the results do not leak.
+        RapidAsrFreeResult(Result);
+    }
+
+    // forward
+    float total_length = 0.0f;  // accumulated snippet time reported per result
+    long total_time = 0;        // accumulated recognition time in microseconds
+
+    for (size_t i = 0; i < wav_list.size(); i++)
+    {
+        gettimeofday(&start, NULL);
+        RPASR_RESULT Result = RapidAsrRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
+        gettimeofday(&end, NULL);
+        seconds = (end.tv_sec - start.tv_sec);
+        long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+        total_time += taking_micros;
+
+        if (Result) {
+            // printf's %s needs a C string (passing std::string is undefined),
+            // and RapidAsrGetResult can return nullptr.
+            const char *msg = RapidAsrGetResult(Result, 0);
+            printf("Result: %s \n", msg ? msg : "");
+
+            total_length += RapidAsrGetRetSnippetTime(Result);
+            RapidAsrFreeResult(Result);
+        } else {
+            cout << "No return data!";
+        }
+    }
+
+    printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
+    printf("total_time_comput %ld ms.\n", total_time / 1000);
+    printf("total_rtf %05lf .\n", (double)total_time / (total_length * 1000000));
+
+    RapidAsrUninit(AsrHanlde);
+    return 0;
+}