mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
commit
948b68774c
@ -61,7 +61,6 @@ FunASR has open-sourced a large number of pre-trained models on industrial data.
|
||||
| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) | speech recognition with speaker diarization, with timestamps, non-streaming | 60000 hours, Mandarin | 220M |
|
||||
| <nobr>paraformer-zh-online <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )</nobr> | speech recognition, streaming | 60000 hours, Mandarin | 220M |
|
||||
| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | speech recognition, with timestamps, non-streaming | 50000 hours, English | 220M |
|
||||
| paraformer-en-spk <br> ([⭐]()[🤗]() ) | speech recognition with speaker diarization, non-streaming | Undo | Undo |
|
||||
| conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() ) | speech recognition, non-streaming | 50000 hours, English | 220M |
|
||||
| ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() ) | punctuation restoration | 100M, Mandarin and English | 1.1G |
|
||||
| fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() ) | voice activity detection | 5000 hours, Mandarin and English | 0.4M |
|
||||
@ -97,7 +96,7 @@ model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", \
|
||||
punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
|
||||
spk_model="cam++", spk_model_revision="v2.0.2")
|
||||
res = model(input=f"{model.model_path}/example/asr_example.wav",
|
||||
batch_size=16,
|
||||
batch_size=64,
|
||||
hotword='魔搭')
|
||||
print(res)
|
||||
```
|
||||
@ -135,7 +134,6 @@ Note: `chunk_size` is the configuration for streaming latency.` [0,10,5]` indica
|
||||
from funasr import AutoModel
|
||||
|
||||
model = AutoModel(model="fsmn-vad", model_revision="v2.0.2")
|
||||
|
||||
wav_file = f"{model.model_path}/example/asr_example.wav"
|
||||
res = model(input=wav_file)
|
||||
print(res)
|
||||
@ -167,7 +165,6 @@ for i in range(total_chunk_num):
|
||||
from funasr import AutoModel
|
||||
|
||||
model = AutoModel(model="ct-punc", model_revision="v2.0.2")
|
||||
|
||||
res = model(input="那今天的会就到这里吧 happy new year 明年见")
|
||||
print(res)
|
||||
```
|
||||
@ -176,9 +173,8 @@ print(res)
|
||||
from funasr import AutoModel
|
||||
|
||||
model = AutoModel(model="fa-zh", model_revision="v2.0.2")
|
||||
|
||||
wav_file = f"{model.model_path}/example/asr_example.wav"
|
||||
text_file = f"{model.model_path}/example/asr_example.wav"
|
||||
text_file = f"{model.model_path}/example/text.txt"
|
||||
res = model(input=(wav_file, text_file), data_type=("sound", "text"))
|
||||
print(res)
|
||||
```
|
||||
|
||||
36
README_zh.md
36
README_zh.md
@ -60,14 +60,13 @@ FunASR开源了大量在工业数据上预训练模型,您可以在[模型许
|
||||
| 模型名字 | 任务详情 | 训练数据 | 参数量 |
|
||||
|:------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------:|:------------:|:----:|
|
||||
| paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) [🤗]() ) | 语音识别,带时间戳输出,非实时 | 60000小时,中文 | 220M |
|
||||
| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) | 分角色语音识别,带时间戳输出,非实时 | 60000小时,中文 | 220M |
|
||||
| paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) | 语音识别,实时 | 60000小时,中文 | 220M |
|
||||
| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M |
|
||||
| paraformer-en-spk <br> ([⭐]() [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M |
|
||||
| conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M |
|
||||
| ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() ) | 标点恢复 | 100M,中文与英文 | 1.1G |
|
||||
| fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() ) | 语音端点检测,实时 | 5000小时,中文与英文 | 0.4M |
|
||||
| fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() ) | 字级别时间戳预测 | 50000小时,中文 | 38M |
|
||||
| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) | 分角色语音识别,带时间戳输出,非实时 | 60000小时,中文 | 220M |
|
||||
| paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) | 语音识别,实时 | 60000小时,中文 | 220M |
|
||||
| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M |
|
||||
| conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M |
|
||||
| ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() ) | 标点恢复 | 100M,中文与英文 | 1.1G |
|
||||
| fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() ) | 语音端点检测,实时 | 5000小时,中文与英文 | 0.4M |
|
||||
| fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() ) | 字级别时间戳预测 | 50000小时,中文 | 38M |
|
||||
|
||||
|
||||
<a name="快速开始"></a>
|
||||
@ -86,12 +85,15 @@ funasr +model=paraformer-zh +vad_model="fsmn-vad" +punc_model="ct-punc" +input=a
|
||||
### 非实时语音识别
|
||||
```python
|
||||
from funasr import AutoModel
|
||||
|
||||
model = AutoModel(model="paraformer-zh")
|
||||
# for the long duration wav, you could add vad model
|
||||
# model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc")
|
||||
|
||||
res = model(input="asr_example_zh.wav", batch_size=64)
|
||||
# paraformer-zh is a multi-functional asr model
|
||||
# use vad, punc, spk or not as you need
|
||||
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", \
|
||||
vad_model="fsmn-vad", vad_model_revision="v2.0.2", \
|
||||
punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
|
||||
spk_model="cam++", spk_model_revision="v2.0.2")
|
||||
res = model(input=f"{model.model_path}/example/asr_example.wav",
|
||||
batch_size=64,
|
||||
hotword='魔搭')
|
||||
print(res)
|
||||
```
|
||||
注:`model_hub`:表示模型仓库,`ms`为选择modelscope下载,`hf`为选择huggingface下载。
|
||||
@ -105,7 +107,7 @@ chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
|
||||
encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
|
||||
decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
|
||||
|
||||
model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.0")
|
||||
model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.2")
|
||||
|
||||
import soundfile
|
||||
import os
|
||||
@ -163,7 +165,7 @@ for i in range(total_chunk_num):
|
||||
```python
|
||||
from funasr import AutoModel
|
||||
|
||||
model = AutoModel(model="ct-punc", model_revision="v2.0.1")
|
||||
model = AutoModel(model="ct-punc", model_revision="v2.0.2")
|
||||
|
||||
res = model(input="那今天的会就到这里吧 happy new year 明年见")
|
||||
print(res)
|
||||
@ -176,7 +178,7 @@ from funasr import AutoModel
|
||||
model = AutoModel(model="fa-zh", model_revision="v2.0.0")
|
||||
|
||||
wav_file = f"{model.model_path}/example/asr_example.wav"
|
||||
text_file = f"{model.model_path}/example/asr_example.wav"
|
||||
text_file = f"{model.model_path}/example/text.txt"
|
||||
res = model(input=(wav_file, text_file), data_type=("sound", "text"))
|
||||
print(res)
|
||||
```
|
||||
|
||||
@ -144,10 +144,10 @@ void runReg(FUNASR_HANDLE tpass_handle, std::vector<int> chunk_size, vector<stri
|
||||
} else {
|
||||
is_final = false;
|
||||
}
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final,
|
||||
sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
n_total_time += taking_micros;
|
||||
@ -272,7 +272,7 @@ int main(int argc, char** argv)
|
||||
GetValue(asr_mode, ASR_MODE, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = onnx_thread.getValue();
|
||||
int asr_mode_ = -1;
|
||||
if(model_path[ASR_MODE] == "offline"){
|
||||
@ -301,7 +301,7 @@ int main(int argc, char** argv)
|
||||
am_sc = am_scale.getValue();
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
|
||||
@ -97,7 +97,7 @@ int main(int argc, char** argv)
|
||||
GetValue(asr_mode, ASR_MODE, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = onnx_thread.getValue();
|
||||
int asr_mode_ = -1;
|
||||
if(model_path[ASR_MODE] == "offline"){
|
||||
@ -128,7 +128,7 @@ int main(int argc, char** argv)
|
||||
// init wfst decoder
|
||||
FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, glob_beam, lat_beam, am_sc);
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -214,11 +214,11 @@ int main(int argc, char** argv)
|
||||
} else {
|
||||
is_final = false;
|
||||
}
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle,
|
||||
speech_buff+sample_offset, step, punc_cache, is_final, sampling_rate_, "pcm",
|
||||
(ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
|
||||
@ -49,7 +49,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(txt_path, TXT_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = 1;
|
||||
FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num);
|
||||
|
||||
@ -59,7 +59,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -83,9 +83,9 @@ int main(int argc, char *argv[])
|
||||
|
||||
long taking_micros = 0;
|
||||
for(auto& txt_str : txt_list){
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, NULL);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, nullptr);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
string msg = FunASRGetResult(result, 0);
|
||||
|
||||
@ -54,7 +54,7 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wa
|
||||
// warm up
|
||||
for (size_t i = 0; i < 1; i++)
|
||||
{
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs, true, decoder_handle);
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, nullptr, hotwords_embedding, audio_fs, true, decoder_handle);
|
||||
if(result){
|
||||
FunASRFreeResult(result);
|
||||
}
|
||||
@ -67,10 +67,10 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wa
|
||||
break;
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs, true, decoder_handle);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, nullptr, hotwords_embedding, audio_fs, true, decoder_handle);
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
n_total_time += taking_micros;
|
||||
@ -115,10 +115,8 @@ bool is_target_file(const std::string& filename, const std::string target) {
|
||||
|
||||
void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
|
||||
{
|
||||
if (value_arg.isSet()){
|
||||
model_path.insert({key, value_arg.getValue()});
|
||||
LOG(INFO)<< key << " : " << value_arg.getValue();
|
||||
}
|
||||
model_path.insert({key, value_arg.getValue()});
|
||||
LOG(INFO)<< key << " : " << value_arg.getValue();
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
@ -176,7 +174,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_HANDLE asr_handle=FunOfflineInit(model_path, 1);
|
||||
|
||||
if (!asr_handle)
|
||||
@ -185,7 +183,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
|
||||
@ -82,7 +82,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = 1;
|
||||
FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num);
|
||||
|
||||
@ -92,7 +92,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -132,9 +132,9 @@ int main(int argc, char *argv[])
|
||||
for (int i = 0; i < wav_list.size(); i++) {
|
||||
auto& wav_file = wav_list[i];
|
||||
auto& wav_id = wav_ids[i];
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, audio_fs.getValue());
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), nullptr, audio_fs.getValue());
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
|
||||
@ -32,10 +32,8 @@ bool is_target_file(const std::string& filename, const std::string target) {
|
||||
|
||||
void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
|
||||
{
|
||||
if (value_arg.isSet()){
|
||||
model_path.insert({key, value_arg.getValue()});
|
||||
LOG(INFO)<< key << " : " << value_arg.getValue();
|
||||
}
|
||||
model_path.insert({key, value_arg.getValue()});
|
||||
LOG(INFO)<< key << " : " << value_arg.getValue();
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
@ -89,7 +87,7 @@ int main(int argc, char** argv)
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = 1;
|
||||
FUNASR_HANDLE asr_hanlde=FunOfflineInit(model_path, thread_num);
|
||||
|
||||
@ -116,7 +114,7 @@ int main(int argc, char** argv)
|
||||
LOG(INFO) << "hotword path: " << hotword_path;
|
||||
funasr::ExtractHws(hotword_path, hws_map, nn_hotwords_);
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -158,9 +156,9 @@ int main(int argc, char** argv)
|
||||
for (int i = 0; i < wav_list.size(); i++) {
|
||||
auto& wav_file = wav_list[i];
|
||||
auto& wav_id = wav_ids[i];
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs.getValue(), true, decoder_handle);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, nullptr, hotwords_embedding, audio_fs.getValue(), true, decoder_handle);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
|
||||
@ -63,7 +63,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = 1;
|
||||
FUNASR_HANDLE asr_handle=FunASRInit(model_path, thread_num, ASR_ONLINE);
|
||||
|
||||
@ -73,7 +73,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -144,9 +144,9 @@ int main(int argc, char *argv[])
|
||||
} else {
|
||||
is_final = false;
|
||||
}
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, nullptr, is_final, sampling_rate_);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(txt_path, TXT_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = 1;
|
||||
FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num, PUNC_ONLINE);
|
||||
|
||||
@ -79,7 +79,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -107,16 +107,16 @@ int main(int argc, char *argv[])
|
||||
splitString(vad_strs, txt_str, "|");
|
||||
string str_out;
|
||||
FUNASR_RESULT result = nullptr;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
for(auto& vad_str:vad_strs){
|
||||
result=CTTransformerInfer(punc_hanlde, vad_str.c_str(), RASR_NONE, NULL, PUNC_ONLINE, result);
|
||||
result=CTTransformerInfer(punc_hanlde, vad_str.c_str(), RASR_NONE, nullptr, PUNC_ONLINE, result);
|
||||
if(result){
|
||||
string msg = CTTransformerGetResult(result, 0);
|
||||
str_out += msg;
|
||||
LOG(INFO)<<"Online result: "<<msg;
|
||||
}
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO)<<"Results: "<<str_out;
|
||||
|
||||
@ -84,7 +84,7 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wa
|
||||
} else {
|
||||
is_final = false;
|
||||
}
|
||||
FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
|
||||
FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, nullptr, is_final, sampling_rate_);
|
||||
if (result)
|
||||
{
|
||||
FunASRFreeResult(result);
|
||||
@ -130,9 +130,9 @@ void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wa
|
||||
} else {
|
||||
is_final = false;
|
||||
}
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, nullptr, is_final, sampling_rate_);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
n_total_time += taking_micros;
|
||||
@ -210,7 +210,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_HANDLE asr_handle=FunASRInit(model_path, 1, ASR_ONLINE);
|
||||
|
||||
if (!asr_handle)
|
||||
@ -219,7 +219,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
|
||||
@ -89,7 +89,7 @@ int main(int argc, char *argv[])
|
||||
GetValue(wav_path, WAV_PATH, model_path);
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
int thread_num = 1;
|
||||
FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num);
|
||||
|
||||
@ -99,7 +99,7 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -170,9 +170,9 @@ int main(int argc, char *argv[])
|
||||
} else {
|
||||
is_final = false;
|
||||
}
|
||||
gettimeofday(&start, NULL);
|
||||
FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, NULL, is_final, sampling_rate_);
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, nullptr, is_final, sampling_rate_);
|
||||
gettimeofday(&end, nullptr);
|
||||
seconds = (end.tv_sec - start.tv_sec);
|
||||
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
|
||||
|
||||
@ -6,8 +6,8 @@ void *AlignedMalloc(size_t alignment, size_t required_bytes)
|
||||
void *p1; // original block
|
||||
void **p2; // aligned block
|
||||
int offset = alignment - 1 + sizeof(void *);
|
||||
if ((p1 = (void *)malloc(required_bytes + offset)) == NULL) {
|
||||
return NULL;
|
||||
if ((p1 = (void *)malloc(required_bytes + offset)) == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
p2 = (void **)(((size_t)(p1) + offset) & ~(alignment - 1));
|
||||
p2[-1] = p1;
|
||||
|
||||
@ -133,6 +133,7 @@ class AudioWindow {
|
||||
};
|
||||
~AudioWindow(){
|
||||
free(window);
|
||||
window = nullptr;
|
||||
};
|
||||
int put(int val)
|
||||
{
|
||||
@ -160,8 +161,9 @@ AudioFrame::AudioFrame(int start, int end, bool is_final):start(start),end(end),
|
||||
len = end - start;
|
||||
}
|
||||
AudioFrame::~AudioFrame(){
|
||||
if(data != NULL){
|
||||
if(data != nullptr){
|
||||
free(data);
|
||||
data = nullptr;
|
||||
}
|
||||
}
|
||||
int AudioFrame::SetStart(int val)
|
||||
@ -195,38 +197,41 @@ int AudioFrame::Disp()
|
||||
|
||||
Audio::Audio(int data_type) : dest_sample_rate(MODEL_SAMPLE_RATE), data_type(data_type)
|
||||
{
|
||||
speech_buff = NULL;
|
||||
speech_data = NULL;
|
||||
speech_buff = nullptr;
|
||||
speech_data = nullptr;
|
||||
align_size = 1360;
|
||||
seg_sample = dest_sample_rate / 1000;
|
||||
}
|
||||
|
||||
Audio::Audio(int model_sample_rate, int data_type) : dest_sample_rate(model_sample_rate), data_type(data_type)
|
||||
{
|
||||
speech_buff = NULL;
|
||||
speech_data = NULL;
|
||||
speech_buff = nullptr;
|
||||
speech_data = nullptr;
|
||||
align_size = 1360;
|
||||
seg_sample = dest_sample_rate / 1000;
|
||||
}
|
||||
|
||||
Audio::Audio(int model_sample_rate, int data_type, int size) : dest_sample_rate(model_sample_rate), data_type(data_type)
|
||||
{
|
||||
speech_buff = NULL;
|
||||
speech_data = NULL;
|
||||
speech_buff = nullptr;
|
||||
speech_data = nullptr;
|
||||
align_size = (float)size;
|
||||
seg_sample = dest_sample_rate / 1000;
|
||||
}
|
||||
|
||||
Audio::~Audio()
|
||||
{
|
||||
if (speech_buff != NULL) {
|
||||
if (speech_buff != nullptr) {
|
||||
free(speech_buff);
|
||||
speech_buff = nullptr;
|
||||
}
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_char != NULL) {
|
||||
if (speech_char != nullptr) {
|
||||
free(speech_char);
|
||||
speech_char = nullptr;
|
||||
}
|
||||
ClearQueue(frame_queue);
|
||||
ClearQueue(asr_online_queue);
|
||||
@ -269,8 +274,9 @@ void Audio::WavResample(int32_t sampling_rate, const float *waveform,
|
||||
resampler->Resample(waveform, n, true, &samples);
|
||||
//reset speech_data
|
||||
speech_len = samples.size();
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
memset(speech_data, 0, sizeof(float) * speech_len);
|
||||
@ -283,21 +289,21 @@ bool Audio::FfmpegLoad(const char *filename, bool copy2char){
|
||||
#else
|
||||
// from file
|
||||
AVFormatContext* formatContext = avformat_alloc_context();
|
||||
if (avformat_open_input(&formatContext, filename, NULL, NULL) != 0) {
|
||||
if (avformat_open_input(&formatContext, filename, nullptr, nullptr) != 0) {
|
||||
LOG(ERROR) << "Error: Could not open input file.";
|
||||
avformat_close_input(&formatContext);
|
||||
avformat_free_context(formatContext);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (avformat_find_stream_info(formatContext, NULL) < 0) {
|
||||
if (avformat_find_stream_info(formatContext, nullptr) < 0) {
|
||||
LOG(ERROR) << "Error: Could not open input file.";
|
||||
avformat_close_input(&formatContext);
|
||||
avformat_free_context(formatContext);
|
||||
return false;
|
||||
}
|
||||
const AVCodec* codec = NULL;
|
||||
AVCodecParameters* codecParameters = NULL;
|
||||
const AVCodec* codec = nullptr;
|
||||
AVCodecParameters* codecParameters = nullptr;
|
||||
int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
|
||||
if (audioStreamIndex >= 0) {
|
||||
codecParameters = formatContext->streams[audioStreamIndex]->codecpar;
|
||||
@ -321,7 +327,7 @@ bool Audio::FfmpegLoad(const char *filename, bool copy2char){
|
||||
avcodec_free_context(&codecContext);
|
||||
return false;
|
||||
}
|
||||
if (avcodec_open2(codecContext, codec, NULL) < 0) {
|
||||
if (avcodec_open2(codecContext, codec, nullptr) < 0) {
|
||||
LOG(ERROR) << "Error: Could not open audio decoder.";
|
||||
avformat_close_input(&formatContext);
|
||||
avformat_free_context(formatContext);
|
||||
@ -400,14 +406,13 @@ bool Audio::FfmpegLoad(const char *filename, bool copy2char){
|
||||
av_packet_free(&packet);
|
||||
av_frame_free(&frame);
|
||||
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
free(speech_buff);
|
||||
}
|
||||
if (speech_char != NULL) {
|
||||
if (speech_char != nullptr) {
|
||||
free(speech_char);
|
||||
speech_char = nullptr;
|
||||
}
|
||||
offset = 0;
|
||||
|
||||
@ -418,30 +423,25 @@ bool Audio::FfmpegLoad(const char *filename, bool copy2char){
|
||||
}
|
||||
|
||||
speech_len = (resampled_buffers.size()) / 2;
|
||||
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
|
||||
if (speech_buff)
|
||||
{
|
||||
memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
||||
memcpy((void*)speech_buff, (const void*)resampled_buffers.data(), speech_len * sizeof(int16_t));
|
||||
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
if(speech_data){
|
||||
memset(speech_data, 0, sizeof(float) * speech_len);
|
||||
|
||||
float scale = 1;
|
||||
if (data_type == 1) {
|
||||
scale = 32768;
|
||||
scale = 32768.0f;
|
||||
}
|
||||
for (int32_t i = 0; i != speech_len; ++i) {
|
||||
speech_data[i] = (float)speech_buff[i] / scale;
|
||||
for (int32_t i = 0; i < speech_len; ++i) {
|
||||
int16_t val = (int16_t)((resampled_buffers[2 * i + 1] << 8) | resampled_buffers[2 * i]);
|
||||
speech_data[i] = (float)val / scale;
|
||||
}
|
||||
|
||||
AudioFrame* frame = new AudioFrame(speech_len);
|
||||
frame_queue.push(frame);
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
}else{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -468,7 +468,7 @@ bool Audio::FfmpegLoad(const char* buf, int n_file_len){
|
||||
}
|
||||
AVFormatContext* formatContext = avformat_alloc_context();
|
||||
formatContext->pb = avio_ctx;
|
||||
if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) {
|
||||
if (avformat_open_input(&formatContext, "", nullptr, nullptr) != 0) {
|
||||
LOG(ERROR) << "Error: Could not open input file.";
|
||||
avio_context_free(&avio_ctx);
|
||||
avformat_close_input(&formatContext);
|
||||
@ -476,15 +476,15 @@ bool Audio::FfmpegLoad(const char* buf, int n_file_len){
|
||||
return false;
|
||||
}
|
||||
|
||||
if (avformat_find_stream_info(formatContext, NULL) < 0) {
|
||||
if (avformat_find_stream_info(formatContext, nullptr) < 0) {
|
||||
LOG(ERROR) << "Error: Could not find stream information.";
|
||||
avio_context_free(&avio_ctx);
|
||||
avformat_close_input(&formatContext);
|
||||
avformat_free_context(formatContext);
|
||||
return false;
|
||||
}
|
||||
const AVCodec* codec = NULL;
|
||||
AVCodecParameters* codecParameters = NULL;
|
||||
const AVCodec* codec = nullptr;
|
||||
AVCodecParameters* codecParameters = nullptr;
|
||||
int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
|
||||
if (audioStreamIndex >= 0) {
|
||||
codecParameters = formatContext->streams[audioStreamIndex]->codecpar;
|
||||
@ -505,7 +505,7 @@ bool Audio::FfmpegLoad(const char* buf, int n_file_len){
|
||||
avcodec_free_context(&codecContext);
|
||||
return false;
|
||||
}
|
||||
if (avcodec_open2(codecContext, codec, NULL) < 0) {
|
||||
if (avcodec_open2(codecContext, codec, nullptr) < 0) {
|
||||
LOG(ERROR) << "Error: Could not open audio decoder.";
|
||||
avio_context_free(&avio_ctx);
|
||||
avformat_close_input(&formatContext);
|
||||
@ -590,39 +590,31 @@ bool Audio::FfmpegLoad(const char* buf, int n_file_len){
|
||||
av_packet_free(&packet);
|
||||
av_frame_free(&frame);
|
||||
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
free(speech_buff);
|
||||
}
|
||||
offset = 0;
|
||||
|
||||
speech_len = (resampled_buffers.size()) / 2;
|
||||
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
|
||||
if (speech_buff)
|
||||
{
|
||||
memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
||||
memcpy((void*)speech_buff, (const void*)resampled_buffers.data(), speech_len * sizeof(int16_t));
|
||||
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
if(speech_data){
|
||||
memset(speech_data, 0, sizeof(float) * speech_len);
|
||||
|
||||
float scale = 1;
|
||||
if (data_type == 1) {
|
||||
scale = 32768;
|
||||
scale = 32768.0f;
|
||||
}
|
||||
for (int32_t i = 0; i != speech_len; ++i) {
|
||||
speech_data[i] = (float)speech_buff[i] / scale;
|
||||
for (int32_t i = 0; i < speech_len; ++i) {
|
||||
int16_t val = (int16_t)((resampled_buffers[2 * i + 1] << 8) | resampled_buffers[2 * i]);
|
||||
speech_data[i] = (float)val / scale;
|
||||
}
|
||||
|
||||
AudioFrame* frame = new AudioFrame(speech_len);
|
||||
frame_queue.push(frame);
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
}else{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -630,11 +622,13 @@ bool Audio::FfmpegLoad(const char* buf, int n_file_len){
|
||||
bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample)
|
||||
{
|
||||
WaveHeader header;
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
if (speech_buff != nullptr) {
|
||||
free(speech_buff);
|
||||
speech_buff = nullptr;
|
||||
}
|
||||
|
||||
offset = 0;
|
||||
@ -705,8 +699,9 @@ bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample)
|
||||
bool Audio::LoadWav2Char(const char *filename, int32_t* sampling_rate)
|
||||
{
|
||||
WaveHeader header;
|
||||
if (speech_char != NULL) {
|
||||
if (speech_char != nullptr) {
|
||||
free(speech_char);
|
||||
speech_char = nullptr;
|
||||
}
|
||||
offset = 0;
|
||||
std::ifstream is(filename, std::ifstream::binary);
|
||||
@ -744,13 +739,14 @@ bool Audio::LoadWav2Char(const char *filename, int32_t* sampling_rate)
|
||||
bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate)
|
||||
{
|
||||
WaveHeader header;
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
if (speech_buff != nullptr) {
|
||||
free(speech_buff);
|
||||
speech_buff = nullptr;
|
||||
}
|
||||
offset = 0;
|
||||
|
||||
std::memcpy(&header, buf, sizeof(header));
|
||||
|
||||
@ -790,33 +786,24 @@ bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate)
|
||||
|
||||
bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
|
||||
{
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
free(speech_buff);
|
||||
}
|
||||
offset = 0;
|
||||
|
||||
speech_len = n_buf_len / 2;
|
||||
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
|
||||
if (speech_buff)
|
||||
{
|
||||
memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
||||
memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
|
||||
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
memset(speech_data, 0, sizeof(float) * speech_len);
|
||||
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
if(speech_data){
|
||||
float scale = 1;
|
||||
if (data_type == 1) {
|
||||
scale = 32768;
|
||||
scale = 32768.0f;
|
||||
}
|
||||
const uint8_t* byte_buf = reinterpret_cast<const uint8_t*>(buf);
|
||||
for (int32_t i = 0; i < speech_len; ++i) {
|
||||
int16_t val = (int16_t)((byte_buf[2 * i + 1] << 8) | byte_buf[2 * i]);
|
||||
speech_data[i] = (float)val / scale;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i != speech_len; ++i) {
|
||||
speech_data[i] = (float)speech_buff[i] / scale;
|
||||
}
|
||||
|
||||
//resample
|
||||
if(*sampling_rate != dest_sample_rate){
|
||||
WavResample(*sampling_rate, speech_data, speech_len);
|
||||
@ -824,44 +811,33 @@ bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
|
||||
|
||||
AudioFrame* frame = new AudioFrame(speech_len);
|
||||
frame_queue.push(frame);
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
else
|
||||
}else{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Audio::LoadPcmwavOnline(const char* buf, int n_buf_len, int32_t* sampling_rate)
|
||||
{
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
free(speech_buff);
|
||||
}
|
||||
if (speech_char != NULL) {
|
||||
free(speech_char);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
|
||||
speech_len = n_buf_len / 2;
|
||||
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
|
||||
if (speech_buff)
|
||||
{
|
||||
memset(speech_buff, 0, sizeof(int16_t) * speech_len);
|
||||
memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
|
||||
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
memset(speech_data, 0, sizeof(float) * speech_len);
|
||||
|
||||
speech_data = (float*)malloc(sizeof(float) * speech_len);
|
||||
if(speech_data){
|
||||
float scale = 1;
|
||||
if (data_type == 1) {
|
||||
scale = 32768;
|
||||
scale = 32768.0f;
|
||||
}
|
||||
const uint8_t* byte_buf = reinterpret_cast<const uint8_t*>(buf);
|
||||
for (int32_t i = 0; i < speech_len; ++i) {
|
||||
int16_t val = (int16_t)((byte_buf[2 * i + 1] << 8) | byte_buf[2 * i]);
|
||||
speech_data[i] = (float)val / scale;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i != speech_len; ++i) {
|
||||
speech_data[i] = (float)speech_buff[i] / scale;
|
||||
}
|
||||
|
||||
//resample
|
||||
if(*sampling_rate != dest_sample_rate){
|
||||
WavResample(*sampling_rate, speech_data, speech_len);
|
||||
@ -873,20 +849,22 @@ bool Audio::LoadPcmwavOnline(const char* buf, int n_buf_len, int32_t* sampling_r
|
||||
|
||||
AudioFrame* frame = new AudioFrame(speech_len);
|
||||
frame_queue.push(frame);
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
else
|
||||
}else{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample)
|
||||
{
|
||||
if (speech_data != NULL) {
|
||||
if (speech_data != nullptr) {
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
}
|
||||
if (speech_buff != NULL) {
|
||||
if (speech_buff != nullptr) {
|
||||
free(speech_buff);
|
||||
speech_buff = nullptr;
|
||||
}
|
||||
offset = 0;
|
||||
|
||||
@ -937,8 +915,9 @@ bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resamp
|
||||
|
||||
bool Audio::LoadPcmwav2Char(const char* filename, int32_t* sampling_rate)
|
||||
{
|
||||
if (speech_char != NULL) {
|
||||
if (speech_char != nullptr) {
|
||||
free(speech_char);
|
||||
speech_char = nullptr;
|
||||
}
|
||||
offset = 0;
|
||||
|
||||
@ -964,8 +943,9 @@ bool Audio::LoadPcmwav2Char(const char* filename, int32_t* sampling_rate)
|
||||
|
||||
bool Audio::LoadOthers2Char(const char* filename)
|
||||
{
|
||||
if (speech_char != NULL) {
|
||||
if (speech_char != nullptr) {
|
||||
free(speech_char);
|
||||
speech_char = nullptr;
|
||||
}
|
||||
|
||||
FILE* fp;
|
||||
@ -1070,6 +1050,7 @@ void Audio::Padding()
|
||||
new_data[tmp_off + i] = speech_data[ii];
|
||||
}
|
||||
free(speech_data);
|
||||
speech_data = nullptr;
|
||||
speech_data = new_data;
|
||||
speech_len = num_new_samples;
|
||||
|
||||
@ -1088,7 +1069,7 @@ void Audio::Split(OfflineStream* offline_stream)
|
||||
frame_queue.pop();
|
||||
int sp_len = frame->GetLen();
|
||||
delete frame;
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
|
||||
std::vector<float> pcm_data(speech_data, speech_data+sp_len);
|
||||
vector<std::vector<int>> vad_segments = (offline_stream->vad_handle)->Infer(pcm_data);
|
||||
@ -1100,7 +1081,7 @@ void Audio::Split(OfflineStream* offline_stream)
|
||||
frame->SetStart(start);
|
||||
frame->SetEnd(end);
|
||||
frame_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1112,7 +1093,7 @@ void Audio::Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, boo
|
||||
frame_queue.pop();
|
||||
int sp_len = frame->GetLen();
|
||||
delete frame;
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
|
||||
std::vector<float> pcm_data(speech_data, speech_data+sp_len);
|
||||
vad_segments = vad_obj->Infer(pcm_data, input_finished);
|
||||
@ -1127,7 +1108,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame_queue.pop();
|
||||
int sp_len = frame->GetLen();
|
||||
delete frame;
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
|
||||
std::vector<float> pcm_data(speech_data, speech_data+sp_len);
|
||||
vector<std::vector<int>> vad_segments = vad_obj->Infer(pcm_data, input_finished);
|
||||
@ -1148,7 +1129,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->data = (float*)malloc(sizeof(float) * step);
|
||||
memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
|
||||
asr_online_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
speech_start += step/seg_sample;
|
||||
}
|
||||
}
|
||||
@ -1176,7 +1157,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->data = (float*)malloc(sizeof(float) * (end-start));
|
||||
memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
|
||||
asr_online_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
|
||||
if(asr_mode != ASR_ONLINE){
|
||||
@ -1187,7 +1168,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->data = (float*)malloc(sizeof(float) * (end-start));
|
||||
memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
|
||||
asr_offline_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
|
||||
speech_start = -1;
|
||||
@ -1210,7 +1191,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->data = (float*)malloc(sizeof(float) * step);
|
||||
memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
|
||||
asr_online_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
speech_start += step/seg_sample;
|
||||
}
|
||||
}
|
||||
@ -1235,7 +1216,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->data = (float*)malloc(sizeof(float) * (end-offline_start));
|
||||
memcpy(frame->data, all_samples.data()+offline_start-offset, (end-offline_start)*sizeof(float));
|
||||
asr_offline_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
|
||||
if(asr_mode != ASR_OFFLINE){
|
||||
@ -1253,7 +1234,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->data = (float*)malloc(sizeof(float) * step);
|
||||
memcpy(frame->data, all_samples.data()+start-offset+sample_offset, step*sizeof(float));
|
||||
asr_online_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
}else{
|
||||
frame = new AudioFrame(0);
|
||||
@ -1261,7 +1242,7 @@ void Audio::Split(VadModel* vad_obj, int chunk_len, bool input_finished, ASR_TYP
|
||||
frame->global_start = speech_start; // in this case start >= end
|
||||
frame->global_end = speech_end_i;
|
||||
asr_online_queue.push(frame);
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
}
|
||||
speech_start = -1;
|
||||
|
||||
@ -48,7 +48,7 @@ class BiasLm {
|
||||
std::vector<std::vector<int>> split_id_vec;
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
|
||||
LoadCfgFromYaml(cfg_file.c_str(), opt_);
|
||||
while (getline(ifs_hws, line)) {
|
||||
@ -86,7 +86,7 @@ class BiasLm {
|
||||
BuildGraph(split_id_vec, custom_weight);
|
||||
ifs_hws.close();
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Build bias lm takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
@ -99,7 +99,7 @@ class BiasLm {
|
||||
std::vector<std::vector<int>> split_id_vec;
|
||||
|
||||
struct timeval start, end;
|
||||
gettimeofday(&start, NULL);
|
||||
gettimeofday(&start, nullptr);
|
||||
opt_.incre_bias_ = inc_bias;
|
||||
for (const pair<string, int>& kv : hws_map) {
|
||||
float score = 1.0f;
|
||||
@ -128,7 +128,7 @@ class BiasLm {
|
||||
}
|
||||
BuildGraph(split_id_vec, custom_weight);
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
gettimeofday(&end, nullptr);
|
||||
long seconds = (end.tv_sec - start.tv_sec);
|
||||
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
|
||||
LOG(INFO) << "Build bias lm takes " << (double)modle_init_micros / 1000000 << " s";
|
||||
|
||||
@ -441,7 +441,7 @@ uint16_t EncodeConverter::ToUni(const char* sc, int &len)
|
||||
}
|
||||
|
||||
bool EncodeConverter::IsAllChineseCharactor(const U8CHAR_T* pu8, size_t ilen) {
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -458,7 +458,7 @@ bool EncodeConverter::IsAllChineseCharactor(const U8CHAR_T* pu8, size_t ilen) {
|
||||
}
|
||||
|
||||
bool EncodeConverter::HasAlpha(const U8CHAR_T* pu8, size_t ilen) {
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < ilen; i++) {
|
||||
@ -471,7 +471,7 @@ bool EncodeConverter::HasAlpha(const U8CHAR_T* pu8, size_t ilen) {
|
||||
|
||||
|
||||
bool EncodeConverter::IsAllAlpha(const U8CHAR_T* pu8, size_t ilen) {
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < ilen; i++) {
|
||||
@ -483,7 +483,7 @@ bool EncodeConverter::IsAllAlpha(const U8CHAR_T* pu8, size_t ilen) {
|
||||
}
|
||||
|
||||
bool EncodeConverter::IsAllAlphaAndPunct(const U8CHAR_T* pu8, size_t ilen) {
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
bool flag1 = HasAlpha(pu8, ilen);
|
||||
@ -500,7 +500,7 @@ bool EncodeConverter::IsAllAlphaAndPunct(const U8CHAR_T* pu8, size_t ilen) {
|
||||
}
|
||||
|
||||
bool EncodeConverter::IsAllAlphaAndDigit(const U8CHAR_T* pu8, size_t ilen) {
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
bool flag1 = HasAlpha(pu8, ilen);
|
||||
@ -516,7 +516,7 @@ bool EncodeConverter::IsAllAlphaAndDigit(const U8CHAR_T* pu8, size_t ilen) {
|
||||
return true;
|
||||
}
|
||||
bool EncodeConverter::IsAllAlphaAndDigitAndBlank(const U8CHAR_T* pu8, size_t ilen) {
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < ilen; i++) {
|
||||
@ -529,7 +529,7 @@ bool EncodeConverter::IsAllAlphaAndDigitAndBlank(const U8CHAR_T* pu8, size_t ile
|
||||
bool EncodeConverter::NeedAddTailBlank(std::string str) {
|
||||
U8CHAR_T *pu8 = (U8CHAR_T*)str.data();
|
||||
size_t ilen = str.size();
|
||||
if (pu8 == NULL || ilen <= 0) {
|
||||
if (pu8 == nullptr || ilen <= 0) {
|
||||
return false;
|
||||
}
|
||||
if (IsAllAlpha(pu8, ilen) || IsAllAlphaAndPunct(pu8, ilen) || IsAllAlphaAndDigit(pu8, ilen)) {
|
||||
|
||||
@ -88,15 +88,15 @@ namespace funasr {
|
||||
#ifdef _MSC_VER
|
||||
// convert to the local ansi page
|
||||
static std::string UTF8ToLocaleAnsi(const std::string& strUTF8) {
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, nullptr, 0);
|
||||
unsigned short*wszGBK = new unsigned short[len + 1];
|
||||
memset(wszGBK, 0, len * 2 + 2);
|
||||
MultiByteToWideChar(CP_UTF8, 0, (LPCCH)strUTF8.c_str(), -1, (LPWSTR)wszGBK, len);
|
||||
|
||||
len = WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, NULL, 0, NULL, NULL);
|
||||
len = WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, nullptr, 0, nullptr, nullptr);
|
||||
char *szGBK = new char[len + 1];
|
||||
memset(szGBK, 0, len + 1);
|
||||
WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, szGBK, len, NULL, NULL);
|
||||
WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, szGBK, len, nullptr, nullptr);
|
||||
std::string strTemp(szGBK);
|
||||
delete[]szGBK;
|
||||
delete[]wszGBK;
|
||||
|
||||
@ -480,7 +480,7 @@
|
||||
|
||||
audio->Split(vad_online_handle, chunk_len, input_finished, mode);
|
||||
|
||||
funasr::AudioFrame* frame = NULL;
|
||||
funasr::AudioFrame* frame = nullptr;
|
||||
while(audio->FetchChunck(frame) > 0){
|
||||
string msg = ((funasr::ParaformerOnline*)asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
|
||||
if(mode == ASR_ONLINE){
|
||||
@ -504,9 +504,9 @@
|
||||
}else if(mode == ASR_TWO_PASS){
|
||||
p_result->msg += msg;
|
||||
}
|
||||
if(frame != NULL){
|
||||
if(frame != nullptr){
|
||||
delete frame;
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -561,9 +561,9 @@
|
||||
if (!(p_result->stamp).empty()){
|
||||
p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
|
||||
}
|
||||
if(frame != NULL){
|
||||
if(frame != nullptr){
|
||||
delete frame;
|
||||
frame = NULL;
|
||||
frame = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -53,8 +53,8 @@ void CTokenizer::JiebaInit(std::string punc_config){
|
||||
|
||||
SetJiebaRes(jieba_dict_trie_, jieba_model_);
|
||||
}else {
|
||||
jieba_dict_trie_ = NULL;
|
||||
jieba_model_ = NULL;
|
||||
jieba_dict_trie_ = nullptr;
|
||||
jieba_model_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -2,20 +2,21 @@
|
||||
download_model_dir="/workspace/models"
|
||||
model_dir="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx"
|
||||
vad_dir="damo/speech_fsmn_vad_zh-cn-16k-common-onnx"
|
||||
punc_dir="damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx"
|
||||
itn_dir="thuduj12/fst_itn_zh"
|
||||
lm_dir="damo/speech_ngram_lm_zh-cn-ai-wesp-fst"
|
||||
punc_dir=""
|
||||
itn_dir=""
|
||||
lm_dir=""
|
||||
port=10095
|
||||
certfile="../../../ssl_key/server.crt"
|
||||
keyfile="../../../ssl_key/server.key"
|
||||
hotword="../../hotwords.txt"
|
||||
# set decoder_thread_num
|
||||
decoder_thread_num=$(cat /proc/cpuinfo | grep "processor"|wc -l) || { echo "Get cpuinfo failed. Set decoder_thread_num = 32"; decoder_thread_num=32; }
|
||||
decoder_thread_num=8
|
||||
multiple_io=16
|
||||
io_thread_num=$(( (decoder_thread_num + multiple_io - 1) / multiple_io ))
|
||||
model_thread_num=1
|
||||
model_thread_num=5
|
||||
|
||||
. ../egs/aishell/transformer/utils/parse_options.sh || exit 1;
|
||||
. ./tools/utils/parse_options.sh || exit 1;
|
||||
|
||||
if [ -z "$certfile" ] || [ "$certfile" = "0" ]; then
|
||||
certfile=""
|
||||
|
||||
@ -16,7 +16,7 @@ multiple_io=16
|
||||
io_thread_num=$(( (decoder_thread_num + multiple_io - 1) / multiple_io ))
|
||||
model_thread_num=1
|
||||
|
||||
. ../egs/aishell/transformer/utils/parse_options.sh || exit 1;
|
||||
. ./tools/utils/parse_options.sh || exit 1;
|
||||
|
||||
if [ -z "$certfile" ] || [ "$certfile" = "0" ]; then
|
||||
certfile=""
|
||||
|
||||
97
runtime/tools/utils/parse_options.sh
Executable file
97
runtime/tools/utils/parse_options.sh
Executable file
@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
|
||||
# Arnab Ghoshal, Karel Vesely
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Parse command-line options.
|
||||
# To be sourced by another script (as in ". parse_options.sh").
|
||||
# Option format is: --option-name arg
|
||||
# and shell variable "option_name" gets set to value "arg."
|
||||
# The exception is --help, which takes no arguments, but prints the
|
||||
# $help_message variable (if defined).
|
||||
|
||||
|
||||
###
|
||||
### The --config file options have lower priority than command line
|
||||
### options, so we need to import them first...
|
||||
###
|
||||
|
||||
# Now import all the configs specified by command-line, in left-to-right order
|
||||
for ((argpos=1; argpos<$#; argpos++)); do
|
||||
if [ "${!argpos}" == "--config" ]; then
|
||||
argpos_plus1=$((argpos+1))
|
||||
config=${!argpos_plus1}
|
||||
[ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
|
||||
. $config # source the config file.
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
###
|
||||
### Now we process the command line options
|
||||
###
|
||||
while true; do
|
||||
[ -z "${1:-}" ] && break; # break if there are no arguments
|
||||
case "$1" in
|
||||
# If the enclosing script is called with --help option, print the help
|
||||
# message and exit. Scripts should put help messages in $help_message
|
||||
--help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
|
||||
else printf "$help_message\n" 1>&2 ; fi;
|
||||
exit 0 ;;
|
||||
--*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
|
||||
exit 1 ;;
|
||||
# If the first command-line argument begins with "--" (e.g. --foo-bar),
|
||||
# then work out the variable name as $name, which will equal "foo_bar".
|
||||
--*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
|
||||
# Next we test whether the variable in question is undefined-- if so it's
|
||||
# an invalid option and we die. Note: $0 evaluates to the name of the
|
||||
# enclosing script.
|
||||
# The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
|
||||
# is undefined. We then have to wrap this test inside "eval" because
|
||||
# foo_bar is itself inside a variable ($name).
|
||||
eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
|
||||
|
||||
oldval="`eval echo \\$$name`";
|
||||
# Work out whether we seem to be expecting a Boolean argument.
|
||||
if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
|
||||
was_bool=true;
|
||||
else
|
||||
was_bool=false;
|
||||
fi
|
||||
|
||||
# Set the variable to the right value-- the escaped quotes make it work if
|
||||
# the option had spaces, like --cmd "queue.pl -sync y"
|
||||
eval $name=\"$2\";
|
||||
|
||||
# Check that Boolean-valued arguments are really Boolean.
|
||||
if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
|
||||
echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
|
||||
exit 1;
|
||||
fi
|
||||
shift 2;
|
||||
;;
|
||||
*) break;
|
||||
esac
|
||||
done
|
||||
|
||||
|
||||
# Check for an empty argument to the --cmd option, which can easily occur as a
|
||||
# result of scripting errors.
|
||||
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
|
||||
|
||||
|
||||
true; # so this script returns exit code 0.
|
||||
@ -409,7 +409,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
|
||||
}
|
||||
|
||||
// hotwords: fst/nn
|
||||
if(msg_data->hotwords_embedding == NULL){
|
||||
if(msg_data->hotwords_embedding == nullptr){
|
||||
std::unordered_map<std::string, int> merged_hws_map;
|
||||
std::string nn_hotwords = "";
|
||||
|
||||
@ -458,7 +458,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
|
||||
msg_data->msg["audio_fs"] = jsonresult["audio_fs"];
|
||||
}
|
||||
if (jsonresult.contains("chunk_size")) {
|
||||
if (msg_data->tpass_online_handle == NULL) {
|
||||
if (msg_data->tpass_online_handle == nullptr) {
|
||||
std::vector<int> chunk_size_vec =
|
||||
jsonresult["chunk_size"].get<std::vector<int>>();
|
||||
// check chunk_size_vec
|
||||
@ -480,7 +480,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
|
||||
if ((jsonresult["is_speaking"] == false ||
|
||||
jsonresult["is_finished"] == true) &&
|
||||
msg_data->msg["is_eof"] != true &&
|
||||
msg_data->hotwords_embedding != NULL) {
|
||||
msg_data->hotwords_embedding != nullptr) {
|
||||
LOG(INFO) << "client done";
|
||||
|
||||
// if it is in final message, post the sample_data to decode
|
||||
@ -532,7 +532,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
|
||||
|
||||
try{
|
||||
// post to decode
|
||||
if (msg_data->msg["is_eof"] != true && msg_data->hotwords_embedding != NULL) {
|
||||
if (msg_data->msg["is_eof"] != true && msg_data->hotwords_embedding != nullptr) {
|
||||
std::vector<std::vector<float>> hotwords_embedding_(*(msg_data->hotwords_embedding));
|
||||
msg_data->strand_->post(
|
||||
std::bind(&WebSocketServer::do_decoder, this,
|
||||
|
||||
@ -55,13 +55,13 @@ typedef struct {
|
||||
nlohmann::json msg;
|
||||
std::shared_ptr<std::vector<char>> samples;
|
||||
std::shared_ptr<std::vector<std::vector<std::string>>> punc_cache;
|
||||
std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=NULL;
|
||||
std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=nullptr;
|
||||
std::shared_ptr<websocketpp::lib::mutex> thread_lock; // lock for each connection
|
||||
FUNASR_HANDLE tpass_online_handle=NULL;
|
||||
FUNASR_HANDLE tpass_online_handle=nullptr;
|
||||
std::string online_res = "";
|
||||
std::string tpass_res = "";
|
||||
std::shared_ptr<asio::io_context::strand> strand_; // for data execute in order
|
||||
FUNASR_DEC_HANDLE decoder_handle=NULL;
|
||||
FUNASR_DEC_HANDLE decoder_handle=nullptr;
|
||||
} FUNASR_MESSAGE;
|
||||
|
||||
// See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about
|
||||
@ -139,7 +139,7 @@ class WebSocketServer {
|
||||
asio::io_context& io_decoder_; // threads for asr decoder
|
||||
// std::ofstream fout;
|
||||
// FUNASR_HANDLE asr_handle; // asr engine handle
|
||||
FUNASR_HANDLE tpass_handle=NULL;
|
||||
FUNASR_HANDLE tpass_handle=nullptr;
|
||||
bool isonline = true; // online or offline engine, now only support offline
|
||||
bool is_ssl = true;
|
||||
server* server_; // websocket server
|
||||
|
||||
@ -77,15 +77,16 @@ void WebSocketServer::do_decoder(const std::vector<char>& buffer,
|
||||
std::string stamp_sents="";
|
||||
try{
|
||||
FUNASR_RESULT Result = FunOfflineInferBuffer(
|
||||
asr_handle, buffer.data(), buffer.size(), RASR_NONE, NULL,
|
||||
asr_handle, buffer.data(), buffer.size(), RASR_NONE, nullptr,
|
||||
hotwords_embedding, audio_fs, wav_format, itn, decoder_handle);
|
||||
if (Result != NULL){
|
||||
if (Result != nullptr){
|
||||
asr_result = FunASRGetResult(Result, 0); // get decode result
|
||||
stamp_res = FunASRGetStamp(Result);
|
||||
stamp_sents = FunASRGetStampSents(Result);
|
||||
FunASRFreeResult(Result);
|
||||
} else{
|
||||
LOG(ERROR) << "FUNASR_RESULT is NULL.";
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||
LOG(ERROR) << "FUNASR_RESULT is nullptr.";
|
||||
}
|
||||
}catch (std::exception const& e) {
|
||||
LOG(ERROR) << e.what();
|
||||
@ -306,7 +307,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
|
||||
}
|
||||
|
||||
// hotwords: fst/nn
|
||||
if(msg_data->hotwords_embedding == NULL){
|
||||
if(msg_data->hotwords_embedding == nullptr){
|
||||
std::unordered_map<std::string, int> merged_hws_map;
|
||||
std::string nn_hotwords = "";
|
||||
|
||||
@ -359,7 +360,7 @@ void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
|
||||
if ((jsonresult["is_speaking"] == false ||
|
||||
jsonresult["is_finished"] == true) &&
|
||||
msg_data->msg["is_eof"] != true &&
|
||||
msg_data->hotwords_embedding != NULL) {
|
||||
msg_data->hotwords_embedding != nullptr) {
|
||||
LOG(INFO) << "client done";
|
||||
// for offline, send all receive data to decoder engine
|
||||
std::vector<std::vector<float>> hotwords_embedding_(*(msg_data->hotwords_embedding));
|
||||
|
||||
@ -58,9 +58,9 @@ typedef struct {
|
||||
typedef struct {
|
||||
nlohmann::json msg;
|
||||
std::shared_ptr<std::vector<char>> samples;
|
||||
std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=NULL;
|
||||
std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=nullptr;
|
||||
std::shared_ptr<websocketpp::lib::mutex> thread_lock; // lock for each connection
|
||||
FUNASR_DEC_HANDLE decoder_handle=NULL;
|
||||
FUNASR_DEC_HANDLE decoder_handle=nullptr;
|
||||
} FUNASR_MESSAGE;
|
||||
|
||||
// See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about
|
||||
|
||||
Loading…
Reference in New Issue
Block a user