diff --git a/model_zoo/readme_zh.md b/model_zoo/readme_zh.md index 657b4f184..ca56526a6 100644 --- a/model_zoo/readme_zh.md +++ b/model_zoo/readme_zh.md @@ -23,6 +23,5 @@ | paraformer-zh-spk
( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) | 分角色语音识别,带时间戳输出,非实时 | 60000小时,中文 | 220M | | paraformer-zh-streaming
( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) | 语音识别,实时 | 60000小时,中文 | 220M | | paraformer-zh-streaming-small
( [⭐](https://www.modelscope.cn/models/iic/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) | 语音识别,实时 | 60000小时,中文 | 220M | - -| paraformer-en
( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M | +| paraformer-en
( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | 语音识别,非实时 | 50000小时,英文 | 220M | diff --git a/runtime/onnxruntime/src/ct-transformer-online.cpp b/runtime/onnxruntime/src/ct-transformer-online.cpp index 769bb6544..83c03273a 100644 --- a/runtime/onnxruntime/src/ct-transformer-online.cpp +++ b/runtime/onnxruntime/src/ct-transformer-online.cpp @@ -42,6 +42,11 @@ string CTTransformerOnline::AddPunc(const char* sz_input, vector &arr_ca vector InputData; string strText; //full_text strText = accumulate(arr_cache.begin(), arr_cache.end(), strText); + + // If the cached text ends with a single-byte (ASCII, high bit clear) character and the new input also starts with one, e.g. both are English letters, insert a space so the two words are not glued together + if ((strText.size() > 0 && !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80))) + strText += " "; + strText += sz_input; // full_text = precache + text m_tokenizer.Tokenize(strText.c_str(), strOut, InputData); @@ -107,7 +112,7 @@ string CTTransformerOnline::AddPunc(const char* sz_input, vector &arr_ca { if (!(sentence_words_list[i][0] & 0x80) && (i + 1) < sentence_words_list.size() && !(sentence_words_list[i + 1][0] & 0x80)) { - sentence_words_list[i] = " " + sentence_words_list[i]; + sentence_words_list[i] = sentence_words_list[i] + " "; } if (nSkipNum < arr_cache.size()) // if skip_num < len(cache): nSkipNum++;