From 7263fb08e9170e90e67cb9b48884cc6a35cb3b62 Mon Sep 17 00:00:00 2001 From: Haitao Date: Fri, 13 Dec 2024 13:47:15 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AF=86=E5=88=AB=E7=BB=93=E6=9E=9C=E4=B8=AD?= =?UTF-8?q?=E6=9C=89=E8=8B=B1=E8=AF=AD=E6=97=B6=EF=BC=8C=E7=BC=BA=E5=B0=91?= =?UTF-8?q?=E7=A9=BA=E6=A0=BC=E6=88=96=E8=80=85=E7=AC=AC=E4=B8=80=E4=B8=AA?= =?UTF-8?q?=E5=8D=95=E8=AF=8D=E7=9A=84=E9=97=AE=E9=A2=98=20(#2284)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update ct-transformer-online.cpp 修复最后两个单词之间没有空格的问题 * Update ct-transformer-online.cpp 解决语音中连续两句英语,offline结果丢失第二句第一个单词的情况。 --- runtime/onnxruntime/src/ct-transformer-online.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/runtime/onnxruntime/src/ct-transformer-online.cpp b/runtime/onnxruntime/src/ct-transformer-online.cpp index 769bb6544..83c03273a 100644 --- a/runtime/onnxruntime/src/ct-transformer-online.cpp +++ b/runtime/onnxruntime/src/ct-transformer-online.cpp @@ -42,6 +42,11 @@ string CTTransformerOnline::AddPunc(const char* sz_input, vector &arr_ca vector InputData; string strText; //full_text strText = accumulate(arr_cache.begin(), arr_cache.end(), strText); + + // 如果上一句的结尾是英语字母,并且这一句的开始也是英语字母,应该添加空格 + if ((strText.size() > 0 and !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80))) + strText += " "; + strText += sz_input; // full_text = precache + text m_tokenizer.Tokenize(strText.c_str(), strOut, InputData); @@ -107,7 +112,7 @@ string CTTransformerOnline::AddPunc(const char* sz_input, vector &arr_ca { if (!(sentence_words_list[i][0] & 0x80) && (i + 1) < sentence_words_list.size() && !(sentence_words_list[i + 1][0] & 0x80)) { - sentence_words_list[i] = " " + sentence_words_list[i]; + sentence_words_list[i] = sentence_words_list[i] + " "; } if (nSkipNum < arr_cache.size()) // if skip_num < len(cache): nSkipNum++;