diff --git a/runtime/docs/websocket_protocol.md b/runtime/docs/websocket_protocol.md index 867a18290..588339e2d 100644 --- a/runtime/docs/websocket_protocol.md +++ b/runtime/docs/websocket_protocol.md @@ -45,7 +45,7 @@ Parameter explanation: `text`: the text output of speech recognition `is_final`: indicating the end of recognition `timestamp`:If AM is a timestamp model, it will return this field, indicating the timestamp, in the format of "[[100,200], [200,500]]" -`stamp_sents`:If AM is a timestamp model, it will return this field, indicating the stamp_sents, in the format of "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" +`stamp_sents`:If AM is a timestamp model, it will return this field, indicating the sentence-level timestamps, in the format of "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" ``` ## Real-time Speech Recognition @@ -94,5 +94,5 @@ Parameter explanation: `text`: the text output of speech recognition `is_final`: indicating the end of recognition `timestamp`:If AM is a timestamp model, it will return this field, indicating the timestamp, in the format of "[[100,200], [200,500]]" -`stamp_sents`:If AM is a timestamp model, it will return this field, indicating the stamp_sents, in the format of "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" +`stamp_sents`:If AM is a timestamp model, it will return this field, indicating the sentence-level timestamps, in the format of "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" ``` diff --git a/runtime/docs/websocket_protocol_zh.md b/runtime/docs/websocket_protocol_zh.md index 98b5c2041..e00a989ef 100644 --- a/runtime/docs/websocket_protocol_zh.md +++ b/runtime/docs/websocket_protocol_zh.md @@ -46,7 +46,7 @@ message为(采用json序列化) `text`:表示语音识别输出文本 `is_final`:表示识别结束 
`timestamp`:如果AM为时间戳模型,会返回此字段,表示时间戳,格式为 "[[100,200], [200,500]]"(ms) -`stamp_sents`:如果AM为时间戳模型,会返回此字段,表示句子级别时间戳,格式为 "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" +`stamp_sents`:如果AM为时间戳模型,会返回此字段,表示句子级别时间戳,格式为 "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" ``` ## 实时语音识别 @@ -96,5 +96,5 @@ message为(采用json序列化) `text`:表示语音识别输出文本 `is_final`:表示识别结束 `timestamp`:如果AM为时间戳模型,会返回此字段,表示时间戳,格式为 "[[100,200], [200,500]]"(ms) -`stamp_sents`:如果AM为时间戳模型,会返回此字段,表示句子级别时间戳,格式为 "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" +`stamp_sents`:如果AM为时间戳模型,会返回此字段,表示句子级别时间戳,格式为 "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]" ``` diff --git a/runtime/onnxruntime/src/util.cpp b/runtime/onnxruntime/src/util.cpp index 661a50d8b..0fbec4f7b 100644 --- a/runtime/onnxruntime/src/util.cpp +++ b/runtime/onnxruntime/src/util.cpp @@ -584,7 +584,8 @@ std::string TimestampSentence(std::string &text, std::string &str_time){ } } // format - ts_sent += "{'text':'" + text_seg + "',"; + ts_sent += "{'text_seg':'" + text_seg + "',"; + ts_sent += "'punc':'" + characters[idx_str] + "',"; ts_sent += "'start':'" + to_string(start) + "',"; ts_sent += "'end':'" + to_string(end) + "',"; ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}"; @@ -620,7 +621,8 @@ std::string TimestampSentence(std::string &text, std::string &str_time){ end = ts_seg[ts_seg.size()-1][1]; } // format - ts_sent += "{'text':'" + text_seg + "',"; + ts_sent += "{'text_seg':'" + text_seg + "',"; + ts_sent += "'punc':'',"; ts_sent += "'start':'" + to_string(start) + "',"; ts_sent += "'end':'" + to_string(end) + "',"; ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}";