diff --git a/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/demo.py b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/demo.py index c449ab296..583a1eb0a 100644 --- a/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/demo.py +++ b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/demo.py @@ -9,7 +9,7 @@ logger.setLevel(logging.CRITICAL) inference_pipeline = pipeline( task=Tasks.punctuation, model='damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727', - model_revision = 'v1.0.2' + model_revision='v1.0.2' ) ##################text二进制数据##################### diff --git a/funasr/runtime/html5/readme_cn.md b/funasr/runtime/html5/readme_cn.md index d1a56eb46..d7fb1aa20 100644 --- a/funasr/runtime/html5/readme_cn.md +++ b/funasr/runtime/html5/readme_cn.md @@ -28,7 +28,7 @@ Recorder ```shell usage: h5Server.py [-h] [--host HOST] [--port PORT] [--certfile CERTFILE] [--keyfile KEYFILE] -python h5Server.py --port 1337 +python h5Server.py --port 1337 --keyfile server.key ``` ## 2.启动ws or wss asr online srv [具体请看online asr](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket) diff --git a/funasr/runtime/python/websocket/ws_server_2pass.py b/funasr/runtime/python/websocket/ws_server_2pass.py index 186197a45..1d3c135fa 100644 --- a/funasr/runtime/python/websocket/ws_server_2pass.py +++ b/funasr/runtime/python/websocket/ws_server_2pass.py @@ -46,7 +46,7 @@ if args.punc_model != "": inference_pipeline_punc = pipeline( task=Tasks.punctuation, model=args.punc_model, - model_revision=None, + model_revision="v1.0.2", ngpu=args.ngpu, ncpu=args.ncpu, ) @@ -74,6 +74,7 @@ async def ws_serve(websocket, path): websocket.param_dict_punc = {'cache': list()} websocket.vad_pre_idx = 0 speech_start = False + speech_end_i = False websocket.wav_name = "microphone" print("new user connected", flush=True) @@ -99,7 +100,9 @@ async def ws_serve(websocket, path): # asr online frames_asr_online.append(message) - if len(frames_asr_online) % websocket.chunk_interval == 0: + websocket.param_dict_asr_online["is_final"] = speech_end_i + if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.param_dict_asr_online["is_final"]: + audio_in = b"".join(frames_asr_online) await async_asr_online(websocket, audio_in) frames_asr_online = [] @@ -115,12 +118,13 @@ async def ws_serve(websocket, path): frames_asr.extend(frames_pre) # asr punc offline if speech_end_i or not websocket.is_speaking: + # print("vad end point") audio_in = b"".join(frames_asr) await async_asr(websocket, audio_in) frames_asr = [] speech_start = False - frames_asr_online = [] - websocket.param_dict_asr_online = {"cache": dict()} + # frames_asr_online = [] + # websocket.param_dict_asr_online = {"cache": dict()} if not websocket.is_speaking: websocket.vad_pre_idx = 0 frames = [] @@ -173,10 +177,13 @@ async def async_asr(websocket, audio_in): async def async_asr_online(websocket, audio_in): if len(audio_in) > 0: audio_in = load_bytes(audio_in) + # print(websocket.param_dict_asr_online.get("is_final", False)) rec_result = inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online) + # print(rec_result) if websocket.param_dict_asr_online.get("is_final", False): - websocket.param_dict_asr_online["cache"] = dict() + return + # websocket.param_dict_asr_online["cache"] = dict() if "text" in rec_result: if rec_result["text"] != "sil" and rec_result["text"] != "waiting_for_more_voice": # print("online", rec_result) diff --git a/funasr/runtime/python/websocket/ws_server_online.py b/funasr/runtime/python/websocket/ws_server_online.py index 2255688c8..4cecd5f08 100644 --- a/funasr/runtime/python/websocket/ws_server_online.py +++ b/funasr/runtime/python/websocket/ws_server_online.py @@ -106,8 +106,10 @@ async def ws_serve(websocket, path): async def async_asr_online(websocket,audio_in): if len(audio_in) >= 0: audio_in = load_bytes(audio_in) + # print(websocket.param_dict_asr_online.get("is_final", False)) rec_result = inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online) + # print(rec_result) if websocket.param_dict_asr_online.get("is_final", False): websocket.param_dict_asr_online["cache"] = dict() if "text" in rec_result: