From 7a8bedace2ccccaa162b335cad3f3f00acf84b4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Thu, 23 Mar 2023 17:12:02 +0800 Subject: [PATCH] websocket --- funasr/bin/vad_inference_online.py | 7 ---- funasr/runtime/python/websocket/ASR_server.py | 40 ++++++++----------- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/funasr/bin/vad_inference_online.py b/funasr/bin/vad_inference_online.py index faee1fc00..dadfd8cac 100644 --- a/funasr/bin/vad_inference_online.py +++ b/funasr/bin/vad_inference_online.py @@ -30,14 +30,7 @@ from funasr.models.frontend.wav_frontend import WavFrontendOnline from funasr.models.frontend.wav_frontend import WavFrontend from funasr.bin.vad_inference import Speech2VadSegment -header_colors = '\033[95m' -end_colors = '\033[0m' -global_asr_language: str = 'zh-cn' -global_sample_rate: Union[int, Dict[Any, int]] = { - 'audio_fs': 16000, - 'model_fs': 16000 -} class Speech2VadSegmentOnline(Speech2VadSegment): diff --git a/funasr/runtime/python/websocket/ASR_server.py b/funasr/runtime/python/websocket/ASR_server.py index ac63d3c91..cfa9a4224 100644 --- a/funasr/runtime/python/websocket/ASR_server.py +++ b/funasr/runtime/python/websocket/ASR_server.py @@ -56,7 +56,9 @@ vad_pipline = pipeline( model_revision="v1.2.0", output_dir=None, batch_size=1, + mode='online' ) +param_dict_vad = {'in_cache': dict(), "is_final": False} # 创建一个ASR对象 param_dict = dict() @@ -85,17 +87,20 @@ start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["b def vad(data): # 推理 - global vad_pipline + global vad_pipline, param_dict_vad #print(type(data)) - segments_result = vad_pipline(audio_in=data) - #print(segments_result) + # print(param_dict_vad) + segments_result = vad_pipline(audio_in=data, param_dict=param_dict_vad) + # print(segments_result) + # print(param_dict_vad) speech_start = False speech_end = False - if len(segments_result) == 0 or len(segments_result["text"] > 1): - return False - elif segments_result["text"][0][0] != -1: + + if len(segments_result) == 0 or len(segments_result["text"]) > 1: + return speech_start, speech_end + if segments_result["text"][0][0] != -1: speech_start = True - elif segments_result["text"][0][1] != -1: + if segments_result["text"][0][1] != -1: speech_end = True return speech_start, speech_end @@ -135,20 +140,21 @@ def main(): # 推理 frames.append(data) RECORD_NUM += 1 speech_start_i, speech_end_i = vad(data) + # print(speech_start_i, speech_end_i) if speech_start_i: speech_start = speech_start_i # if not speech_detected: - print("检测到人声...") + # print("检测到人声...") # speech_detected = True # 标记为检测到语音 frames = [] frames.extend(buffer) # 把之前2个语音数据快加入 # silence_count = 0 # 重置静音次数 - elif speech_end_i or RECORD_NUM > 300: + if speech_end_i or RECORD_NUM > 300: # silence_count += 1 # 增加静音次数 # speech_end = speech_end_i speech_start = False # if RECORD_NUM > 300: #这里 50 可根据需求改为合适的数据快数量 - print("说话结束或者超过设置最长时间...") + # print("说话结束或者超过设置最长时间...") audio_in = b"".join(frames) #asrt = threading.Thread(target=asr,args=(audio_in,)) #asrt.start() @@ -170,16 +176,4 @@ s = threading.Thread(target=asr) s.start() asyncio.get_event_loop().run_until_complete(start_server) -asyncio.get_event_loop().run_forever() - - - - - - - - - - - - +asyncio.get_event_loop().run_forever() \ No newline at end of file