From 4798614e683e94c1b8ed7b514fbc47381f6465ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Thu, 23 Mar 2023 15:41:57 +0800 Subject: [PATCH] websocket --- funasr/runtime/python/websocket/ASR_server.py | 68 +++++++++++-------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/funasr/runtime/python/websocket/ASR_server.py b/funasr/runtime/python/websocket/ASR_server.py index 0af520880..0aed2b498 100644 --- a/funasr/runtime/python/websocket/ASR_server.py +++ b/funasr/runtime/python/websocket/ASR_server.py @@ -85,10 +85,15 @@ def vad(data): # 推理 #print(type(data)) segments_result = vad_pipline(audio_in=data) #print(segments_result) - if len(segments_result) == 0: + speech_start = False + speech_end = False + if len(segments_result) == 0 or len(segments_result["text"] > 1): return False - else: - return True + elif segments_result["text"][0][0] != -1: + speech_start = True + elif segments_result["text"][0][1] != -1: + speech_end = True + return speech_start, speech_end def asr(): # 推理 global inference_pipeline2 @@ -106,11 +111,12 @@ def asr(): # 推理 def main(): # 推理 frames = [] # 存储所有的帧数据 buffer = [] # 存储缓存中的帧数据(最多两个片段) - silence_count = 0 # 统计连续静音的次数 - speech_detected = False # 标记是否检测到语音 + # silence_count = 0 # 统计连续静音的次数 + # speech_detected = False # 标记是否检测到语音 RECORD_NUM = 0 global voices global speek + speech_start, speech_end = False, False while True: while not voices.empty(): @@ -121,32 +127,34 @@ def main(): # 推理 if len(buffer) > 2: buffer.pop(0) # 如果缓存超过两个片段,则删除最早的一个 - if speech_detected: + if speech_start: frames.append(data) - RECORD_NUM += 1 - - if vad(data): - if not speech_detected: - print("检测到人声...") - speech_detected = True # 标记为检测到语音 - frames = [] - frames.extend(buffer) # 把之前2个语音数据快加入 - silence_count = 0 # 重置静音次数 - else: - silence_count += 1 # 增加静音次数 - - if speech_detected and (silence_count > 4 or RECORD_NUM > 50): #这里 50 可根据需求改为合适的数据快数量 - print("说话结束或者超过设置最长时间...") - audio_in = b"".join(frames) - #asrt = threading.Thread(target=asr,args=(audio_in,)) - #asrt.start() - speek.put(audio_in) - #rec_result = inference_pipeline2(audio_in=audio_in) # ASR 模型里跑一跑 - frames = [] # 清空所有的帧数据 - buffer = [] # 清空缓存中的帧数据(最多两个片段) - silence_count = 0 # 统计连续静音的次数清零 - speech_detected = False # 标记是否检测到语音 - RECORD_NUM = 0 + RECORD_NUM += 1 + speech_start_i, speech_end_i = vad(data) + if speech_start_i: + speech_start = speech_start_i + # if not speech_detected: + print("检测到人声...") + # speech_detected = True # 标记为检测到语音 + frames = [] + frames.extend(buffer) # 把之前2个语音数据快加入 + # silence_count = 0 # 重置静音次数 + elif speech_end_i or RECORD_NUM > 300: + # silence_count += 1 # 增加静音次数 + # speech_end = speech_end_i + speech_start = False + # if RECORD_NUM > 300: #这里 50 可根据需求改为合适的数据快数量 + print("说话结束或者超过设置最长时间...") + audio_in = b"".join(frames) + #asrt = threading.Thread(target=asr,args=(audio_in,)) + #asrt.start() + speek.put(audio_in) + #rec_result = inference_pipeline2(audio_in=audio_in) # ASR 模型里跑一跑 + frames = [] # 清空所有的帧数据 + buffer = [] # 清空缓存中的帧数据(最多两个片段) + # silence_count = 0 # 统计连续静音的次数清零 + # speech_detected = False # 标记是否检测到语音 + RECORD_NUM = 0 time.sleep(0.01) time.sleep(0.01)