python-websocket funasr1.0 (#1310)

* fix add_file bug (#1296)
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
* funasr1.0 uniasr
* funasr1.0 uniasr
* update with main (#1305)
* v1.0.3
* update clients for 2pass
* update download tools
---------
Co-authored-by: 雾聪 <wucong.lyb@alibaba-inc.com>
* vad streaming return [beg, -1], [], [-1, end], [beg, end]]
* funasr1.0 websocket-python
* funasr1.0 websocket-python
---------
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
Co-authored-by: 雾聪 <wucong.lyb@alibaba-inc.com>
2025-09-15 14:48:36 +08:00 · 2024-01-26 16:02:14 +08:00 · 2024-01-26 16:02:14 +08:00 · 4f224c8806
commit 4f224c8806
parent 65396eeeff
4 changed files with 290 additions and 226 deletions
--- a/examples/industrial_data_pretraining/ct_transformer_streaming/demo.py
+++ b/examples/industrial_data_pretraining/ct_transformer_streaming/demo.py
@ -5,7 +5,7 @@

 from funasr import AutoModel

-model = AutoModel(model="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", model_revision="v2.0.4")
+model = AutoModel(model="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", model_revision="v2.0.4")

 inputs = "跨境河流是养育沿岸|人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员|在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险|向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切|愿意进一步完善双方联合工作机制|凡是|中方能做的我们|都会去做而且会做得更好我请印度朋友们放心中国在上游的|任何开发利用都会经过科学|规划和论证兼顾上下游的利益"
 vads = inputs.split("|")
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@ -88,7 +88,8 @@ def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
 class AutoModel:
    
    def __init__(self, **kwargs):
-        tables.print()
+        # Dump the `tables` listing unless the caller passed disable_log=True.
+        if not kwargs.get("disable_log", False):
+            tables.print()
        
        model, kwargs = self.build_model(**kwargs)
        
--- a/runtime/python/websocket/funasr_wss_client.py
+++ b/runtime/python/websocket/funasr_wss_client.py
@ -29,6 +29,14 @@ parser.add_argument("--chunk_size",
                    type=str,
                    default="5, 10, 5",
                    help="chunk")
+# Look-back settings; both are forwarded to the server in the first JSON message.
+parser.add_argument("--encoder_chunk_look_back",
+                    type=int,
+                    default=4,
+                    help="number of chunks to look back for encoder self-attention")
+parser.add_argument("--decoder_chunk_look_back",
+                    type=int,
+                    default=0,
+                    help="number of encoder chunks to look back for decoder cross-attention")
 parser.add_argument("--chunk_interval",
                    type=int,
                    default=10,
@ -113,25 +121,36 @@ async def record_microphone():
    fst_dict = {}
    hotword_msg = ""
    if args.hotword.strip() != "":
-        f_scp = open(args.hotword)
-        hot_lines = f_scp.readlines()
-        for line in hot_lines:
-            words = line.strip().split(" ")
-            if len(words) < 2:
-                print("Please checkout format of hotwords")
-                continue
-            try:
-                fst_dict[" ".join(words[:-1])] = int(words[-1])
-            except ValueError:
-                print("Please checkout format of hotwords")
-        hotword_msg=json.dumps(fst_dict)
+        if os.path.exists(args.hotword):
+            # --hotword names an existing file: every line is "<phrase> <int>",
+            # where the phrase itself may contain spaces. The context manager
+            # guarantees the handle is closed once the lines are consumed.
+            with open(args.hotword) as f_scp:
+                for line in f_scp:
+                    words = line.strip().split(" ")
+                    if len(words) < 2:
+                        print("Please checkout format of hotwords")
+                        continue
+                    try:
+                        fst_dict[" ".join(words[:-1])] = int(words[-1])
+                    except ValueError:
+                        print("Please checkout format of hotwords")
+            hotword_msg = json.dumps(fst_dict)
+        else:
+            # Not a path on disk: send the raw argument text as the hotword string.
+            hotword_msg = args.hotword

-    use_itn=True
+    use_itn = True
    if args.use_itn == 0:
        use_itn=False
    
-    message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval,
-                          "wav_name": "microphone", "is_speaking": True, "hotwords":hotword_msg, "itn": use_itn})
+    message = json.dumps({"mode": args.mode,
+                          "chunk_size": args.chunk_size,
+                          "chunk_interval": args.chunk_interval,
+                          "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                          "decoder_chunk_look_back": args.decoder_chunk_look_back,
+                          "wav_name": "microphone",
+                          "is_speaking": True,
+                          "hotwords": hotword_msg,
+                          "itn": use_itn,
+                          })
    #voices.put(message)
    await websocket.send(message)
    while True:
@ -154,18 +173,21 @@ async def record_from_scp(chunk_begin, chunk_size):
    fst_dict = {}
    hotword_msg = ""
    if args.hotword.strip() != "":
-        f_scp = open(args.hotword)
-        hot_lines = f_scp.readlines()
-        for line in hot_lines:
-            words = line.strip().split(" ")
-            if len(words) < 2:
-                print("Please checkout format of hotwords")
-                continue
-            try:
-                fst_dict[" ".join(words[:-1])] = int(words[-1])
-            except ValueError:
-                print("Please checkout format of hotwords")
-        hotword_msg=json.dumps(fst_dict)
+        if os.path.exists(args.hotword):
+            # --hotword names an existing file: every line is "<phrase> <int>",
+            # where the phrase itself may contain spaces. The context manager
+            # guarantees the handle is closed once the lines are consumed.
+            with open(args.hotword) as f_scp:
+                for line in f_scp:
+                    words = line.strip().split(" ")
+                    if len(words) < 2:
+                        print("Please checkout format of hotwords")
+                        continue
+                    try:
+                        fst_dict[" ".join(words[:-1])] = int(words[-1])
+                    except ValueError:
+                        print("Please checkout format of hotwords")
+            hotword_msg = json.dumps(fst_dict)
+        else:
+            # Not a path on disk: send the raw argument text as the hotword string.
+            hotword_msg = args.hotword
        print (hotword_msg)

    sample_rate = args.audio_fs
@ -203,8 +225,17 @@ async def record_from_scp(chunk_begin, chunk_size):
        # print(stride)

        # send first time
-        message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval, "audio_fs":sample_rate,
-                          "wav_name": wav_name, "wav_format": wav_format, "is_speaking": True, "hotwords":hotword_msg, "itn": use_itn})
+        message = json.dumps({"mode": args.mode,
+                              "chunk_size": args.chunk_size,
+                              "chunk_interval": args.chunk_interval,
+                              "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                              "decoder_chunk_look_back": args.decoder_chunk_look_back,
+                              "audio_fs":sample_rate,
+                              "wav_name": wav_name,
+                              "wav_format": wav_format,
+                              "is_speaking": True,
+                              "hotwords": hotword_msg,
+                              "itn": use_itn})

        #voices.put(message)
        await websocket.send(message)
--- a/runtime/python/websocket/funasr_wss_server.py
+++ b/runtime/python/websocket/funasr_wss_server.py
@ -7,14 +7,7 @@ import tracemalloc
 import numpy as np
 import argparse
 import ssl
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-from modelscope.utils.logger import get_logger

-tracemalloc.start()
-
-logger = get_logger(log_level=logging.CRITICAL)
-logger.setLevel(logging.CRITICAL)

 parser = argparse.ArgumentParser()
 parser.add_argument("--host",
@ -29,24 +22,44 @@ parser.add_argument("--port",
                    help="grpc server port")
 parser.add_argument("--asr_model",
                    type=str,
-                    default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
+                    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                    help="model from modelscope")
+parser.add_argument("--asr_model_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--asr_model_online",
                    type=str,
-                    default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
+                    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
                    help="model from modelscope")
+parser.add_argument("--asr_model_online_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--vad_model",
                    type=str,
-                    default="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
+                    default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                    help="model from modelscope")
+parser.add_argument("--vad_model_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--punc_model",
                    type=str,
-                    default="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
+                    default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
                    help="model from modelscope")
+parser.add_argument("--punc_model_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--ngpu",
                    type=int,
                    default=1,
                    help="0 for cpu, 1 for gpu")
+parser.add_argument("--device",
+                    type=str,
+                    default="cuda",
+                    help="cuda, cpu")
 parser.add_argument("--ncpu",
                    type=int,
                    default=4,
@ -68,213 +81,232 @@ args = parser.parse_args()
 websocket_users = set()

 print("model loading")
+from funasr import AutoModel
+
 # asr
-inference_pipeline_asr = pipeline(
-    task=Tasks.auto_speech_recognition,
-    model=args.asr_model,
-    ngpu=args.ngpu,
-    ncpu=args.ncpu,
-    model_revision=None)
-
-
+model_asr = AutoModel(model=args.asr_model,
+                      model_revision=args.asr_model_revision,
+                      ngpu=args.ngpu,
+                      ncpu=args.ncpu,
+                      device=args.device,
+                      disable_pbar=True,
+                      disable_log=True,
+                      )
+# asr streaming (online)
+model_asr_streaming = AutoModel(model=args.asr_model_online,
+                                model_revision=args.asr_model_online_revision,
+                                ngpu=args.ngpu,
+                                ncpu=args.ncpu,
+                                device=args.device,
+                                disable_pbar=True,
+                                disable_log=True,
+                                )
 # vad
-inference_pipeline_vad = pipeline(
-    task=Tasks.voice_activity_detection,
-    model=args.vad_model,
-    model_revision=None,
-    mode='online',
-    ngpu=args.ngpu,
-    ncpu=args.ncpu,
-)
+model_vad = AutoModel(model=args.vad_model,
+                      model_revision=args.vad_model_revision,
+                      ngpu=args.ngpu,
+                      ncpu=args.ncpu,
+                      device=args.device,
+                      disable_pbar=True,
+                      disable_log=True,
+                      # chunk_size=60,
+                      )

 if args.punc_model != "":
-    inference_pipeline_punc = pipeline(
-        task=Tasks.punctuation,
-        model=args.punc_model,
-        model_revision="v1.0.2",
-        ngpu=args.ngpu,
-        ncpu=args.ncpu,
-    )
+	model_punc = AutoModel(model=args.punc_model,
+	                       model_revision=args.punc_model_revision,
+	                       ngpu=args.ngpu,
+	                       ncpu=args.ncpu,
+	                       device=args.device,
+	                       disable_pbar=True,
+                           disable_log=True,
+	                       )
 else:
-    inference_pipeline_punc = None
+	model_punc = None
+

-inference_pipeline_asr_online = pipeline(
-    task=Tasks.auto_speech_recognition,
-    model=args.asr_model_online,
-    ngpu=args.ngpu,
-    ncpu=args.ncpu,
-    model_revision='v1.0.7',
-    update_model='v1.0.7',
-    mode='paraformer_streaming')

 print("model loaded! only support one client at the same time now!!!!")

 async def ws_reset(websocket):
-    print("ws reset now, total num is ",len(websocket_users))
-    websocket.param_dict_asr_online = {"cache": dict()}
-    websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
-    websocket.param_dict_asr_online["is_final"]=True
-    # audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
-    # inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
-    # inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
-    await websocket.close()
-    
-    
+	"""Clear the connection's streaming-ASR, VAD and punctuation caches, flag the
+	streaming-ASR and VAD state as final, then close the websocket."""
+	print("ws reset now, total num is ",len(websocket_users))
+
+	# Streaming ASR: empty cache, marked final.
+	websocket.status_dict_asr_online["cache"] = {}
+	websocket.status_dict_asr_online["is_final"] = True
+	# VAD: empty cache, marked final.
+	websocket.status_dict_vad["cache"] = {}
+	websocket.status_dict_vad["is_final"] = True
+	# Punctuation keeps only a cache.
+	websocket.status_dict_punc["cache"] = {}
+	
+	await websocket.close()
+
+
 async def clear_websocket():
-   for websocket in websocket_users:
-       await ws_reset(websocket)
-   websocket_users.clear()
- 
- 
-       
+	"""Reset and close every tracked connection, then empty the tracking set."""
+	# Iterate over a snapshot: ws_reset() awaits, and a connection handler that
+	# runs in the meantime may remove itself from websocket_users, which would
+	# raise "Set changed size during iteration" on the live set.
+	for websocket in list(websocket_users):
+		await ws_reset(websocket)
+	websocket_users.clear()
+
+
+
 async def ws_serve(websocket, path):
-    frames = []
-    frames_asr = []
-    frames_asr_online = []
-    global websocket_users
-    await clear_websocket()
-    websocket_users.add(websocket)
-    websocket.param_dict_asr = {}
-    websocket.param_dict_asr_online = {"cache": dict()}
-    websocket.param_dict_vad = {'in_cache': dict(), "is_final": False}
-    websocket.param_dict_punc = {'cache': list()}
-    websocket.vad_pre_idx = 0
-    speech_start = False
-    speech_end_i = -1
-    websocket.wav_name = "microphone"
-    websocket.mode = "2pass"
-    print("new user connected", flush=True)
-
-    try:
-        async for message in websocket:
-            if isinstance(message, str):
-                messagejson = json.loads(message)
-        
-                if "is_speaking" in messagejson:
-                    websocket.is_speaking = messagejson["is_speaking"]
-                    websocket.param_dict_asr_online["is_final"] = not websocket.is_speaking
-                if "chunk_interval" in messagejson:
-                    websocket.chunk_interval = messagejson["chunk_interval"]
-                if "wav_name" in messagejson:
-                    websocket.wav_name = messagejson.get("wav_name")
-                if "chunk_size" in messagejson:
-                    websocket.param_dict_asr_online["chunk_size"] = messagejson["chunk_size"]
-                if "encoder_chunk_look_back" in messagejson:
-                    websocket.param_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
-                if "decoder_chunk_look_back" in messagejson:
-                    websocket.param_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
-                if "mode" in messagejson:
-                    websocket.mode = messagejson["mode"]
-            if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
-                if not isinstance(message, str):
-                    frames.append(message)
-                    duration_ms = len(message)//32
-                    websocket.vad_pre_idx += duration_ms
-        
-                    # asr online
-                    frames_asr_online.append(message)
-                    websocket.param_dict_asr_online["is_final"] = speech_end_i != -1
-                    if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.param_dict_asr_online["is_final"]:
-                        if websocket.mode == "2pass" or websocket.mode == "online":
-                            audio_in = b"".join(frames_asr_online)
-                            await async_asr_online(websocket, audio_in)
-                        frames_asr_online = []
-                    if speech_start:
-                        frames_asr.append(message)
-                    # vad online
-                    speech_start_i, speech_end_i = await async_vad(websocket, message)
-                    if speech_start_i != -1:
-                        speech_start = True
-                        beg_bias = (websocket.vad_pre_idx-speech_start_i)//duration_ms
-                        frames_pre = frames[-beg_bias:]
-                        frames_asr = []
-                        frames_asr.extend(frames_pre)
-                # asr punc offline
-                if speech_end_i != -1 or not websocket.is_speaking:
-                    # print("vad end point")
-                    if websocket.mode == "2pass" or websocket.mode == "offline":
-                        audio_in = b"".join(frames_asr)
-                        await async_asr(websocket, audio_in)
-                    frames_asr = []
-                    speech_start = False
-                    # frames_asr_online = []
-                    # websocket.param_dict_asr_online = {"cache": dict()}
-                    if not websocket.is_speaking:
-                        websocket.vad_pre_idx = 0
-                        frames = []
-                        websocket.param_dict_vad = {'in_cache': dict()}
-                    else:
-                        frames = frames[-20:]
-
-     
-    except websockets.ConnectionClosed:
-        print("ConnectionClosed...", websocket_users,flush=True)
-        await ws_reset(websocket)
-        websocket_users.remove(websocket)
-    except websockets.InvalidState:
-        print("InvalidState...")
-    except Exception as e:
-        print("Exception:", e)
+	"""Serve one client connection.
+
+	Text frames are JSON settings (mode, chunking, hotwords, speaking flag);
+	binary frames are audio that is buffered, passed to streaming ASR and VAD,
+	and, when a segment ends or the client stops speaking, to offline ASR.
+	Any previously tracked connection is reset first. `path` is not used.
+	"""
+	frames = []
+	frames_asr = []
+	frames_asr_online = []
+	global websocket_users
+	await clear_websocket()
+	websocket_users.add(websocket)
+	websocket.status_dict_asr = {}
+	websocket.status_dict_asr_online = {"cache": {}, "is_final": False}
+	websocket.status_dict_vad = {'cache': {}, "is_final": False}
+	websocket.status_dict_punc = {'cache': {}}
+	websocket.chunk_interval = 10
+	websocket.vad_pre_idx = 0
+	# Defined up front so audio that arrives before any JSON settings does not
+	# read an attribute that was never set.
+	websocket.is_speaking = True
+	speech_start = False
+	speech_start_i = -1
+	speech_end_i = -1
+	websocket.wav_name = "microphone"
+	websocket.mode = "2pass"
+	print("new user connected", flush=True)
+
+	try:
+		async for message in websocket:
+			if isinstance(message, str):
+				messagejson = json.loads(message)
+
+				if "is_speaking" in messagejson:
+					websocket.is_speaking = messagejson["is_speaking"]
+					websocket.status_dict_asr_online["is_final"] = not websocket.is_speaking
+				if "chunk_interval" in messagejson:
+					websocket.chunk_interval = messagejson["chunk_interval"]
+				if "wav_name" in messagejson:
+					websocket.wav_name = messagejson.get("wav_name")
+				if "chunk_size" in messagejson:
+					chunk_size = messagejson["chunk_size"]
+					# A comma-separated string such as "5, 10, 5" is turned into
+					# a list of ints; any other value is stored unchanged.
+					if isinstance(chunk_size, str):
+						chunk_size = [int(x) for x in chunk_size.split(",")]
+					websocket.status_dict_asr_online["chunk_size"] = chunk_size
+				if "encoder_chunk_look_back" in messagejson:
+					websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
+				if "decoder_chunk_look_back" in messagejson:
+					websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
+				# funasr_wss_client.py sends the key "hotwords"; "hotword" is
+				# still honoured and wins when both are present.
+				for hotword_key in ("hotwords", "hotword"):
+					if hotword_key in messagejson:
+						websocket.status_dict_asr["hotword"] = messagejson[hotword_key]
+				if "mode" in messagejson:
+					websocket.mode = messagejson["mode"]
+
+			# Derive the VAD chunk size from the streaming chunk size; skipped
+			# until the client has actually supplied chunk_size.
+			if "chunk_size" in websocket.status_dict_asr_online:
+				websocket.status_dict_vad["chunk_size"] = int(websocket.status_dict_asr_online["chunk_size"][1]*60/websocket.chunk_interval)
+			if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
+				if not isinstance(message, str):
+					frames.append(message)
+					duration_ms = len(message)//32
+					websocket.vad_pre_idx += duration_ms
+
+					# asr online
+					frames_asr_online.append(message)
+					websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
+					if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.status_dict_asr_online["is_final"]:
+						if websocket.mode == "2pass" or websocket.mode == "online":
+							audio_in = b"".join(frames_asr_online)
+							try:
+								await async_asr_online(websocket, audio_in)
+							except Exception as e:
+								# Best effort: report and keep serving. Not a bare
+								# except, so task cancellation still propagates.
+								print(f"error in asr streaming, {websocket.status_dict_asr_online}", e)
+						frames_asr_online = []
+					if speech_start:
+						frames_asr.append(message)
+					# vad online
+					try:
+						speech_start_i, speech_end_i = await async_vad(websocket, message)
+					except Exception as e:
+						print("error in vad", e)
+						# Do not act on boundaries left over from an earlier chunk.
+						speech_start_i, speech_end_i = -1, -1
+					if speech_start_i != -1:
+						speech_start = True
+						# Re-seed the offline buffer with the trailing frames counted
+						# back from the current position to the reported start.
+						beg_bias = (websocket.vad_pre_idx-speech_start_i)//duration_ms
+						frames_pre = frames[-beg_bias:]
+						frames_asr = []
+						frames_asr.extend(frames_pre)
+				# asr punc offline
+				if speech_end_i != -1 or not websocket.is_speaking:
+					# print("vad end point")
+					if websocket.mode == "2pass" or websocket.mode == "offline":
+						audio_in = b"".join(frames_asr)
+						try:
+							await async_asr(websocket, audio_in)
+						except Exception as e:
+							print("error in asr offline", e)
+					frames_asr = []
+					speech_start = False
+					frames_asr_online = []
+					websocket.status_dict_asr_online["cache"] = {}
+					if not websocket.is_speaking:
+						websocket.vad_pre_idx = 0
+						frames = []
+						websocket.status_dict_vad["cache"] = {}
+					else:
+						frames = frames[-20:]
+
+
+	except websockets.ConnectionClosed:
+		print("ConnectionClosed...", websocket_users,flush=True)
+		await ws_reset(websocket)
+		# discard(), not remove(): clear_websocket() may already have emptied the set.
+		websocket_users.discard(websocket)
+	except websockets.InvalidState:
+		print("InvalidState...")
+	except Exception as e:
+		print("Exception:", e)


 async def async_vad(websocket, audio_in):
-
-    segments_result = inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
-
-    speech_start = -1
-    speech_end = -1
-    
-    if len(segments_result) == 0 or len(segments_result["text"]) > 1:
-        return speech_start, speech_end
-    if segments_result["text"][0][0] != -1:
-        speech_start = segments_result["text"][0][0]
-    if segments_result["text"][0][1] != -1:
-        speech_end = segments_result["text"][0][1]
-    return speech_start, speech_end
+	"""Run the VAD model on one audio chunk and return ``(speech_start, speech_end)``.
+
+	Both values are -1 unless the model reports exactly one segment; in that
+	case each value is the segment's corresponding bound.
+	"""
+	vad_output = model_vad.generate(input=audio_in, **websocket.status_dict_vad)
+	segments = vad_output[0]["value"]
+
+	# Zero segments or more than one: report no boundary.
+	if len(segments) != 1:
+		return -1, -1
+
+	seg_beg = segments[0][0]
+	seg_end = segments[0][1]
+	speech_start = seg_beg if seg_beg != -1 else -1
+	speech_end = seg_end if seg_end != -1 else -1
+	return speech_start, speech_end


 async def async_asr(websocket, audio_in):
-            if len(audio_in) > 0:
-                # print(len(audio_in))
-                rec_result = inference_pipeline_asr(audio_in=audio_in,
-                                                    param_dict=websocket.param_dict_asr)
-                # print(rec_result)
-                if inference_pipeline_punc is not None and 'text' in rec_result and len(rec_result["text"])>0:
-                    rec_result = inference_pipeline_punc(text_in=rec_result['text'],
-                                                         param_dict=websocket.param_dict_punc)
-                    # print("offline", rec_result)
-                if 'text' in rec_result:
-                    mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
-                    message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
-                    await websocket.send(message)
+	"""Recognise a buffered segment with the offline model and send the text.
+
+	The text is passed through the punctuation model when one is loaded and
+	the recognised text is non-empty. Nothing is sent for empty audio or
+	empty text.
+	"""
+	if len(audio_in) == 0:
+		return
+
+	rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
+	if model_punc is not None and len(rec_result["text"]) > 0:
+		rec_result = model_punc.generate(input=rec_result['text'], **websocket.status_dict_punc)[0]
+
+	if len(rec_result["text"]) == 0:
+		return
+
+	# In 2pass mode the offline result is labelled so the client can tell it
+	# apart from the streaming one.
+	reply_mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
+	payload = {
+		"mode": reply_mode,
+		"text": rec_result["text"],
+		"wav_name": websocket.wav_name,
+		"is_final": websocket.is_speaking,
+	}
+	await websocket.send(json.dumps(payload))


 async def async_asr_online(websocket, audio_in):
-    if len(audio_in) > 0:
-        # print(websocket.param_dict_asr_online.get("is_final", False))
-        rec_result = inference_pipeline_asr_online(audio_in=audio_in,
-                                                   param_dict=websocket.param_dict_asr_online)
-        # print(rec_result)
-        if websocket.mode == "2pass" and websocket.param_dict_asr_online.get("is_final", False):
-            return
-            #     websocket.param_dict_asr_online["cache"] = dict()
-        if "text" in rec_result:
-            if rec_result["text"] != "sil" and rec_result["text"] != "waiting_for_more_voice":
-                # print("online", rec_result)
-                mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
-                message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
-                await websocket.send(message)
+	"""Feed audio to the streaming model and send any partial text to the client.
+
+	The model is always run on non-empty audio. In "2pass" mode nothing is
+	sent when the streaming state is flagged final.
+	"""
+	if len(audio_in) == 0:
+		return
+
+	rec_result = model_asr_streaming.generate(input=audio_in, **websocket.status_dict_asr_online)[0]
+
+	final_chunk = websocket.status_dict_asr_online.get("is_final", False)
+	if websocket.mode == "2pass" and final_chunk:
+		return
+
+	if not len(rec_result["text"]):
+		return
+
+	reply_mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
+	payload = {
+		"mode": reply_mode,
+		"text": rec_result["text"],
+		"wav_name": websocket.wav_name,
+		"is_final": websocket.is_speaking,
+	}
+	await websocket.send(json.dumps(payload))

 if len(args.certfile)>0:
-    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
-    
-    # Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
-    ssl_cert = args.certfile
-    ssl_key = args.keyfile
-    
-    ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
-    start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
+	ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+	
+	# Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
+	ssl_cert = args.certfile
+	ssl_key = args.keyfile
+	
+	ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
+	start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
 else:
-    start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
+	start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
 asyncio.get_event_loop().run_until_complete(start_server)
 asyncio.get_event_loop().run_forever()