From 6761eb3efa625b8611a642e793dbdb5a29af5a84 Mon Sep 17 00:00:00 2001 From: zhaomingwork <61895407+zhaomingwork@users.noreply.github.com> Date: Mon, 18 Sep 2023 10:20:31 +0800 Subject: [PATCH] Add python websocket api (#960) * add python api for websocket * little change for README.md * little change for README.md * add wait time for send chunk --- funasr/runtime/python/websocket/README.md | 14 ++ .../python/websocket/funasr_client_api.py | 134 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 funasr/runtime/python/websocket/funasr_client_api.py diff --git a/funasr/runtime/python/websocket/README.md b/funasr/runtime/python/websocket/README.md index 686ad86ab..f318fd93f 100644 --- a/funasr/runtime/python/websocket/README.md +++ b/funasr/runtime/python/websocket/README.md @@ -107,6 +107,20 @@ Loadding from wav.scp(kaldi style) # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode 2pass --chunk_size "8,8,4" --audio_in "./data/wav.scp" --output_dir "./results" ``` + +#### Websocket api +```shell + # class Funasr_websocket_recognizer example with 3 step + # 1.create an recognizer + rcg=Funasr_websocket_recognizer(host="127.0.0.1",port="30035",is_ssl=True,mode="2pass") + # 2.send pcm data to asr engine and get asr result + text=rcg.feed_chunk(data) + print("text",text) + # 3.get last result, set timeout=3 + text=rcg.close(timeout=3) + print("text",text) +``` + ## Acknowledge 1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR). 2. We acknowledge [zhaoming](https://github.com/zhaomingwork/FunASR/tree/fix_bug_for_python_websocket) for contributing the websocket service. diff --git a/funasr/runtime/python/websocket/funasr_client_api.py b/funasr/runtime/python/websocket/funasr_client_api.py new file mode 100644 index 000000000..aa573c0f8 --- /dev/null +++ b/funasr/runtime/python/websocket/funasr_client_api.py @@ -0,0 +1,134 @@ +''' + Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights + Reserved. MIT License (https://opensource.org/licenses/MIT) + + 2022-2023 by zhaomingwork@qq.com +''' +# pip install websocket-client +import ssl +from websocket import ABNF +from websocket import create_connection +from queue import Queue +import threading +import traceback +import json +import time +import numpy as np +# class for recognizer in websocket +class Funasr_websocket_recognizer(): + ''' + python asr recognizer lib + + ''' + def __init__(self, host="127.0.0.1", port="30035", is_ssl=True,chunk_size="5, 10, 5",chunk_interval=10,mode="offline",wav_name="default"): + ''' + host: server host ip + port: server port + is_ssl: True for wss protocal, False for ws + ''' + try: + if is_ssl == True: + ssl_context = ssl.SSLContext() + ssl_context.check_hostname = False + ssl_context.verify_mode = ssl.CERT_NONE + uri = "wss://{}:{}".format(host, port) + ssl_opt={"cert_reqs": ssl.CERT_NONE} + else: + uri = "ws://{}:{}".format(host, port) + ssl_context = None + ssl_opt=None + self.host = host + self.port = port + + self.msg_queue = Queue() # used for recognized result text + + print("connect to url",uri) + self.websocket=create_connection(uri,ssl=ssl_context,sslopt=ssl_opt) + + self.thread_msg = threading.Thread(target=Funasr_websocket_recognizer.thread_rec_msg,args=(self,)) + self.thread_msg.start() + chunk_size = [int(x) for x in chunk_size.split(",")] + stride = int(60 * chunk_size[1]/ chunk_interval / 1000 * 16000 * 2) + chunk_num = (len(audio_bytes) - 1) // stride + 1 + + message = json.dumps({"mode": mode, "chunk_size": chunk_size, "chunk_interval": chunk_interval, + "wav_name": wav_name, "is_speaking": True}) + + self.websocket.send(message) + + print("send json",message) + + except Exception as e: + print("Exception:", e) + traceback.print_exc() + + # threads for rev msg + def thread_rec_msg(self): + try: + while(True): + msg=self.websocket.recv() + if msg is None or len(msg)==0: + continue + msg = json.loads(msg) + + self.msg_queue.put(msg) + except Exception as e: + print("client closed") + + # feed data to asr engine, wait_time means waiting for result until time out + def feed_chunk(self, chunk,wait_time=0.01): + try: + self.websocket.send(chunk, ABNF.OPCODE_BINARY) + # loop to check if there is a message, timeout in 0.01s + while(True): + msg = self.msg_queue.get(timeout=wait_time) + if self.msg_queue.empty(): + break + + return msg + except: + return "" + def close(self,timeout=1): + message = json.dumps({"is_speaking": False}) + self.websocket.send(message) + # sleep for timeout seconds to wait for result + time.sleep(timeout) + msg="" + while(not self.msg_queue.empty()): + msg = self.msg_queue.get() + + self.websocket.close() + # only resturn the last msg + return msg + +if __name__ == '__main__': + print('example for Funasr_websocket_recognizer') + import wave + wav_path="asr_example.wav" + with wave.open(wav_path, "rb") as wav_file: + params = wav_file.getparams() + frames = wav_file.readframes(wav_file.getnframes()) + audio_bytes = bytes(frames) + + + stride = int(60 * 10 / 10 / 1000 * 16000 * 2) + chunk_num = (len(audio_bytes) - 1) // stride + 1 + # create an recognizer + rcg=Funasr_websocket_recognizer(host="127.0.0.1",port="30035",is_ssl=True,mode="2pass") + # loop to send chunk + for i in range(chunk_num): + + beg = i * stride + data = audio_bytes[beg:beg + stride] + + text=rcg.feed_chunk(data,wait_time=0.02) + if len(text)>0: + print("text",text) + time.sleep(0.05) + + # get last message + text=rcg.close(timeout=3) + print("text",text) + + + \ No newline at end of file