From 6761eb3efa625b8611a642e793dbdb5a29af5a84 Mon Sep 17 00:00:00 2001
From: zhaomingwork <61895407+zhaomingwork@users.noreply.github.com>
Date: Mon, 18 Sep 2023 10:20:31 +0800
Subject: [PATCH] Add python websocket api (#960)

* add python api for websocket

* little change for README.md

* little change for README.md

* add wait time for send chunk
---
 funasr/runtime/python/websocket/README.md     |  14 ++
 .../python/websocket/funasr_client_api.py     | 134 ++++++++++++++++++
 2 files changed, 148 insertions(+)
 create mode 100644 funasr/runtime/python/websocket/funasr_client_api.py

diff --git a/funasr/runtime/python/websocket/README.md b/funasr/runtime/python/websocket/README.md
index 686ad86ab..f318fd93f 100644
--- a/funasr/runtime/python/websocket/README.md
+++ b/funasr/runtime/python/websocket/README.md
@@ -107,6 +107,20 @@ Loadding from wav.scp(kaldi style)
 # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms
 python funasr_wss_client.py --host "0.0.0.0" --port 10095 --mode 2pass --chunk_size "8,8,4" --audio_in "./data/wav.scp" --output_dir "./results"
 ```
+
+#### Websocket api
+```shell
+    # class Funasr_websocket_recognizer example with 3 step
+    # 1.create an recognizer 
+    rcg=Funasr_websocket_recognizer(host="127.0.0.1",port="30035",is_ssl=True,mode="2pass")
+    # 2.send pcm data to asr engine and get asr result
+    text=rcg.feed_chunk(data)
+    print("text",text)
+    # 3.get last result, set timeout=3
+    text=rcg.close(timeout=3)
+    print("text",text)
+```
+
 ## Acknowledge
 1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
 2. We acknowledge [zhaoming](https://github.com/zhaomingwork/FunASR/tree/fix_bug_for_python_websocket) for contributing the websocket service.
diff --git a/funasr/runtime/python/websocket/funasr_client_api.py b/funasr/runtime/python/websocket/funasr_client_api.py
new file mode 100644
index 000000000..aa573c0f8
--- /dev/null
+++ b/funasr/runtime/python/websocket/funasr_client_api.py
@@ -0,0 +1,134 @@
+'''
+  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
+  Reserved. MIT License  (https://opensource.org/licenses/MIT)
+  
+  2022-2023 by zhaomingwork@qq.com  
+'''
+# pip install websocket-client
+import  ssl
+from websocket import ABNF
+from websocket import create_connection
+from queue import Queue
+import threading
+import traceback
+import json
+import time
+import numpy as np
+# class for recognizer in websocket
+class Funasr_websocket_recognizer():
+    '''
+    python asr recognizer lib
+
+    '''
+    def __init__(self, host="127.0.0.1", port="30035", is_ssl=True,chunk_size="5, 10, 5",chunk_interval=10,mode="offline",wav_name="default"):
+      '''
+          host: server host ip
+          port: server port
+          is_ssl: True for wss protocal, False for ws
+      '''
+      try:
+        if is_ssl == True:
+            ssl_context = ssl.SSLContext()
+            ssl_context.check_hostname = False
+            ssl_context.verify_mode = ssl.CERT_NONE
+            uri = "wss://{}:{}".format(host, port)
+            ssl_opt={"cert_reqs": ssl.CERT_NONE}
+        else:
+            uri = "ws://{}:{}".format(host, port)
+            ssl_context = None
+            ssl_opt=None
+        self.host = host
+        self.port = port
+ 
+        self.msg_queue = Queue() # used for recognized result text
+
+        print("connect to url",uri)
+        self.websocket=create_connection(uri,ssl=ssl_context,sslopt=ssl_opt)
+ 
+        self.thread_msg = threading.Thread(target=Funasr_websocket_recognizer.thread_rec_msg,args=(self,))
+        self.thread_msg.start()
+        chunk_size = [int(x) for x in  chunk_size.split(",")]
+        stride = int(60 *  chunk_size[1]/  chunk_interval / 1000 * 16000 * 2)
+        chunk_num = (len(audio_bytes) - 1) // stride + 1
+       
+        message = json.dumps({"mode":  mode, "chunk_size":  chunk_size, "chunk_interval":  chunk_interval,
+                              "wav_name": wav_name, "is_speaking": True})
+ 
+        self.websocket.send(message)
+ 
+        print("send json",message)
+      
+      except Exception as e:
+            print("Exception:", e)
+            traceback.print_exc()
+    
+    # threads for rev msg
+    def thread_rec_msg(self):
+        try:
+         while(True):
+           msg=self.websocket.recv()
+           if msg is None or len(msg)==0:
+             continue
+           msg = json.loads(msg)
+           
+           self.msg_queue.put(msg)
+        except Exception as e:
+            print("client closed")
+ 
+    # feed data to asr engine, wait_time means waiting for result until time out
+    def feed_chunk(self, chunk,wait_time=0.01):
+        try:
+            self.websocket.send(chunk,  ABNF.OPCODE_BINARY)
+            # loop to check if there is a message, timeout in 0.01s
+            while(True):
+               msg = self.msg_queue.get(timeout=wait_time)
+               if self.msg_queue.empty():
+                  break
+                  
+            return msg
+        except:
+            return ""
+    def close(self,timeout=1):
+        message = json.dumps({"is_speaking": False})
+        self.websocket.send(message)
+        # sleep for timeout seconds to wait for result
+        time.sleep(timeout)
+        msg=""
+        while(not self.msg_queue.empty()):
+            msg = self.msg_queue.get()
+        
+        self.websocket.close()
+        # only resturn the last msg
+        return msg
+        
+if __name__ == '__main__':
+    print('example for Funasr_websocket_recognizer') 
+    import wave
+    wav_path="asr_example.wav"
+    with wave.open(wav_path, "rb") as wav_file:
+                params = wav_file.getparams()
+                frames = wav_file.readframes(wav_file.getnframes())
+                audio_bytes = bytes(frames)
+    
+ 
+    stride = int(60 * 10 / 10 / 1000 * 16000 * 2)
+    chunk_num = (len(audio_bytes) - 1) // stride + 1
+    # create an recognizer 
+    rcg=Funasr_websocket_recognizer(host="127.0.0.1",port="30035",is_ssl=True,mode="2pass")
+    # loop to send chunk
+    for i in range(chunk_num):
+
+            beg = i * stride
+            data = audio_bytes[beg:beg + stride]
+ 
+            text=rcg.feed_chunk(data,wait_time=0.02)
+            if len(text)>0:
+               print("text",text)
+            time.sleep(0.05)
+ 
+    # get last message
+    text=rcg.close(timeout=3)
+    print("text",text)
+ 
+    
+            
\ No newline at end of file