ws

2025-09-15 14:48:36 +08:00 · 2024-09-02 19:15:59 +08:00 · 2024-09-02 19:15:59 +08:00 · 8fb3ce8796
commit 8fb3ce8796
parent 623fd16f34
1 changed file with 43 additions and 51 deletions
--- a/runtime/python/websocket/funasr_wss_server_llm.py
+++ b/runtime/python/websocket/funasr_wss_server_llm.py
@@ -11,8 +11,16 @@ import nls
 from collections import deque
 import threading

+
 class NlsTtsSynthesizer:
-    def __init__(self, websocket, tts_fifo, token, appkey, url="wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1"):
+    def __init__(
+        self,
+        websocket,
+        tts_fifo,
+        token,
+        appkey,
+        url="wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1",
+    ):
        self.websocket = websocket
        self.tts_fifo = tts_fifo
        self.url = url
@@ -36,30 +44,32 @@ class NlsTtsSynthesizer:
            on_completed=self.on_completed,
            on_error=self.on_error,
            on_close=self.on_close,
-            callback_args=[]
+            callback_args=[],
        )
+
    def on_data(self, data, *args):
        self.count += len(data)
        self.tts_fifo.append(data)
-        #with open('tts_server.pcm', 'ab') as file:
+        # with open('tts_server.pcm', 'ab') as file:
        #    file.write(data)
+
    def on_sentence_begin(self, message, *args):
-        print('on sentence begin =>{}'.format(message))
+        print("on sentence begin =>{}".format(message))

    def on_sentence_synthesis(self, message, *args):
-        print('on sentence synthesis =>{}'.format(message))
+        print("on sentence synthesis =>{}".format(message))

    def on_sentence_end(self, message, *args):
-        print('on sentence end =>{}'.format(message))
+        print("on sentence end =>{}".format(message))

    def on_completed(self, message, *args):
-        print('on completed =>{}'.format(message))
+        print("on completed =>{}".format(message))

    def on_error(self, message, *args):
-        print('on_error args=>{}'.format(args))
+        print("on_error args=>{}".format(args))

    def on_close(self, *args):
-        print('on_close: args=>{}'.format(args))
+        print("on_close: args=>{}".format(args))
        print("on message data cout: =>{}".format(self.count))
        self.started = False

@@ -73,11 +83,12 @@ class NlsTtsSynthesizer:
    async def stop(self):
        self.sdk.stopStreamInputTts()

+
 parser = argparse.ArgumentParser()
 parser.add_argument(
-    "--host", type=str, default="127.0.0.1", required=False, help="host ip, localhost, 0.0.0.0"
+    "--host", type=str, default="0.0.0.0", required=False, help="host ip, localhost, 0.0.0.0"
 )
-parser.add_argument("--port", type=int, default=10095, required=False, help="grpc server port")
+parser.add_argument("--port", type=int, default=10096, required=False, help="grpc server port")
 parser.add_argument(
    "--asr_model",
    type=str,
@@ -124,16 +135,6 @@ websocket_users = set()
 print("model loading")
 from funasr import AutoModel

-# # asr
-# model_asr = AutoModel(
-#     model=args.asr_model,
-#     model_revision=args.asr_model_revision,
-#     ngpu=args.ngpu,
-#     ncpu=args.ncpu,
-#     device=args.device,
-#     disable_pbar=False,
-#     disable_log=True,
-# )

 # vad
 model_vad = AutoModel(
@@ -147,26 +148,6 @@ model_vad = AutoModel(
    # chunk_size=60,
 )

-# async def async_asr(websocket, audio_in):
-#     if len(audio_in) > 0:
-#         # print(len(audio_in))
-#         print(type(audio_in))
-#         rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
-#         print("offline_asr, ", rec_result)
-#
-#
-#         if len(rec_result["text"]) > 0:
-#             # print("offline", rec_result)
-#             mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
-#             message = json.dumps(
-#                 {
-#                     "mode": mode,
-#                     "text": rec_result["text"],
-#                     "wav_name": websocket.wav_name,
-#                     "is_final": websocket.is_speaking,
-#                 }
-#             )
-#             await websocket.send(message)

 import os

@@ -205,20 +186,26 @@ if "key" in os.environ:
    key = os.environ["key"]
    api.login(key)

+appkey = "xxx"
+appkey_token = "xxx"
+if "appkey" in os.environ:
+    appkey = os.environ["appkey"]
+    appkey_token = os.environ["appkey_token"]
+
 from modelscope.hub.snapshot_download import snapshot_download

-# os.environ["MODELSCOPE_CACHE"] = "/nfs/zhifu.gzf/modelscope"
-# llm_dir = snapshot_download('qwen/Qwen2-7B-Instruct', cache_dir=None, revision='master')
-# audio_encoder_dir = snapshot_download('iic/SenseVoice', cache_dir=None, revision='master')
+os.environ["MODELSCOPE_CACHE"] = "/nfs/zhifu.gzf/modelscope"
+llm_dir = snapshot_download("qwen/Qwen2-7B-Instruct", cache_dir=None, revision="master")
+audio_encoder_dir = snapshot_download("iic/SenseVoice", cache_dir=None, revision="master")

-llm_dir = "/cpfs_speech/zhifu.gzf/init_model/qwen/Qwen2-7B-Instruct"
-audio_encoder_dir = "/nfs/zhifu.gzf/init_model/SenseVoiceLargeModelscope"
+# llm_dir = "/cpfs_speech/zhifu.gzf/init_model/qwen/Qwen2-7B-Instruct"
+# audio_encoder_dir = "/nfs/zhifu.gzf/init_model/SenseVoiceLargeModelscope"

 device = "cuda:0"

 all_file_paths = [
-    "/nfs/zhifu.gzf/init_model/Speech2Text_Align_V0712_modelscope"
-    # "FunAudioLLM/Speech2Text_Align_V0712",
+    # "/nfs/zhifu.gzf/init_model/Speech2Text_Align_V0712_modelscope"
+    "FunAudioLLM/Speech2Text_Align_V0712",
    # "FunAudioLLM/Speech2Text_Align_V0718",
    # "FunAudioLLM/Speech2Text_Align_V0628",
 ]
@@ -234,7 +221,7 @@ model_llm = AutoModel(
    bf16=False,
    llm_dtype="bf16",
    max_length=1024,
-    llm_kwargs=llm_kwargs,
+    # llm_kwargs=llm_kwargs,
    llm_conf={"init_param_path": llm_dir},
    tokenizer_conf={"init_param_path": llm_dir},
    audio_encoder=audio_encoder_dir,
@@ -246,6 +233,7 @@ tokenizer = model_llm.kwargs["tokenizer"]

 model_dict = {"model": model, "frontend": frontend, "tokenizer": tokenizer}

+
 async def send_to_client(websocket, syntheszier, tts_fifo):
    # Sending tts data to the client
    while True:
@@ -260,6 +248,8 @@ async def send_to_client(websocket, syntheszier, tts_fifo):
        else:
            print("WebSocket connection is not open or syntheszier is not started.")
            break
+
+
 async def model_inference(
    websocket,
    audio_in,
@@ -271,7 +261,9 @@ async def model_inference(
    text_usr="",
 ):
    fifo_queue = deque()
-    synthesizer = NlsTtsSynthesizer(websocket=websocket, tts_fifo=fifo_queue, token="xxx", appkey="xxx")
+    synthesizer = NlsTtsSynthesizer(
+        websocket=websocket, tts_fifo=fifo_queue, token=appkey_token, appkey=appkey
+    )
    synthesizer.start()
    beg0 = time.time()
    if his_state is None:
@@ -506,7 +498,7 @@ async def async_vad(websocket, audio_in):
    return speech_start, speech_end


-if len(args.certfile) > 0:
+if False:  # len(args.certfile) > 0:
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)

    # Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions