This commit is contained in:
游雁 2024-09-02 19:15:59 +08:00
parent 623fd16f34
commit 8fb3ce8796

View File

@ -11,8 +11,16 @@ import nls
from collections import deque
import threading
class NlsTtsSynthesizer:
def __init__(self, websocket, tts_fifo, token, appkey, url="wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1"):
def __init__(
self,
websocket,
tts_fifo,
token,
appkey,
url="wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1",
):
self.websocket = websocket
self.tts_fifo = tts_fifo
self.url = url
@ -36,30 +44,32 @@ class NlsTtsSynthesizer:
on_completed=self.on_completed,
on_error=self.on_error,
on_close=self.on_close,
callback_args=[]
callback_args=[],
)
def on_data(self, data, *args):
    """Record one incoming data chunk.

    Adds ``len(data)`` to ``self.count`` and appends ``data`` to
    ``self.tts_fifo``. Extra positional arguments are accepted and ignored.
    """
    chunk_len = len(data)
    self.count = self.count + chunk_len
    self.tts_fifo.append(data)
    # Debug aid (disabled): dump every chunk to a local file.
    # with open('tts_server.pcm', 'ab') as file:
    #     file.write(data)
def on_sentence_begin(self, message, *args):
    """Print the sentence-begin ``message``; extra positional args are ignored."""
    line = "on sentence begin =>{}".format(message)
    print(line)
def on_sentence_synthesis(self, message, *args):
    """Print the sentence-synthesis ``message``; extra positional args are ignored."""
    line = "on sentence synthesis =>{}".format(message)
    print(line)
def on_sentence_end(self, message, *args):
    """Print the sentence-end ``message``; extra positional args are ignored."""
    line = "on sentence end =>{}".format(message)
    print(line)
def on_completed(self, message, *args):
    """Print the completion ``message``; extra positional args are ignored."""
    line = "on completed =>{}".format(message)
    print(line)
def on_error(self, message, *args):
    """Print an error delivered to the error callback.

    Args:
        message: The error payload passed to the callback; printed verbatim.
        *args: Extra positional callback arguments; printed as a tuple.
    """
    # Previously only ``args`` was printed and ``message`` was dropped, so the
    # error description itself never appeared in the output.
    print("on_error message=>{}".format(message))
    # Kept byte-identical to the original line so existing log matching still works.
    print("on_error args=>{}".format(args))
def on_close(self, *args):
    """Print the close arguments and the accumulated ``self.count``, then clear ``self.started``."""
    close_line = "on_close: args=>{}".format(args)
    print(close_line)
    # NOTE(review): "cout" looks like a typo for "count"; the text is left
    # unchanged here so the emitted log line stays identical.
    total_line = "on message data cout: =>{}".format(self.count)
    print(total_line)
    self.started = False
@ -73,11 +83,12 @@ class NlsTtsSynthesizer:
async def stop(self):
    """Call ``stopStreamInputTts()`` on ``self.sdk``.

    The coroutine contains no ``await``; the SDK call runs synchronously
    when the coroutine is driven.
    """
    sdk = self.sdk
    sdk.stopStreamInputTts()
# Command-line options for the server process.
parser = argparse.ArgumentParser()
# NOTE(review): two --host argument lines with different defaults are visible
# below (127.0.0.1 and 0.0.0.0). A 0.0.0.0 default listens on every network
# interface — confirm that exposure is intended.
parser.add_argument(
"--host", type=str, default="127.0.0.1", required=False, help="host ip, localhost, 0.0.0.0"
"--host", type=str, default="0.0.0.0", required=False, help="host ip, localhost, 0.0.0.0"
)
# NOTE(review): two --port lines with different defaults (10095 and 10096) are
# visible. The help text says "grpc server port", while the handlers visible in
# this file operate on a websocket — confirm the wording.
parser.add_argument("--port", type=int, default=10095, required=False, help="grpc server port")
parser.add_argument("--port", type=int, default=10096, required=False, help="grpc server port")
parser.add_argument(
"--asr_model",
type=str,
@ -124,16 +135,6 @@ websocket_users = set()
print("model loading")
from funasr import AutoModel
# # asr
# model_asr = AutoModel(
# model=args.asr_model,
# model_revision=args.asr_model_revision,
# ngpu=args.ngpu,
# ncpu=args.ncpu,
# device=args.device,
# disable_pbar=False,
# disable_log=True,
# )
# vad
model_vad = AutoModel(
@ -147,26 +148,6 @@ model_vad = AutoModel(
# chunk_size=60,
)
# async def async_asr(websocket, audio_in):
# if len(audio_in) > 0:
# # print(len(audio_in))
# print(type(audio_in))
# rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
# print("offline_asr, ", rec_result)
#
#
# if len(rec_result["text"]) > 0:
# # print("offline", rec_result)
# mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
# message = json.dumps(
# {
# "mode": mode,
# "text": rec_result["text"],
# "wav_name": websocket.wav_name,
# "is_final": websocket.is_speaking,
# }
# )
# await websocket.send(message)
import os
@ -205,20 +186,26 @@ if "key" in os.environ:
key = os.environ["key"]
api.login(key)
# Credentials passed to NlsTtsSynthesizer (as ``appkey`` and ``token``).
# Each value is read from the environment variable of the same name and falls
# back to the "xxx" placeholder when that variable is absent.
# The earlier form indexed os.environ["appkey_token"] directly, which raised
# KeyError at import time when "appkey" was exported without "appkey_token".
appkey = os.environ.get("appkey", "xxx")
appkey_token = os.environ.get("appkey_token", "xxx")
# Model locations: where the LLM and audio-encoder weights come from, the
# target device, and the list of checkpoint identifiers.
from modelscope.hub.snapshot_download import snapshot_download
# os.environ["MODELSCOPE_CACHE"] = "/nfs/zhifu.gzf/modelscope"
# llm_dir = snapshot_download('qwen/Qwen2-7B-Instruct', cache_dir=None, revision='master')
# audio_encoder_dir = snapshot_download('iic/SenseVoice', cache_dir=None, revision='master')
# NOTE(review): this assignment overwrites any MODELSCOPE_CACHE value already
# present in the environment with a machine-specific path; consider
# os.environ.setdefault(...) so an operator-supplied value is respected.
os.environ["MODELSCOPE_CACHE"] = "/nfs/zhifu.gzf/modelscope"
# snapshot_download is called at module level here, i.e. when this file is executed.
llm_dir = snapshot_download("qwen/Qwen2-7B-Instruct", cache_dir=None, revision="master")
audio_encoder_dir = snapshot_download("iic/SenseVoice", cache_dir=None, revision="master")
# NOTE(review): the two assignments below rebind llm_dir / audio_encoder_dir to
# fixed local paths, and the same two lines also appear commented out right
# after — confirm which pair is meant to be live.
llm_dir = "/cpfs_speech/zhifu.gzf/init_model/qwen/Qwen2-7B-Instruct"
audio_encoder_dir = "/nfs/zhifu.gzf/init_model/SenseVoiceLargeModelscope"
# llm_dir = "/cpfs_speech/zhifu.gzf/init_model/qwen/Qwen2-7B-Instruct"
# audio_encoder_dir = "/nfs/zhifu.gzf/init_model/SenseVoiceLargeModelscope"
device = "cuda:0"
# NOTE(review): the first string entry below has no trailing comma, so Python
# would join it with the next string literal — confirm only one entry is live.
all_file_paths = [
"/nfs/zhifu.gzf/init_model/Speech2Text_Align_V0712_modelscope"
# "FunAudioLLM/Speech2Text_Align_V0712",
# "/nfs/zhifu.gzf/init_model/Speech2Text_Align_V0712_modelscope"
"FunAudioLLM/Speech2Text_Align_V0712",
# "FunAudioLLM/Speech2Text_Align_V0718",
# "FunAudioLLM/Speech2Text_Align_V0628",
]
@ -234,7 +221,7 @@ model_llm = AutoModel(
bf16=False,
llm_dtype="bf16",
max_length=1024,
llm_kwargs=llm_kwargs,
# llm_kwargs=llm_kwargs,
llm_conf={"init_param_path": llm_dir},
tokenizer_conf={"init_param_path": llm_dir},
audio_encoder=audio_encoder_dir,
@ -246,6 +233,7 @@ tokenizer = model_llm.kwargs["tokenizer"]
model_dict = {"model": model, "frontend": frontend, "tokenizer": tokenizer}
async def send_to_client(websocket, syntheszier, tts_fifo):
# Sending tts data to the client
while True:
@ -260,6 +248,8 @@ async def send_to_client(websocket, syntheszier, tts_fifo):
else:
print("WebSocket connection is not open or syntheszier is not started.")
break
async def model_inference(
websocket,
audio_in,
@ -271,7 +261,9 @@ async def model_inference(
text_usr="",
):
fifo_queue = deque()
synthesizer = NlsTtsSynthesizer(websocket=websocket, tts_fifo=fifo_queue, token="xxx", appkey="xxx")
synthesizer = NlsTtsSynthesizer(
websocket=websocket, tts_fifo=fifo_queue, token=appkey_token, appkey=appkey
)
synthesizer.start()
beg0 = time.time()
if his_state is None:
@ -506,7 +498,7 @@ async def async_vad(websocket, audio_in):
return speech_start, speech_end
if len(args.certfile) > 0:
if False: # len(args.certfile) > 0:
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
# Generate with Let's Encrypt, copy to this location, chown to the current user, and set 400 permissions