diff --git a/funasr/runtime/python/websocket/README.md b/funasr/runtime/python/websocket/README.md index ed447950f..7653cf58e 100644 --- a/funasr/runtime/python/websocket/README.md +++ b/funasr/runtime/python/websocket/README.md @@ -36,7 +36,7 @@ python wss_srv_asr.py \ ``` ##### Usage examples ```shell -python wss_srv_asr.py --port 10095 --asr_model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" --asr_model_online "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online" +python wss_srv_asr.py --port 10095 ``` ## For the client @@ -59,9 +59,8 @@ python wss_client_asr.py \ --words_max_print [max number of words to print] \ --audio_in [if set, loadding from wav.scp, else recording from mircrophone] \ --output_dir [if set, write the results to output_dir] \ ---send_without_sleep [only set for offline] \ ---ssl [1 for wss connect, 0 for ws, default is 1] \ --mode [`online` for streaming asr, `offline` for non-streaming, `2pass` for unifying streaming and non-streaming asr] \ +--thread_num [number of threads used to send data] ``` #### Usage examples @@ -69,19 +68,19 @@ python wss_client_asr.py \ Recording from mircrophone ```shell # --chunk_interval, "10": 600/10=60ms, "5"=600/5=120ms, "20": 600/12=30ms -python wss_client_asr.py --host "0.0.0.0" --port 10095 --mode offline --chunk_interval 10 --words_max_print 100 +python wss_client_asr.py --host "0.0.0.0" --port 10095 --mode offline ``` Loadding from wav.scp(kaldi style) ```shell # --chunk_interval, "10": 600/10=60ms, "5"=600/5=120ms, "20": 600/12=30ms -python wss_client_asr.py --host "0.0.0.0" --port 10095 --mode offline --chunk_interval 10 --words_max_print 100 --audio_in "./data/wav.scp" --output_dir "./results" +python wss_client_asr.py --host "0.0.0.0" --port 10095 --mode offline --audio_in "./data/wav.scp" --output_dir "./results" ``` ##### ASR streaming client Recording from mircrophone ```shell # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms -python wss_client_asr.py --host 
"0.0.0.0" --port 10095 --mode online --chunk_size "5,10,5" --words_max_print 100 +python wss_client_asr.py --host "0.0.0.0" --port 10095 --mode online --chunk_size "5,10,5" ``` Loadding from wav.scp(kaldi style) ```shell diff --git a/funasr/runtime/python/websocket/parse_args.py b/funasr/runtime/python/websocket/parse_args.py deleted file mode 100644 index ffecff7c2..000000000 --- a/funasr/runtime/python/websocket/parse_args.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- encoding: utf-8 -*- -import argparse -parser = argparse.ArgumentParser() -parser.add_argument("--host", - type=str, - default="0.0.0.0", - required=False, - help="host ip, localhost, 0.0.0.0") -parser.add_argument("--port", - type=int, - default=10095, - required=False, - help="grpc server port") -parser.add_argument("--asr_model", - type=str, - default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", - help="model from modelscope") -parser.add_argument("--asr_model_online", - type=str, - default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online", - help="model from modelscope") -parser.add_argument("--vad_model", - type=str, - default="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", - help="model from modelscope") -parser.add_argument("--punc_model", - type=str, - default="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", - help="model from modelscope") -parser.add_argument("--ngpu", - type=int, - default=1, - help="0 for cpu, 1 for gpu") -parser.add_argument("--ncpu", - type=int, - default=4, - help="cpu cores") -parser.add_argument("--certfile", - type=str, - default="./ssl_key/server.crt", - required=False, - help="certfile for ssl") - -parser.add_argument("--keyfile", - type=str, - default="./ssl_key/server.key", - required=False, - help="keyfile for ssl") -args = parser.parse_args() -print(args) \ No newline at end of file diff --git a/funasr/runtime/python/websocket/wss_client_asr.py b/funasr/runtime/python/websocket/wss_client_asr.py 
index 0f1d1d034..dcd9576f4 100644 --- a/funasr/runtime/python/websocket/wss_client_asr.py +++ b/funasr/runtime/python/websocket/wss_client_asr.py @@ -42,10 +42,10 @@ parser.add_argument("--send_without_sleep", action="store_true", default=True, help="if audio_in is set, send_without_sleep") -parser.add_argument("--test_thread_num", +parser.add_argument("--thread_num", type=int, default=1, - help="test_thread_num") + help="thread_num") parser.add_argument("--words_max_print", type=int, default=10000, @@ -311,16 +311,16 @@ if __name__ == '__main__': f'Not supported audio type: {audio_type}') total_len = len(wavs) - if total_len >= args.test_thread_num: - chunk_size = int(total_len / args.test_thread_num) - remain_wavs = total_len - chunk_size * args.test_thread_num + if total_len >= args.thread_num: + chunk_size = int(total_len / args.thread_num) + remain_wavs = total_len - chunk_size * args.thread_num else: chunk_size = 1 remain_wavs = 0 process_list = [] chunk_begin = 0 - for i in range(args.test_thread_num): + for i in range(args.thread_num): now_chunk_size = chunk_size if remain_wavs > 0: now_chunk_size = chunk_size + 1 diff --git a/funasr/runtime/python/websocket/wss_srv_asr.py b/funasr/runtime/python/websocket/wss_srv_asr.py index 09f2305f7..fd039aead 100644 --- a/funasr/runtime/python/websocket/wss_srv_asr.py +++ b/funasr/runtime/python/websocket/wss_srv_asr.py @@ -5,8 +5,8 @@ import time import logging import tracemalloc import numpy as np +import argparse import ssl -from parse_args import args from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger @@ -17,6 +17,54 @@ tracemalloc.start() logger = get_logger(log_level=logging.CRITICAL) logger.setLevel(logging.CRITICAL) +parser = argparse.ArgumentParser() +parser.add_argument("--host", + type=str, + default="0.0.0.0", + required=False, + help="host ip, localhost, 0.0.0.0") +parser.add_argument("--port", + type=int, + default=10095, + 
required=False, + help="grpc server port") +parser.add_argument("--asr_model", + type=str, + default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", + help="model from modelscope") +parser.add_argument("--asr_model_online", + type=str, + default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online", + help="model from modelscope") +parser.add_argument("--vad_model", + type=str, + default="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", + help="model from modelscope") +parser.add_argument("--punc_model", + type=str, + default="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", + help="model from modelscope") +parser.add_argument("--ngpu", + type=int, + default=1, + help="0 for cpu, 1 for gpu") +parser.add_argument("--ncpu", + type=int, + default=4, + help="cpu cores") +parser.add_argument("--certfile", + type=str, + default="./ssl_key/server.crt", + required=False, + help="certfile for ssl") + +parser.add_argument("--keyfile", + type=str, + default="./ssl_key/server.key", + required=False, + help="keyfile for ssl") +args = parser.parse_args() + websocket_users = set()