From a6b790bffc56b27932c9a9e985bb71c437372822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Mon, 8 May 2023 00:11:30 +0800 Subject: [PATCH 1/2] websocket offline --- funasr/runtime/python/websocket/README.md | 4 ++-- funasr/runtime/python/websocket/ws_client.py | 23 ++++++++++++++++--- .../python/websocket/ws_server_offline.py | 6 +++-- .../python/websocket/ws_server_online.py | 8 +++---- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/funasr/runtime/python/websocket/README.md b/funasr/runtime/python/websocket/README.md index 76405eaf5..473c37a2c 100644 --- a/funasr/runtime/python/websocket/README.md +++ b/funasr/runtime/python/websocket/README.md @@ -58,7 +58,7 @@ python ws_client.py --host "0.0.0.0" --port 10095 --chunk_interval 10 --words_ma ##### Loadding from wav.scp(kaldi style) ```shell # --chunk_interval, "10": 600/10=60ms, "5"=600/5=120ms, "20": 600/12=30ms -python ws_client.py --host "0.0.0.0" --port 10095 --chunk_interval 10 --words_max_print 100 --audio_in "./data/wav.scp" --send_without_sleep +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_interval 10 --words_max_print 100 --audio_in "./data/wav.scp" --send_without_sleep --output_dir "./results" ``` #### ASR streaming client ##### Recording from mircrophone @@ -69,7 +69,7 @@ python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "5,10,5" --words_ ##### Loadding from wav.scp(kaldi style) ```shell # --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms -python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "5,10,5" --audio_in "./data/wav.scp" --words_max_print 100 +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "5,10,5" --audio_in "./data/wav.scp" --words_max_print 100 --output_dir "./results" ``` #### ASR offline/online 2pass client diff --git a/funasr/runtime/python/websocket/ws_client.py b/funasr/runtime/python/websocket/ws_client.py index d8bbb6596..bbc49b104 100644 --- a/funasr/runtime/python/websocket/ws_client.py +++ b/funasr/runtime/python/websocket/ws_client.py @@ -7,7 +7,9 @@ import asyncio import argparse import json import traceback -from multiprocessing import Process +from multiprocessing import Process +from funasr.fileio.datadir_writer import DatadirWriter + parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, @@ -43,6 +45,10 @@ parser.add_argument("--words_max_print", type=int, default=100, help="chunk") +parser.add_argument("--output_dir", + type=str, + default=None, + help="output_dir") args = parser.parse_args() args.chunk_size = [int(x) for x in args.chunk_size.split(",")] @@ -51,6 +57,11 @@ print(args) from queue import Queue voices = Queue() +ibest_writer = None +if args.output_dir is not None: + writer = DatadirWriter(args.output_dir) + ibest_writer = writer[f"1best_recog"] + async def record_microphone(): is_finished = False import pyaudio @@ -91,7 +102,9 @@ async def record_from_scp(): wavs = [args.audio_in] for wav in wavs: wav_splits = wav.strip().split() + wav_name = wav_splits[0] if len(wav_splits) > 1 else "demo" wav_path = wav_splits[1] if len(wav_splits) > 1 else wav_splits[0] + # bytes_f = open(wav_path, "rb") # bytes_data = bytes_f.read() with wave.open(wav_path, "rb") as wav_file: @@ -112,7 +125,7 @@ async def record_from_scp(): beg = i*stride data = audio_bytes[beg:beg+stride] data = data.decode('ISO-8859-1') - message = json.dumps({"chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval, "is_speaking": is_speaking, "audio": data, "is_finished": is_finished}) + message = json.dumps({"chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval, "is_speaking": is_speaking, "audio": data, "is_finished": is_finished, "wav_name": wav_name}) voices.put(message) # print("data_chunk: ", len(data_chunk)) # print(voices.qsize()) @@ -152,14 +165,18 @@ async def message(id): # print(meg, end = '') # print("\r") # print(meg) + wav_name = meg.get("wav_name", "demo") + print(wav_name) text = meg["text"][0] + if ibest_writer is not None: + ibest_writer["text"][wav_name] = text if meg["mode"] == "online": text_print += " {}".format(text) else: text_print += "{}".format(text) text_print = text_print[-args.words_max_print:] os.system('clear') - print("\r"+str(id)+":"+text_print) + print("\rpid"+str(id)+": "+text_print) except Exception as e: print("Exception:", e) traceback.print_exc() diff --git a/funasr/runtime/python/websocket/ws_server_offline.py b/funasr/runtime/python/websocket/ws_server_offline.py index 787391871..c60ea6f16 100644 --- a/funasr/runtime/python/websocket/ws_server_offline.py +++ b/funasr/runtime/python/websocket/ws_server_offline.py @@ -78,6 +78,7 @@ async def ws_serve(websocket, path): is_speaking = message["is_speaking"] websocket.param_dict_vad["is_final"] = not is_speaking + websocket.wav_name = message.get("wav_name", "demo") if speech_start: frames_asr.append(audio) speech_start_i, speech_end_i = await async_vad(websocket, audio) @@ -136,8 +137,9 @@ async def async_asr(websocket, audio_in): rec_result = inference_pipeline_punc(text_in=rec_result['text'], param_dict=websocket.param_dict_punc) # print(rec_result) - message = json.dumps({"mode": "offline", "text": [rec_result["text"]]}) - await websocket.send(message) + message = json.dumps({"mode": "offline", "text": [rec_result["text"]], "wav_name": websocket.wav_name}) + await websocket.send(message) + diff --git a/funasr/runtime/python/websocket/ws_server_online.py b/funasr/runtime/python/websocket/ws_server_online.py index 6ea8f397a..b1cd4eaea 100644 --- a/funasr/runtime/python/websocket/ws_server_online.py +++ b/funasr/runtime/python/websocket/ws_server_online.py @@ -53,7 +53,7 @@ async def ws_serve(websocket, path): is_speaking = message["is_speaking"] websocket.param_dict_asr_online["is_final"] = not is_speaking - + websocket.wav_name = message.get("wav_name", "demo") websocket.param_dict_asr_online["chunk_size"] = message["chunk_size"] frames_online.append(audio) @@ -81,9 +81,9 @@ async def async_asr_online(websocket,audio_in): websocket.param_dict_asr_online["cache"] = dict() if "text" in rec_result: if rec_result["text"] != "sil" and rec_result["text"] != "waiting_for_more_voice": - if len(rec_result["text"])>0: - rec_result["text"][0]=rec_result["text"][0] #.replace(" ","") - message = json.dumps({"mode": "online", "text": rec_result["text"]}) + # if len(rec_result["text"])>0: + # rec_result["text"][0]=rec_result["text"][0] #.replace(" ","") + message = json.dumps({"mode": "online", "text": rec_result["text"], "wav_name": websocket.wav_name}) await websocket.send(message) From 63f1496cbb9d1cf5893a77489a9c8e5ef589ccf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Mon, 8 May 2023 00:13:17 +0800 Subject: [PATCH 2/2] v0.4.6 --- funasr/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/funasr/version.txt b/funasr/version.txt index 0bfccb080..ef52a6480 100644 --- a/funasr/version.txt +++ b/funasr/version.txt @@ -1 +1 @@ -0.4.5 +0.4.6