diff --git a/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py b/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py index d3674a8ee..7399ee223 100644 --- a/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py +++ b/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py @@ -5,10 +5,10 @@ from modelscope.utils.constant import Tasks input = 'https://modelscope.cn/api/v1/models/damo/speech_separation_mossformer_8k_pytorch/repo?Revision=master&FilePath=examples/mix_speech1.wav' separation = pipeline( - Tasks.funasr_speech_separation, + Tasks.speech_separation, model='damo/speech_separation_mossformer_8k_pytorch', output_dir='./', - model_revision='v1.0.1') + model_revision='v1.0.2') result = separation(audio_in=input) for i, signal in enumerate(result): save_file = f'output_spk_{i+1}.wav' diff --git a/funasr/export/models/__init__.py b/funasr/export/models/__init__.py index cba92a865..94447dca3 100644 --- a/funasr/export/models/__init__.py +++ b/funasr/export/models/__init__.py @@ -25,8 +25,9 @@ def get_model(model, export_config=None): elif isinstance(model, BiCifParaformer): return BiCifParaformer_export(model, **export_config) elif isinstance(model, ParaformerOnline): - return (ParaformerOnline_encoder_predictor_export(model, model_name="model"), - ParaformerOnline_decoder_export(model, model_name="decoder")) + encoder = ParaformerOnline_encoder_predictor_export(model, model_name="model") + decoder = ParaformerOnline_decoder_export(model, model_name="decoder") + return [encoder, decoder] elif isinstance(model, Paraformer): return Paraformer_export(model, **export_config) elif isinstance(model, Conformer_export): diff --git a/funasr/runtime/html5/h5Server.py b/funasr/runtime/html5/h5Server.py index d0ecf27fd..96392f800 100644 --- a/funasr/runtime/html5/h5Server.py +++ b/funasr/runtime/html5/h5Server.py @@ -6,7 +6,7 @@ ### 2022-2023 by zhaoming,mali 
aihealthx.com -from flask import Flask,render_template,request,send_from_directory,jsonify +from flask import Flask,render_template,request,send_from_directory,jsonify,redirect,url_for #from gevent.pywsgi import WSGIServer import datetime @@ -20,7 +20,7 @@ app = Flask(__name__,static_folder='static',static_url_path="/static") @app.route('/') def homePage(): - return render_template('recorderapp_test.html') + return redirect('/static/index.html') parser = argparse.ArgumentParser() @@ -62,4 +62,4 @@ if __name__ == '__main__': #flask print("srv run on ",port) - app.run(debug=True,host=args.host,port=port, ssl_context=(args.certfile,args.keyfile)) + app.run(debug=False,threaded=True,host=args.host,port=port, ssl_context=(args.certfile,args.keyfile)) diff --git a/funasr/runtime/onnxruntime/src/funasrruntime.cpp b/funasr/runtime/onnxruntime/src/funasrruntime.cpp index 4946a221c..a10e3ecc7 100644 --- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp +++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp @@ -390,7 +390,7 @@ extern "C" { // if (!audio->FfmpegLoad(sz_buf, n_len)) // return nullptr; LOG(ERROR) <<"Wrong wav_format: " << wav_format ; - exit(-1); + return nullptr; } funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; diff --git a/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs b/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs index a0039e7b3..cfdddeb28 100644 --- a/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs +++ b/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs @@ -19,11 +19,13 @@ namespace FunASRWSClient_Offline { public static string host = "0.0.0.0"; public static string port = "10095"; + public static string hotword = null; private static CWebSocketClient m_websocketclient = new CWebSocketClient(); [STAThread] public async void FunASR_Main() { loadconfig(); + loadhotword(); //初始化通信连接 string errorStatus = string.Empty; string commstatus = ClientConnTest(); @@ -72,6 +74,34 @@ namespace 
FunASRWSClient_Offline } } } + + } + static void loadhotword() + { + string filePath = "hotword.txt"; + try + { + // 使用 StreamReader 打开文本文件 + using (StreamReader sr = new StreamReader(filePath)) + { + string line; + // 逐行读取文件内容 + while ((line = sr.ReadLine()) != null) + { + hotword += line; + hotword += " "; + } + } + } + catch (Exception ex) + { + Console.WriteLine("读取文件时发生错误:" + ex.Message); + } + finally + { + if (!string.IsNullOrEmpty(hotword) && hotword[hotword.Length - 1] == ' ') + hotword = hotword.Substring(0,hotword.Length - 1); + } } private static string ClientConnTest() { diff --git a/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md b/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md index 3563560e6..8a5742c00 100644 --- a/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md +++ b/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md @@ -1,9 +1,11 @@ -# cshape-client-offline - -这是一个基于FunASR-Websocket服务器的CShape客户端,用于转录本地音频文件。 - -将配置文件放在与程序相同目录下的config文件夹中,并在config.ini中配置服务器ip地址和端口号。 - -配置好服务端ip和端口号,在vs中打开需添加Websocket.Client的Nuget程序包后,可直接进行测试,按照控制台提示操作即可。 - -注:本客户端暂支持wav文件,在win11下完成测试,编译环境VS2022。 \ No newline at end of file +# cshape-client-offline + +这是一个基于FunASR-Websocket服务器的CShape客户端,用于转录本地音频文件。 + +将配置文件放在与程序相同目录下的config文件夹中,并在config.ini中配置服务器ip地址和端口号。 + +配置好服务端ip和端口号,在vs中打开需添加Websocket.Client的Nuget程序包后,可直接进行测试,按照控制台提示操作即可。 + +更新:支持热词和时间戳,热词需将config文件夹下的hotword.txt放置在执行路径下。 + +注:运行后台须注意热词和时间戳为不同模型,本客户端在win11下完成测试,编译环境VS2022。 diff --git a/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs b/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs index 920852409..350aa20d3 100644 --- a/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs +++ b/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs @@ -2,6 +2,7 @@ using System.Text.Json; using System.Reactive.Linq; using FunASRWSClient_Offline; +using System.Text.RegularExpressions; namespace WebSocketSpace { @@ -45,15 +46,31 @@ 
namespace WebSocketSpace public async Task ClientSendFileFunc(string file_name)//文件转录 { + string fileExtension = Path.GetExtension(file_name); + fileExtension = fileExtension.Replace(".", ""); + if (!(fileExtension == "mp3" || fileExtension == "mp4" || fileExtension == "wav" || fileExtension == "pcm")) + return Task.CompletedTask; + try { if (client.IsRunning) { - var exitEvent = new ManualResetEvent(false); - string path = Path.GetFileName(file_name); - string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true}}", Path.GetFileName(file_name)); - client.Send(firstbuff); - showWAVForm(client, file_name); + if (fileExtension == "wav") + { + var exitEvent = new ManualResetEvent(false); + string path = Path.GetFileName(file_name); + string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true,\"hotwords\":\"{1}\"}}", Path.GetFileName(file_name), WSClient_Offline.hotword); + client.Send(firstbuff); + showWAVForm(client, file_name); + } + else + { + var exitEvent = new ManualResetEvent(false); + string path = Path.GetFileName(file_name); + string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true,\"hotwords\":\"{1}\", \"wav_format\":\"{2}\"}}", Path.GetFileName(file_name), WSClient_Offline.hotword, fileExtension); + client.Send(firstbuff); + showWAVForm_All(client, file_name); + } } } catch (Exception ex) @@ -69,15 +86,42 @@ namespace WebSocketSpace { try { + string timestamp = string.Empty; JsonDocument jsonDoc = JsonDocument.Parse(message); JsonElement root = jsonDoc.RootElement; string mode = root.GetProperty("mode").GetString(); - string text = root.GetProperty("text").GetString(); + string text = root.GetProperty("text").GetString(); string name = root.GetProperty("wav_name").GetString(); - if(name == "asr_stream") - Console.WriteLine($"实时识别内容: {text}"); + if (message.IndexOf("timestamp") != -1) + { + 
Console.WriteLine($"文件名称:{name}"); + //识别内容处理 + text = text.Replace(",", "。"); + text = text.Replace("?", "。"); + List<string> sens = text.Split("。").ToList(); + //时间戳处理 + timestamp = root.GetProperty("timestamp").GetString(); + List<List<int>> data = new List<List<int>>(); + string pattern = @"\[(\d+),(\d+)\]"; + foreach (Match match in Regex.Matches(timestamp, pattern)) + { + int start = int.Parse(match.Groups[1].Value); + int end = int.Parse(match.Groups[2].Value); + data.Add(new List<int> { start, end }); + } + int count = 0; + for (int i = 0; i< sens.Count; i++) + { + if (sens[i].Length == 0) + continue; + Console.WriteLine(string.Format($"[{data[count][0]}-{data[count + sens[i].Length - 1][1]}]:{sens[i]}")); + count += sens[i].Length; + } + } else - Console.WriteLine($"文件名称:{name} 文件转录内容: {text}"); + { + Console.WriteLine($"文件名称:{name} 文件转录内容: {text} 时间戳:{timestamp}"); + } } catch (JsonException ex) { @@ -100,6 +144,19 @@ namespace WebSocketSpace client.Send("{\"is_speaking\": false}"); } + private void showWAVForm_All(WebsocketClient client, string file_name) + { + byte[] getbyte = FileToByte(file_name).ToArray(); + for (int i = 0; i < getbyte.Length; i += 1024000) + { + byte[] send = getbyte.Skip(i).Take(1024000).ToArray(); + client.Send(send); + Thread.Sleep(5); + } + Thread.Sleep(10); + client.Send("{\"is_speaking\": false}"); + } + public byte[] FileToByte(string fileUrl) { try diff --git a/funasr/runtime/wss-client/confg/hotword.txt b/funasr/runtime/wss-client/confg/hotword.txt new file mode 100644 index 000000000..c5468ea77 --- /dev/null +++ b/funasr/runtime/wss-client/confg/hotword.txt @@ -0,0 +1,3 @@ +阿里巴巴 +达摩院 +FunASR \ No newline at end of file