diff --git a/README.md b/README.md
index e9c6ef9bb..996cde4b6 100644
--- a/README.md
+++ b/README.md
@@ -75,8 +75,8 @@ If you have any questions about FunASR, please contact us by
## Contributors
-|
| |
-|:---------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------:|:-------------------------------------------------------:|:-----------------------------------------------------------:|
+| | | |
+|:---------------------------------------------------------------:|:---------------------------------------------------------------:|:--------------------------------------------------------------:|:-------------------------------------------------------:|:-----------------------------------------------------------:|:-----------------------------------------------------------:|
## Acknowledge
@@ -86,6 +86,7 @@ If you have any questions about FunASR, please contact us by
4. We acknowledge [ChinaTelecom](https://github.com/zhuzizyf/damo-fsmn-vad-infer-httpserver) for contributing the VAD runtime.
5. We acknowledge [RapidAI](https://github.com/RapidAI) for contributing the Paraformer and CT_Transformer-punc runtime.
6. We acknowledge [DeepScience](https://www.deepscience.cn) for contributing the grpc service.
+7. We acknowledge [AiHealthx](http://www.aihealthx.com/) for contributing the websocket service and html5.
## License
This project is licensed under the [The MIT License](https://opensource.org/licenses/MIT). FunASR also contains various third-party components and some code modified from other repos under other open source licenses.
diff --git a/docs/images/aihealthx.png b/docs/images/aihealthx.png
new file mode 100644
index 000000000..5727c5bd6
Binary files /dev/null and b/docs/images/aihealthx.png differ
diff --git a/docs/index.rst b/docs/index.rst
index c2656bded..cb98f3573 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -68,10 +68,12 @@ Overview
./runtime/onnxruntime_python.md
./runtime/onnxruntime_cpp.md
./runtime/libtorch_python.md
- ./runtime/grpc_python.md
- ./runtime/grpc_cpp.md
+ ./runtime/html5.md
./runtime/websocket_python.md
./runtime/websocket_cpp.md
+ ./runtime/grpc_python.md
+ ./runtime/grpc_cpp.md
+
.. toctree::
:maxdepth: 1
diff --git a/docs/runtime/html5.md b/docs/runtime/html5.md
new file mode 120000
index 000000000..bf47840ed
--- /dev/null
+++ b/docs/runtime/html5.md
@@ -0,0 +1 @@
+../../funasr/runtime/html5/readme.md
\ No newline at end of file
diff --git a/funasr/runtime/html5/demo.gif b/funasr/runtime/html5/demo.gif
new file mode 100644
index 000000000..f487f2c66
Binary files /dev/null and b/funasr/runtime/html5/demo.gif differ
diff --git a/funasr/runtime/html5/h5Server.py b/funasr/runtime/html5/h5Server.py
new file mode 100644
index 000000000..fa794301c
--- /dev/null
+++ b/funasr/runtime/html5/h5Server.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+###
+### Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
+### Reserved. MIT License (https://opensource.org/licenses/MIT)
+###
+### 2022-2023 by zhaoming,mali aihealthx.com
+
+
+from flask import Flask,render_template,request,send_from_directory,jsonify
+#from gevent.pywsgi import WSGIServer
+
+import datetime
+import random
+import string
+import time
+import argparse
+
+
+app = Flask(__name__,static_folder='static',static_url_path="/static")
+
+@app.route('/')
+def homePage():
+ return render_template('recorderapp_test.html')
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--host",
+ type=str,
+ default="0.0.0.0",
+ required=False,
+ help="host ip, localhost, 0.0.0.0")
+parser.add_argument("--port",
+ type=int,
+ default=1337,
+ required=False,
+ help="html5 server port")
+
+parser.add_argument("--certfile",
+ type=str,
+ default="server.crt",
+ required=False,
+ help="certfile for ssl")
+
+parser.add_argument("--keyfile",
+ type=str,
+ default="server.key",
+ required=False,
+ help="keyfile for ssl")
+
+if __name__ == '__main__':
+ args = parser.parse_args()
+ port=args.port
+
+ #WSGIServer
+ #ssl = {
+ # 'certfile': 'server.crt',
+ # 'keyfile': 'server.key'
+ #}
+ #httpsServer = WSGIServer(("0.0.0.0",port), app, **ssl)
+ #httpsServer.serve_forever()
+
+ #flask
+ print("srv run on ",port)
+
+ app.run(debug=True,host=args.host,port=port, ssl_context=(args.certfile,args.keyfile))
diff --git a/funasr/runtime/html5/readme.md b/funasr/runtime/html5/readme.md
new file mode 100644
index 000000000..5dd462b81
--- /dev/null
+++ b/funasr/runtime/html5/readme.md
@@ -0,0 +1,109 @@
+# online asr demo for html5
+
+## requirement
+### python
+```shell
+flask
+gevent
+pyOpenSSL
+```
+
+### javascript
+[html5 recorder.js](https://github.com/xiangyuecn/Recorder)
+```shell
+Recorder
+```
+
+### demo
+
+
+## wss or ws protocol for ws_server_online
+1) wss: browser microphone data --> html5 demo server --> js wss api --> wss asr online srv # for certificate generation, see the section below
+
+2) ws: browser microphone data --> html5 demo server --> js wss api --> nginx wss server --> ws asr online srv
+
+## 1.html5 demo start
+### ssl certificate is required
+
+```shell
+usage: h5Server.py [-h] [--host HOST] [--port PORT] [--certfile CERTFILE]
+ [--keyfile KEYFILE]
+python h5Server.py --port 1337
+```
+## 2.asr online srv start
+[detail for online asr](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket)
+Online asr can run in wss or ws mode. If started in ws mode, nginx is required as a relay.
+### wss way, ssl certificate is required
+```shell
+python ws_server_online.py --certfile server.crt --keyfile server.key --port 5921
+```
+### ws way
+```shell
+python ws_server_online.py --port 5921
+```
+## 3.modify asr address in wsconnecter.js according to your environment
+asr address in wsconnecter.js must be wss, just like
+var Uri = "wss://xxx:xxx/"
+
+## 4.open browser to access html5 demo
+https://youraddress:port/static/index.html
+
+
+
+
+## certificate generation by yourself
+A self-generated certificate may not be suitable for all browsers due to security concerns. You'd better buy or download an authenticated SSL certificate from an authorized agency.
+
+```shell
+### 1) Generate a private key
+openssl genrsa -des3 -out server.key 1024
+
+### 2) Generate a csr file
+openssl req -new -key server.key -out server.csr
+
+### 3) Remove pass
+cp server.key server.key.org
+openssl rsa -in server.key.org -out server.key
+
+### 4) Generated a crt file, valid for 1 year
+openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
+```
+
+## nginx configuration (you can skip this if you already know nginx)
+The https and wss protocols are required by browsers when you want to open the microphone and use a websocket.
+if [online asr](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket) run in ws way, you should use nginx to convert wss to ws.
+### nginx wss->ws configuration example
+```shell
+events {
+ worker_connections 1024;
+ accept_mutex on;
+ }
+http {
+ error_log error.log;
+ access_log access.log;
+ server {
+
+ listen 5921 ssl http2; # nginx listen port for wss
+ server_name www.test.com;
+
+ ssl_certificate /funasr/server.crt;
+ ssl_certificate_key /funasr/server.key;
+ ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
+ ssl_ciphers HIGH:!aNULL:!MD5;
+
+ location /wss/ {
+
+
+ proxy_pass http://127.0.0.1:1111/; # asr online model ws address and port
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_read_timeout 600s;
+
+ }
+ }
+```
+
+## Acknowledge
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We acknowledge [AiHealthx](http://www.aihealthx.com/) for contributing the html5 demo.
\ No newline at end of file
diff --git a/funasr/runtime/html5/readme_cn.md b/funasr/runtime/html5/readme_cn.md
new file mode 100644
index 000000000..612dc2064
--- /dev/null
+++ b/funasr/runtime/html5/readme_cn.md
@@ -0,0 +1,109 @@
+# online asr demo for html5
+
+## requirement
+### python
+```shell
+flask
+gevent
+pyOpenSSL
+```
+
+### javascript
+[html5录音](https://github.com/xiangyuecn/Recorder)
+```shell
+Recorder
+```
+
+### demo页面如下
+
+
+## 两种ws_server_online连接模式
+### 1)直接连接模式,浏览器https麦克风 --> html5 demo服务 --> js wss接口 --> wss asr online srv(证书生成请往后看)
+
+### 2)nginx中转,浏览器https麦克风 --> html5 demo服务 --> js wss接口 --> nginx服务 --> ws asr online srv
+
+## 1.html5 demo服务启动
+### 启动html5服务,需要ssl证书(自己生成请往后看)
+
+```shell
+usage: h5Server.py [-h] [--host HOST] [--port PORT] [--certfile CERTFILE]
+ [--keyfile KEYFILE]
+python h5Server.py --port 1337
+```
+## 2.启动ws or wss asr online srv
+[具体请看online asr](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket)
+online asr提供两种ws和wss模式,wss模式可以直接启动,无需nginx中转。否则需要通过nginx将wss转发到该online asr的ws端口上
+### wss方式
+```shell
+python ws_server_online.py --certfile server.crt --keyfile server.key --port 5921
+```
+### ws方式
+```shell
+python ws_server_online.py --port 5921
+```
+## 3.修改wsconnecter.js里asr接口地址
+wsconnecter.js里配置online asr服务地址路径,这里配置的是wss端口
+var Uri = "wss://xxx:xxx/"
+
+## 4.浏览器打开地址测试
+https://127.0.0.1:1337/static/index.html
+
+
+
+
+## 自行生成证书
+生成证书(注意这种证书并不能被所有浏览器认可,部分手动授权可以访问,最好使用其他认证的官方ssl证书)
+
+```shell
+### 1)生成私钥,按照提示填写内容
+openssl genrsa -des3 -out server.key 1024
+
+### 2)生成csr文件 ,按照提示填写内容
+openssl req -new -key server.key -out server.csr
+
+### 去掉pass
+cp server.key server.key.org
+openssl rsa -in server.key.org -out server.key
+
+### 生成crt文件,有效期1年(365天)
+openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
+```
+
+## nginx配置说明(了解的可以跳过)
+h5打开麦克风需要https协议,同时后端的asr websocket也必须是wss协议,如果[online asr](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket)以ws方式运行,我们可以通过nginx配置实现wss协议到ws协议的转换。
+
+### nginx转发配置示例
+```shell
+events {
+ worker_connections 1024;
+ accept_mutex on;
+ }
+http {
+ error_log error.log;
+ access_log access.log;
+ server {
+
+ listen 5921 ssl http2; # nginx listen port for wss
+ server_name www.test.com;
+
+ ssl_certificate /funasr/server.crt;
+ ssl_certificate_key /funasr/server.key;
+ ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
+ ssl_ciphers HIGH:!aNULL:!MD5;
+
+ location /wss/ {
+
+
+ proxy_pass http://127.0.0.1:1111/; # asr online model ws address and port
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_read_timeout 600s;
+
+ }
+ }
+```
+
+## Acknowledge
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We acknowledge [AiHealthx](http://www.aihealthx.com/) for contributing the html5 demo.
\ No newline at end of file
diff --git a/funasr/runtime/html5/requirement.txt b/funasr/runtime/html5/requirement.txt
new file mode 100644
index 000000000..7f8721ac9
--- /dev/null
+++ b/funasr/runtime/html5/requirement.txt
@@ -0,0 +1,3 @@
+flask
+gevent
+pyOpenSSL
diff --git a/funasr/runtime/html5/server.crt b/funasr/runtime/html5/server.crt
new file mode 100644
index 000000000..808b73e6e
--- /dev/null
+++ b/funasr/runtime/html5/server.crt
@@ -0,0 +1,15 @@
+-----BEGIN CERTIFICATE-----
+MIICSDCCAbECFCObiVAMkMlCGmMDGDFx5Nx3XYvOMA0GCSqGSIb3DQEBCwUAMGMx
+CzAJBgNVBAYTAkNOMRAwDgYDVQQIDAdCZWlqaW5nMRAwDgYDVQQHDAdCZWlqaW5n
+MRAwDgYDVQQKDAdhbGliYWJhMQwwCgYDVQQLDANhc3IxEDAOBgNVBAMMB2FsaWJh
+YmEwHhcNMjMwNTEyMTQzNjAxWhcNMjQwNTExMTQzNjAxWjBjMQswCQYDVQQGEwJD
+TjEQMA4GA1UECAwHQmVpamluZzEQMA4GA1UEBwwHQmVpamluZzEQMA4GA1UECgwH
+YWxpYmFiYTEMMAoGA1UECwwDYXNyMRAwDgYDVQQDDAdhbGliYWJhMIGfMA0GCSqG
+SIb3DQEBAQUAA4GNADCBiQKBgQDEINLLMasJtJQPoesCfcwJsjiUkx3hLnoUyETS
+NBrrRfjbBv6ucAgZIF+/V15IfJZR6u2ULpJN0wUg8xNQReu4kdpjSdNGuQ0aoWbc
+38+VLo9UjjsoOeoeCro6b0u+GosPoEuI4t7Ky09zw+FBibD95daJ3GDY1DGCbDdL
+mV/toQIDAQABMA0GCSqGSIb3DQEBCwUAA4GBAB5KNWF1XIIYD1geMsyT6/ZRnGNA
+dmeUyMcwYvIlQG3boSipNk/JI4W5fFOg1O2sAqflYHmwZfmasAQsC2e5bSzHZ+PB
+uMJhKYxfj81p175GumHTw5Lbp2CvFSLrnuVB0ThRdcCqEh1MDt0D3QBuBr/ZKgGS
+hXtozVCgkSJzX6uD
+-----END CERTIFICATE-----
diff --git a/funasr/runtime/html5/server.key b/funasr/runtime/html5/server.key
new file mode 100644
index 000000000..aac8b2646
--- /dev/null
+++ b/funasr/runtime/html5/server.key
@@ -0,0 +1,15 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIICXQIBAAKBgQDEINLLMasJtJQPoesCfcwJsjiUkx3hLnoUyETSNBrrRfjbBv6u
+cAgZIF+/V15IfJZR6u2ULpJN0wUg8xNQReu4kdpjSdNGuQ0aoWbc38+VLo9Ujjso
+OeoeCro6b0u+GosPoEuI4t7Ky09zw+FBibD95daJ3GDY1DGCbDdLmV/toQIDAQAB
+AoGARpA0pwygp+ZDWvh7kDLoZRitCK+BkZHiNHX1ZNeAU+Oh7FOw79u43ilqqXHq
+pxPEFYb7oVO8Kanhb4BlE32EmApBlvhd3SW07kn0dS7WVGsTvPFwKKpF88W8E+pc
+2i8At5tr2O1DZhvqNdIN7r8FRrGQ/Hpm3ItypUdz2lZnMwECQQD3dILOMJ84O2JE
+NxUwk8iOYefMJftQUO57Gm7XBVke/i3r9uajSqB2xmOvUaSyaHoJfx/mmfgfxYcD
+M+Re6mERAkEAyuaV5+eD82eG2I8PgxJ2p5SOb1x5F5qpb4KuKAlfHEkdolttMwN3
+7vl1ZWUZLVu2rHnUmvbYV2gkQO1os7/DkQJBAIDYfbN2xbC12vjB5ZqhmG/qspMt
+w6mSOlqG7OewtTLaDncq2/RySxMNQaJr1GHA3KpNMwMTcIq6gw472tFBIMECQF0z
+fjiASEROkcp4LI/ws0BXJPZSa+1DxgDK7mTFqUK88zfY91gvh6/mNt7UibQkJM0l
+SVvFd6ru03hflXC77YECQQDDQrB9ApwVOMGQw+pwbxn9p8tPYVi3oBiUfYgd1RDO
+uhcRgxv7gT4BSiyI4nFBMCYyI28azTLlUiJhMr9MNUpB
+-----END RSA PRIVATE KEY-----
diff --git a/funasr/runtime/html5/static/index.html b/funasr/runtime/html5/static/index.html
new file mode 100644
index 000000000..1cdc7d7d3
--- /dev/null
+++ b/funasr/runtime/html5/static/index.html
@@ -0,0 +1,43 @@
+
+
+
+
+
+ 语音识别
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 语音识别结果显示:
+
+
+
+
+
请点击开始
+
+ 开始
+ 停止
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/funasr/runtime/html5/static/main.js b/funasr/runtime/html5/static/main.js
new file mode 100644
index 000000000..5f6bb108b
--- /dev/null
+++ b/funasr/runtime/html5/static/main.js
@@ -0,0 +1,167 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
+ * Reserved. MIT License (https://opensource.org/licenses/MIT)
+ */
+/* 2022-2023 by zhaoming,mali aihealthx.com */
+
+
+// 连接; 定义socket连接类对象与语音对象
+var wsconnecter = new WebSocketConnectMethod({msgHandle:getJsonMessage,stateHandle:getConnState});
+var audioBlob;
+
+// 录音; 定义录音对象,wav格式
+var rec = Recorder({
+ type:"pcm",
+ bitRate:16,
+ sampleRate:16000,
+ onProcess:recProcess
+});
+
+
+
+
+var sampleBuf=new Int16Array();
+// 定义按钮响应事件
+var btnStart = document.getElementById('btnStart');
+btnStart.onclick = start;
+var btnStop = document.getElementById('btnStop');
+btnStop.onclick = stop;
+btnStop.disabled = true;
+
+
+
+var rec_text=""
+var info_div = document.getElementById('info_div');
+
+
+// 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中
+function getJsonMessage( jsonMsg ) {
+ console.log( "message: " + JSON.parse(jsonMsg.data)['text'] );
+ var rectxt=""+JSON.parse(jsonMsg.data)['text'];
+ var varArea=document.getElementById('varArea');
+ rec_text=rec_text+rectxt.replace(/ +/g,"");
+ varArea.value=rec_text;
+
+
+}
+
+// 连接状态响应
+function getConnState( connState ) {
+ if ( connState === 0 ) {
+
+ rec.open( function(){
+ rec.start();
+ console.log("开始录音");
+
+ });
+ } else if ( connState === 1 ) {
+ //stop();
+ } else if ( connState === 2 ) {
+ stop();
+ console.log( 'connecttion error' );
+ setTimeout(function(){btnStart.disabled = true;info_div.innerHTML='connecttion error';}, 4000 );
+ }
+}
+
+
+// 识别启动、停止、清空操作
+function start() {
+ info_div.innerHTML="正在连接asr服务器,请等待...";
+ // 清除显示
+ clear();
+ //控件状态更新
+
+ isRec = true;
+ btnStart.disabled = true;
+ btnStop.disabled = false;
+ //启动连接
+ wsconnecter.wsStart();
+}
+
+
+function stop() {
+ var chunk_size = new Array( 5, 10, 5 );
+ var request = {
+ "chunk_size": chunk_size,
+ "wav_name": "h5",
+ "is_speaking": false,
+ "chunk_interval":10,
+ };
+ if(sampleBuf.length>0){
+ wsconnecter.wsSend(sampleBuf,false);
+ console.log("sampleBuf.length"+sampleBuf.length);
+ sampleBuf=new Int16Array();
+ }
+ wsconnecter.wsSend( JSON.stringify(request) ,false);
+
+
+
+
+
+
+ // 控件状态更新
+ isRec = false;
+ info_div.innerHTML="请等候...";
+ btnStop.disabled = true;
+ setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
+ rec.stop(function(blob,duration){
+
+ console.log(blob);
+ var audioBlob = Recorder.pcm2wav(data = {sampleRate:16000, bitRate:16, blob:blob},
+ function(theblob,duration){
+ console.log(theblob);
+ var audio_record = document.getElementById('audio_record');
+ audio_record.src = (window.URL||webkitURL).createObjectURL(theblob);
+ audio_record.controls=true;
+ audio_record.play();
+
+
+ } ,function(msg){
+ console.log(msg);
+ }
+ );
+
+
+
+ },function(errMsg){
+ console.log("errMsg: " + errMsg);
+ });
+ // 停止连接
+
+
+
+}
+
+function clear() {
+
+ var varArea=document.getElementById('varArea');
+
+ varArea.value="";
+ rec_text="";
+
+}
+
+
+function recProcess( buffer, powerLevel, bufferDuration, bufferSampleRate,newBufferIdx,asyncEnd ) {
+ if ( isRec === true ) {
+ var data_48k = buffer[buffer.length-1];
+
+ var array_48k = new Array(data_48k);
+ var data_16k=Recorder.SampleData(array_48k,bufferSampleRate,16000).data;
+
+ sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]);
+ var chunk_size=960; // for asr chunk_size [5, 10, 5]
+ info_div.innerHTML=""+bufferDuration/1000+"s";
+ while(sampleBuf.length>=chunk_size){
+ sendBuf=sampleBuf.slice(0,chunk_size);
+ sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length);
+ wsconnecter.wsSend(sendBuf,false);
+
+
+
+ }
+
+
+
+ }
+}
\ No newline at end of file
diff --git a/funasr/runtime/html5/static/pcm.js b/funasr/runtime/html5/static/pcm.js
new file mode 100644
index 000000000..51c1efe2e
--- /dev/null
+++ b/funasr/runtime/html5/static/pcm.js
@@ -0,0 +1,96 @@
+/*
+pcm编码器+编码引擎
+https://github.com/xiangyuecn/Recorder
+
+编码原理:本编码器输出的pcm格式数据其实就是Recorder中的buffers原始数据(经过了重新采样),16位时为LE小端模式(Little Endian),并未经过任何编码处理
+
+编码的代码和wav.js区别不大,pcm加上一个44字节wav头即成wav文件;所以要播放pcm就很简单了,直接转成wav文件来播放,已提供转换函数 Recorder.pcm2wav
+*/
+(function(){
+"use strict";
+
+Recorder.prototype.enc_pcm={
+ stable:true
+ ,testmsg:"pcm为未封装的原始音频数据,pcm数据文件无法直接播放;支持位数8位、16位(填在比特率里面),采样率取值无限制"
+};
+Recorder.prototype.pcm=function(res,True,False){
+ var This=this,set=This.set
+ ,size=res.length
+ ,bitRate=set.bitRate==8?8:16;
+
+ var buffer=new ArrayBuffer(size*(bitRate/8));
+ var data=new DataView(buffer);
+ var offset=0;
+
+ // 写入采样数据
+ if(bitRate==8) {
+ for(var i=0;i>8)+128;
+ data.setInt8(offset,val,true);
+ };
+ }else{
+ for (var i=0;i=pcmSampleRate时不会进行任何处理,小于时会进行重新采样
+prevChunkInfo:{} 可选,上次调用时的返回值,用于连续转换,本次调用将从上次结束位置开始进行处理。或可自行定义一个ChunkInfo从pcmDatas指定的位置开始进行转换
+option:{ 可选,配置项
+ frameSize:123456 帧大小,每帧的PCM Int16的数量,采样率转换后的pcm长度为frameSize的整数倍,用于连续转换。目前仅在mp3格式时才有用,frameSize取值为1152,这样编码出来的mp3时长和pcm的时长完全一致,否则会因为mp3最后一帧录音不够填满时添加填充数据导致mp3的时长变长。
+ frameType:"" 帧类型,一般为rec.set.type,提供此参数时无需提供frameSize,会自动使用最佳的值给frameSize赋值,目前仅支持mp3=1152(MPEG1 Layer3的每帧采采样数),其他类型=1。
+ 以上两个参数用于连续转换时使用,最多使用一个,不提供时不进行帧的特殊处理,提供时必须同时提供prevChunkInfo才有作用。最后一段数据处理时无需提供帧大小以便输出最后一丁点残留数据。
+ }
+
+返回ChunkInfo:{
+ //可定义,从指定位置开始转换到结尾
+ index:0 pcmDatas已处理到的索引
+ offset:0.0 已处理到的index对应的pcm中的偏移的下一个位置
+
+ //仅作为返回值
+ frameNext:null||[Int16,...] 下一帧的部分数据,frameSize设置了的时候才可能会有
+ sampleRate:16000 结果的采样率,<=newSampleRate
+ data:[Int16,...] 转换后的PCM结果;如果是连续转换,并且pcmDatas中并没有新数据时,data的长度可能为0
+}
+*/
+Recorder.SampleData=function(pcmDatas,pcmSampleRate,newSampleRate,prevChunkInfo,option){
+ prevChunkInfo||(prevChunkInfo={});
+ var index=prevChunkInfo.index||0;
+ var offset=prevChunkInfo.offset||0;
+
+ var frameNext=prevChunkInfo.frameNext||[];
+ option||(option={});
+ var frameSize=option.frameSize||1;
+ if(option.frameType){
+ frameSize=option.frameType=="mp3"?1152:1;
+ };
+
+ var nLen=pcmDatas.length;
+ if(index>nLen+1){
+ CLog("SampleData似乎传入了未重置chunk "+index+">"+nLen,3);
+ };
+ var size=0;
+ for(var i=index;i1){//新采样低于录音采样,进行抽样
+ size=Math.floor(size/step);
+ }else{//新采样高于录音采样不处理,省去了插值处理
+ step=1;
+ newSampleRate=pcmSampleRate;
+ };
+
+ size+=frameNext.length;
+ var res=new Int16Array(size);
+ var idx=0;
+ //添加上一次不够一帧的剩余数据
+ for(var i=0;i0){
+ var u8Pos=(res.length-frameNextSize)*2;
+ frameNext=new Int16Array(res.buffer.slice(u8Pos));
+ res=new Int16Array(res.buffer.slice(0,u8Pos));
+ };
+
+ return {
+ index:index
+ ,offset:offset
+
+ ,frameNext:frameNext
+ ,sampleRate:newSampleRate
+ ,data:res
+ };
+};
+
+
+/*计算音量百分比的一个方法
+pcmAbsSum: pcm Int16所有采样的绝对值的和
+pcmLength: pcm长度
+返回值:0-100,主要当做百分比用
+注意:这个不是分贝,因此没用volume当做名称*/
+Recorder.PowerLevel=function(pcmAbsSum,pcmLength){
+ /*计算音量 https://blog.csdn.net/jody1989/article/details/73480259
+ 更高灵敏度算法:
+ 限定最大感应值10000
+ 线性曲线:低音量不友好
+ power/10000*100
+ 对数曲线:低音量友好,但需限定最低感应值
+ (1+Math.log10(power/10000))*100
+ */
+ var power=(pcmAbsSum/pcmLength) || 0;//NaN
+ var level;
+ if(power<1251){//1250的结果10%,更小的音量采用线性取值
+ level=Math.round(power/1250*10);
+ }else{
+ level=Math.round(Math.min(100,Math.max(0,(1+Math.log(power/10000)/Math.log(10))*100)));
+ };
+ return level;
+};
+
+/*计算音量,单位dBFS(满刻度相对电平)
+maxSample: 为16位pcm采样的绝对值中最大的一个(计算峰值音量),或者为pcm中所有采样的绝对值的平局值
+返回值:-100~0 (最大值0dB,最小值-100代替-∞)
+*/
+Recorder.PowerDBFS=function(maxSample){
+ var val=Math.max(0.1, maxSample||0),Pref=0x7FFF;
+ val=Math.min(val,Pref);
+ //https://www.logiclocmusic.com/can-you-tell-the-decibel/
+ //https://blog.csdn.net/qq_17256689/article/details/120442510
+ val=20*Math.log(val/Pref)/Math.log(10);
+ return Math.max(-100,Math.round(val));
+};
+
+
+
+
+//带时间的日志输出,可设为一个空函数来屏蔽日志输出
+//CLog(msg,errOrLogMsg, logMsg...) err为数字时代表日志类型1:error 2:log默认 3:warn,否则当做内容输出,第一个参数不能是对象因为要拼接时间,后面可以接无数个输出参数
+Recorder.CLog=function(msg,err){
+ var now=new Date();
+ var t=("0"+now.getMinutes()).substr(-2)
+ +":"+("0"+now.getSeconds()).substr(-2)
+ +"."+("00"+now.getMilliseconds()).substr(-3);
+ var recID=this&&this.envIn&&this.envCheck&&this.id;
+ var arr=["["+t+" "+RecTxt+(recID?":"+recID:"")+"]"+msg];
+ var a=arguments,console=window.console||{};
+ var i=2,fn=console.log;
+ if(typeof(err)=="number"){
+ fn=err==1?console.error:err==3?console.warn:fn;
+ }else{
+ i=1;
+ };
+ for(;i1?arr:"");
+ }else{
+ fn.apply(console,arr);
+ };
+};
+var CLog=function(){ Recorder.CLog.apply(this,arguments); };
+var IsLoser=true;try{IsLoser=!console.log.apply;}catch(e){};
+
+
+
+
+var ID=0;
+function initFn(set){
+ this.id=++ID;
+
+ //如果开启了流量统计,这里将发送一个图片请求
+ Traffic();
+
+
+ var o={
+ type:"mp3" //输出类型:mp3,wav,wav输出文件尺寸超大不推荐使用,但mp3编码支持会导致js文件超大,如果不需支持mp3可以使js文件大幅减小
+ ,bitRate:16 //比特率 wav:16或8位,MP3:8kbps 1k/s,8kbps 2k/s 录音文件很小
+
+ ,sampleRate:16000 //采样率,wav格式大小=sampleRate*时间;mp3此项对低比特率有影响,高比特率几乎无影响。
+ //wav任意值,mp3取值范围:48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000
+ //采样率参考https://www.cnblogs.com/devin87/p/mp3-recorder.html
+
+ ,onProcess:NOOP //fn(buffers,powerLevel,bufferDuration,bufferSampleRate,newBufferIdx,asyncEnd) buffers=[[Int16,...],...]:缓冲的PCM数据,为从开始录音到现在的所有pcm片段;powerLevel:当前缓冲的音量级别0-100,bufferDuration:已缓冲时长,bufferSampleRate:缓冲使用的采样率(当type支持边录边转码(Worker)时,此采样率和设置的采样率相同,否则不一定相同);newBufferIdx:本次回调新增的buffer起始索引;asyncEnd:fn() 如果onProcess是异步的(返回值为true时),处理完成时需要调用此回调,如果不是异步的请忽略此参数,此方法回调时必须是真异步(不能真异步时需用setTimeout包裹)。onProcess返回值:如果返回true代表开启异步模式,在某些大量运算的场合异步是必须的,必须在异步处理完成时调用asyncEnd(不能真异步时需用setTimeout包裹),在onProcess执行后新增的buffer会全部替换成空数组,因此本回调开头应立即将newBufferIdx到本次回调结尾位置的buffer全部保存到另外一个数组内,处理完成后写回buffers中本次回调的结尾位置。
+
+ //*******高级设置******
+ //,sourceStream:MediaStream Object
+ //可选直接提供一个媒体流,从这个流中录制、实时处理音频数据(当前Recorder实例独享此流);不提供时为普通的麦克风录音,由getUserMedia提供音频流(所有Recorder实例共享同一个流)
+ //比如:audio、video标签dom节点的captureStream方法(实验特性,不同浏览器支持程度不高)返回的流;WebRTC中的remote流;自己创建的流等
+ //注意:流内必须至少存在一条音轨(Audio Track),比如audio标签必须等待到可以开始播放后才会有音轨,否则open会失败
+
+ //,audioTrackSet:{ deviceId:"",groupId:"", autoGainControl:true, echoCancellation:true, noiseSuppression:true }
+ //普通麦克风录音时getUserMedia方法的audio配置参数,比如指定设备id,回声消除、降噪开关;注意:提供的任何配置值都不一定会生效
+ //由于麦克风是全局共享的,所以新配置后需要close掉以前的再重新open
+ //更多参考: https://developer.mozilla.org/en-US/docs/Web/API/MediaTrackConstraints
+
+ //,disableEnvInFix:false 内部参数,禁用设备卡顿时音频输入丢失补偿功能
+
+ //,takeoffEncodeChunk:NOOP //fn(chunkBytes) chunkBytes=[Uint8,...]:实时编码环境下接管编码器输出,当编码器实时编码出一块有效的二进制音频数据时实时回调此方法;参数为二进制的Uint8Array,就是编码出来的音频数据片段,所有的chunkBytes拼接在一起即为完整音频。本实现的想法最初由QQ2543775048提出
+ //当提供此回调方法时,将接管编码器的数据输出,编码器内部将放弃存储生成的音频数据;环境要求比较苛刻:如果当前环境不支持实时编码处理,将在open时直接走fail逻辑
+ //因此提供此回调后调用stop方法将无法获得有效的音频数据,因为编码器内没有音频数据,因此stop时返回的blob将是一个字节长度为0的blob
+ //目前只有mp3格式实现了实时编码,在支持实时处理的环境中将会实时的将编码出来的mp3片段通过此方法回调,所有的chunkBytes拼接到一起即为完整的mp3,此种拼接的结果比mock方法实时生成的音质更加,因为天然避免了首尾的静默
+ //目前除mp3外其他格式不可以提供此回调,提供了将在open时直接走fail逻辑
+ };
+
+ for(var k in set){
+ o[k]=set[k];
+ };
+ this.set=o;
+
+ this._S=9;//stop同步锁,stop可以阻止open过程中还未运行的start
+ this.Sync={O:9,C:9};//和Recorder.Sync一致,只不过这个是非全局的,仅用来简化代码逻辑,无实际作用
+};
+//同步锁,控制对Stream的竞争;用于close时中断异步的open;一个对象open如果变化了都要阻止close,Stream的控制权交个新的对象
+Recorder.Sync={/*open*/O:9,/*close*/C:9};
+
+Recorder.prototype=initFn.prototype={
+ CLog:CLog
+
+ //流相关的数据存储在哪个对象里面;如果提供了sourceStream,数据直接存储在当前对象中,否则存储在全局
+ ,_streamStore:function(){
+ if(this.set.sourceStream){
+ return this;
+ }else{
+ return Recorder;
+ }
+ }
+
+ //打开录音资源True(),False(msg,isUserNotAllow),需要调用close。注意:此方法是异步的;一般使用时打开,用完立即关闭;可重复调用,可用来测试是否能录音
+ ,open:function(True,False){
+ var This=this,streamStore=This._streamStore();
+ True=True||NOOP;
+ var failCall=function(errMsg,isUserNotAllow){
+ isUserNotAllow=!!isUserNotAllow;
+ This.CLog("录音open失败:"+errMsg+",isUserNotAllow:"+isUserNotAllow,1);
+ False&&False(errMsg,isUserNotAllow);
+ };
+
+ var ok=function(){
+ This.CLog("open ok id:"+This.id);
+ True();
+
+ This._SO=0;//解除stop对open中的start调用的阻止
+ };
+
+
+ //同步锁
+ var Lock=streamStore.Sync;
+ var lockOpen=++Lock.O,lockClose=Lock.C;
+ This._O=This._O_=lockOpen;//记住当前的open,如果变化了要阻止close,这里假定了新对象已取代当前对象并且不再使用
+ This._SO=This._S;//记住open过程中的stop,中途任何stop调用后都不能继续open中的start
+ var lockFail=function(){
+ //允许多次open,但不允许任何一次close,或者自身已经调用了关闭
+ if(lockClose!=Lock.C || !This._O){
+ var err="open被取消";
+ if(lockOpen==Lock.O){
+ //无新的open,已经调用了close进行取消,此处应让上次的close明确生效
+ This.close();
+ }else{
+ err="open被中断";
+ };
+ failCall(err);
+ return true;
+ };
+ };
+
+ //环境配置检查
+ var checkMsg=This.envCheck({envName:"H5",canProcess:true});
+ if(checkMsg){
+ failCall("不能录音:"+checkMsg);
+ return;
+ };
+
+
+ //***********已直接提供了音频流************
+ if(This.set.sourceStream){
+ if(!Recorder.GetContext()){
+ failCall("不支持此浏览器从流中获取录音");
+ return;
+ };
+
+ Disconnect(streamStore);//可能已open过,直接先尝试断开
+ This.Stream=This.set.sourceStream;
+ This.Stream._call={};
+
+ try{
+ Connect(streamStore);
+ }catch(e){
+ failCall("从流中打开录音失败:"+e.message);
+ return;
+ }
+ ok();
+ return;
+ };
+
+
+ //***********打开麦克风得到全局的音频流************
+ var codeFail=function(code,msg){
+ try{//跨域的优先检测一下
+ window.top.a;
+ }catch(e){
+ failCall('无权录音(跨域,请尝试给iframe添加麦克风访问策略,如allow="camera;microphone")');
+ return;
+ };
+
+ if(/Permission|Allow/i.test(code)){
+ failCall("用户拒绝了录音权限",true);
+ }else if(window.isSecureContext===false){
+ failCall("浏览器禁止不安全页面录音,可开启https解决");
+ }else if(/Found/i.test(code)){//可能是非安全环境导致的没有设备
+ failCall(msg+",无可用麦克风");
+ }else{
+ failCall(msg);
+ };
+ };
+
+
+ //如果已打开并且有效就不要再打开了
+ if(Recorder.IsOpen()){
+ ok();
+ return;
+ };
+ if(!Recorder.Support()){
+ codeFail("","此浏览器不支持录音");
+ return;
+ };
+
+ //请求权限,如果从未授权,一般浏览器会弹出权限请求弹框
+ var f1=function(stream){
+ //https://github.com/xiangyuecn/Recorder/issues/14 获取到的track.readyState!="live",刚刚回调时可能是正常的,但过一下可能就被关掉了,原因不明。延迟一下保证真异步。对正常浏览器不影响
+ setTimeout(function(){
+ stream._call={};
+ var oldStream=Recorder.Stream;
+ if(oldStream){
+ Disconnect(); //直接断开已存在的,旧的Connect未完成会自动终止
+ stream._call=oldStream._call;
+ };
+ Recorder.Stream=stream;
+ if(lockFail())return;
+
+ if(Recorder.IsOpen()){
+ if(oldStream)This.CLog("发现同时多次调用open",1);
+
+ Connect(streamStore,1);
+ ok();
+ }else{
+ failCall("录音功能无效:无音频流");
+ };
+ },100);
+ };
+ var f2=function(e){
+ var code=e.name||e.message||e.code+":"+e;
+ This.CLog("请求录音权限错误",1,e);
+
+ codeFail(code,"无法录音:"+code);
+ };
+
+ var trackSet={
+ noiseSuppression:false //默认禁用降噪,原声录制,免得移动端表现怪异(包括系统播放声音变小)
+ ,echoCancellation:false //回声消除
+ };
+ var trackSet2=This.set.audioTrackSet;
+ for(var k in trackSet2)trackSet[k]=trackSet2[k];
+ trackSet.sampleRate=Recorder.Ctx.sampleRate;//必须指明采样率,不然手机上MediaRecorder采样率16k
+
+ try{
+ var pro=Recorder.Scope[getUserMediaTxt]({audio:trackSet},f1,f2);
+ }catch(e){//不能设置trackSet就算了
+ This.CLog(getUserMediaTxt,3,e);
+ pro=Recorder.Scope[getUserMediaTxt]({audio:true},f1,f2);
+ };
+ if(pro&&pro.then){
+ pro.then(f1)[CatchTxt](f2); //fix 关键字,保证catch压缩时保持字符串形式
+ };
+ }
+ //关闭释放录音资源
+ ,close:function(call){
+ call=call||NOOP;
+
+ var This=this,streamStore=This._streamStore();
+ This._stop();
+
+ var Lock=streamStore.Sync;
+ This._O=0;
+ if(This._O_!=Lock.O){
+ //唯一资源Stream的控制权已交给新对象,这里不能关闭。此处在每次都弹权限的浏览器内可能存在泄漏,新对象被拒绝权限可能不会调用close,忽略这种不处理
+ This.CLog("close被忽略(因为同时open了多个rec,只有最后一个会真正close)",3);
+ call();
+ return;
+ };
+ Lock.C++;//获得控制权
+
+ Disconnect(streamStore);
+
+ This.CLog("close");
+ call();
+ }
+
+
+
+
+
+ /*模拟一段录音数据,后面可以调用stop进行编码,需提供pcm数据[1,2,3...],pcm的采样率*/
+ ,mock:function(pcmData,pcmSampleRate){
+ var This=this;
+ This._stop();//清理掉已有的资源
+
+ This.isMock=1;
+ This.mockEnvInfo=null;
+ This.buffers=[pcmData];
+ This.recSize=pcmData.length;
+ This[srcSampleRateTxt]=pcmSampleRate;
+ return This;
+ }
+ ,envCheck:function(envInfo){//平台环境下的可用性检查,任何时候都可以调用检查,返回errMsg:""正常,"失败原因"
+ //envInfo={envName:"H5",canProcess:true}
+ var errMsg,This=this,set=This.set;
+
+ //检测CPU的数字字节序,TypedArray字节序是个迷,直接拒绝罕见的大端模式,因为找不到这种CPU进行测试
+ var tag="CPU_BE";
+ if(!errMsg && !Recorder[tag] && window.Int8Array && !new Int8Array(new Int32Array([1]).buffer)[0]){
+ Traffic(tag); //如果开启了流量统计,这里将发送一个图片请求
+ errMsg="不支持"+tag+"架构";
+ };
+
+ //编码器检查环境下配置是否可用
+ if(!errMsg){
+ var type=set.type;
+ if(This[type+"_envCheck"]){//编码器已实现环境检查
+ errMsg=This[type+"_envCheck"](envInfo,set);
+ }else{//未实现检查的手动检查配置是否有效
+ if(set.takeoffEncodeChunk){
+ errMsg=type+"类型"+(This[type]?"":"(未加载编码器)")+"不支持设置takeoffEncodeChunk";
+ };
+ };
+ };
+
+ return errMsg||"";
+ }
+ ,envStart:function(mockEnvInfo,sampleRate){//平台环境相关的start调用
+ var This=this,set=This.set;
+ This.isMock=mockEnvInfo?1:0;//非H5环境需要启用mock,并提供envCheck需要的环境信息
+ This.mockEnvInfo=mockEnvInfo;
+ This.buffers=[];//数据缓冲
+ This.recSize=0;//数据大小
+
+ This.envInLast=0;//envIn接收到最后录音内容的时间
+ This.envInFirst=0;//envIn接收到的首个录音内容的录制时间
+ This.envInFix=0;//补偿的总时间
+ This.envInFixTs=[];//补偿计数列表
+
+ //engineCtx需要提前确定最终的采样率
+ var setSr=set[sampleRateTxt];
+ if(setSr>sampleRate){
+ set[sampleRateTxt]=sampleRate;
+ }else{ setSr=0 }
+ This[srcSampleRateTxt]=sampleRate;
+ This.CLog(srcSampleRateTxt+": "+sampleRate+" set."+sampleRateTxt+": "+set[sampleRateTxt]+(setSr?" 忽略"+setSr:""), setSr?3:0);
+
+ This.engineCtx=0;
+ //此类型有边录边转码(Worker)支持
+ if(This[set.type+"_start"]){
+ var engineCtx=This.engineCtx=This[set.type+"_start"](set);
+ if(engineCtx){
+ engineCtx.pcmDatas=[];
+ engineCtx.pcmSize=0;
+ };
+ };
+ }
+ ,envResume:function(){//和平台环境无关的恢复录音
+ //重新开始计数
+ this.envInFixTs=[];
+ }
+ ,envIn:function(pcm,sum){//和平台环境无关的pcm[Int16]输入
+ var This=this,set=This.set,engineCtx=This.engineCtx;
+ var bufferSampleRate=This[srcSampleRateTxt];
+ var size=pcm.length;
+ var powerLevel=Recorder.PowerLevel(sum,size);
+
+ var buffers=This.buffers;
+ var bufferFirstIdx=buffers.length;//之前的buffer都是经过onProcess处理好的,不允许再修改
+ buffers.push(pcm);
+
+ //有engineCtx时会被覆盖,这里保存一份
+ var buffersThis=buffers;
+ var bufferFirstIdxThis=bufferFirstIdx;
+
+ //卡顿丢失补偿:因为设备很卡的时候导致H5接收到的数据量不够造成播放时候变速,结果比实际的时长要短,此处保证了不会变短,但不能修复丢失的音频数据造成音质变差。当前算法采用输入时间侦测下一帧是否需要添加补偿帧,需要(6次输入||超过1秒)以上才会开始侦测,如果滑动窗口内丢失超过1/3就会进行补偿
+ var now=Date.now();
+ var pcmTime=Math.round(size/bufferSampleRate*1000);
+ This.envInLast=now;
+ if(This.buffers.length==1){//记下首个录音数据的录制时间
+ This.envInFirst=now-pcmTime;
+ };
+ var envInFixTs=This.envInFixTs;
+ envInFixTs.splice(0,0,{t:now,d:pcmTime});
+ //保留3秒的计数滑动窗口,另外超过3秒的停顿不补偿
+ var tsInStart=now,tsPcm=0;
+ for(var i=0;i3000){
+ envInFixTs.length=i;
+ break;
+ };
+ tsInStart=o.t;
+ tsPcm+=o.d;
+ };
+ //达到需要的数据量,开始侦测是否需要补偿
+ var tsInPrev=envInFixTs[1];
+ var tsIn=now-tsInStart;
+ var lost=tsIn-tsPcm;
+ if( lost>tsIn/3 && (tsInPrev&&tsIn>1000 || envInFixTs.length>=6) ){
+ //丢失过多,开始执行补偿
+ var addTime=now-tsInPrev.t-pcmTime;//距离上次输入丢失这么多ms
+ if(addTime>pcmTime/5){//丢失超过本帧的1/5
+ var fixOpen=!set.disableEnvInFix;
+ This.CLog("["+now+"]"+(fixOpen?"":"未")+"补偿"+addTime+"ms",3);
+ This.envInFix+=addTime;
+
+ //用静默进行补偿
+ if(fixOpen){
+ var addPcm=new Int16Array(addTime*bufferSampleRate/1000);
+ size+=addPcm.length;
+ buffers.push(addPcm);
+ };
+ };
+ };
+
+
+ var sizeOld=This.recSize,addSize=size;
+ var bufferSize=sizeOld+addSize;
+ This.recSize=bufferSize;//此值在onProcess后需要修正,可能新数据被修改
+
+
+ //此类型有边录边转码(Worker)支持,开启实时转码
+ if(engineCtx){
+ //转换成set的采样率
+ var chunkInfo=Recorder.SampleData(buffers,bufferSampleRate,set[sampleRateTxt],engineCtx.chunkInfo);
+ engineCtx.chunkInfo=chunkInfo;
+
+ sizeOld=engineCtx.pcmSize;
+ addSize=chunkInfo.data.length;
+ bufferSize=sizeOld+addSize;
+ engineCtx.pcmSize=bufferSize;//此值在onProcess后需要修正,可能新数据被修改
+
+ buffers=engineCtx.pcmDatas;
+ bufferFirstIdx=buffers.length;
+ buffers.push(chunkInfo.data);
+ bufferSampleRate=chunkInfo[sampleRateTxt];
+ };
+
+ var duration=Math.round(bufferSize/bufferSampleRate*1000);
+ var bufferNextIdx=buffers.length;
+ var bufferNextIdxThis=buffersThis.length;
+
+ //允许异步处理buffer数据
+ var asyncEnd=function(){
+ //重新计算size,异步的早已减去添加的,同步的需去掉本次添加的然后重新计算
+ var num=asyncBegin?0:-addSize;
+ var hasClear=buffers[0]==null;
+ for(var i=bufferFirstIdx;i10 && This.envInFirst-now>1000){ //1秒后开始onProcess性能监测
+ This.CLog(procTxt+"低性能,耗时"+slowT+"ms",3);
+ };
+
+ if(asyncBegin===true){
+ //开启了异步模式,onProcess已接管buffers新数据,立即清空,避免出现未处理的数据
+ var hasClear=0;
+ for(var i=bufferFirstIdx;i"+res.length+" 花:"+(Date.now()-t1)+"ms");
+
+ setTimeout(function(){
+ t1=Date.now();
+ This[set.type](res,function(blob){
+ ok(blob,duration);
+ },function(msg){
+ err(msg);
+ });
+ });
+ }
+
+};
+
+if(window[RecTxt]){
+ CLog("重复引入"+RecTxt,3);
+ window[RecTxt].Destroy();
+};
+window[RecTxt]=Recorder;
+
+
+
+
+//=======从WebM字节流中提取pcm数据,提取成功返回Float32Array,失败返回null||-1=====
+var WebM_Extract=function(inBytes, scope){
+ if(!scope.pos){
+ scope.pos=[0]; scope.tracks={}; scope.bytes=[];
+ };
+ var tracks=scope.tracks, position=[scope.pos[0]];
+ var endPos=function(){ scope.pos[0]=position[0] };
+
+ var sBL=scope.bytes.length;
+ var bytes=new Uint8Array(sBL+inBytes.length);
+ bytes.set(scope.bytes); bytes.set(inBytes,sBL);
+ scope.bytes=bytes;
+
+ //先读取文件头和Track信息
+ if(!scope._ht){
+ readMatroskaVInt(bytes, position);//EBML Header
+ readMatroskaBlock(bytes, position);//跳过EBML Header内容
+ if(!BytesEq(readMatroskaVInt(bytes, position), [0x18,0x53,0x80,0x67])){
+ return;//未识别到Segment
+ }
+ readMatroskaVInt(bytes, position);//跳过Segment长度值
+ while(position[0]1){//多声道,提取一个声道
+ var arr2=[];
+ for(var i=0;i=arr.length)return;
+ var b0=arr[i],b2=("0000000"+b0.toString(2)).substr(-8);
+ var m=/^(0*1)(\d*)$/.exec(b2);
+ if(!m)return;
+ var len=m[1].length, val=[];
+ if(i+len>arr.length)return;
+ for(var i2=0;i2arr.length)return;
+ for(var i2=0;i2>8)+128;
+ data.setInt8(offset,val,true);
+ };
+ }else{
+ for (var i=0;i [--quantize ]
+ [--vad-dir ] [--vad-quant ]
+ [--punc-dir ] [--punc-quant ]
--wav-path --thread-num
[--] [--version] [-h]
Where:
@@ -136,6 +138,17 @@ Where:
(required) the model path, which contains model.onnx, config.yaml, am.mvn
--quantize
false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir
+
+ --vad-dir
+ the vad model path, which contains model.onnx, vad.yaml, vad.mvn
+ --vad-quant
+ false (Default), load the model of model.onnx in vad_dir. If set true, load the model of model_quant.onnx in vad_dir
+
+ --punc-dir
+ the punc model path, which contains model.onnx, punc.yaml
+ --punc-quant
+ false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir
+
--wav-path
(required) the input could be:
wav_path, e.g.: asr_example.wav;
diff --git a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
index 6ba65c6c4..cf1469d3e 100644
--- a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
@@ -39,7 +39,7 @@ void runReg(FUNASR_HANDLE asr_handle, vector wav_list,
// warm up
for (size_t i = 0; i < 1; i++)
{
- FUNASR_RESULT result=FunASRInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, 16000);
+ FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, 16000);
}
while (true) {
@@ -50,7 +50,7 @@ void runReg(FUNASR_HANDLE asr_handle, vector wav_list,
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result=FunASRInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, 16000);
+ FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, 16000);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
@@ -102,12 +102,20 @@ int main(int argc, char *argv[])
TCLAP::CmdLine cmd("funasr-onnx-offline-rtf", ' ', "1.0");
TCLAP::ValueArg model_dir("", MODEL_DIR, "the model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
TCLAP::ValueArg quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
+ TCLAP::ValueArg vad_dir("", VAD_DIR, "the vad model path, which contains model.onnx, vad.yaml, vad.mvn", false, "", "string");
+ TCLAP::ValueArg vad_quant("", VAD_QUANT, "false (Default), load the model of model.onnx in vad_dir. If set true, load the model of model_quant.onnx in vad_dir", false, "false", "string");
+ TCLAP::ValueArg punc_dir("", PUNC_DIR, "the punc model path, which contains model.onnx, punc.yaml", false, "", "string");
+ TCLAP::ValueArg punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "false", "string");
TCLAP::ValueArg wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
TCLAP::ValueArg thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t");
cmd.add(model_dir);
cmd.add(quantize);
+ cmd.add(vad_dir);
+ cmd.add(vad_quant);
+ cmd.add(punc_dir);
+ cmd.add(punc_quant);
cmd.add(wav_path);
cmd.add(thread_num);
cmd.parse(argc, argv);
@@ -115,11 +123,15 @@ int main(int argc, char *argv[])
std::map model_path;
GetValue(model_dir, MODEL_DIR, model_path);
GetValue(quantize, QUANTIZE, model_path);
+ GetValue(vad_dir, VAD_DIR, model_path);
+ GetValue(vad_quant, VAD_QUANT, model_path);
+ GetValue(punc_dir, PUNC_DIR, model_path);
+ GetValue(punc_quant, PUNC_QUANT, model_path);
GetValue(wav_path, WAV_PATH, model_path);
struct timeval start, end;
gettimeofday(&start, NULL);
- FUNASR_HANDLE asr_handle=FunASRInit(model_path, 1);
+ FUNASR_HANDLE asr_handle=FunOfflineInit(model_path, 1);
if (!asr_handle)
{
@@ -132,7 +144,7 @@ int main(int argc, char *argv[])
long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
- // read wav_scp
+ // read wav_path
vector wav_list;
string wav_path_ = model_path.at(WAV_PATH);
if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){
diff --git a/funasr/runtime/python/websocket/parse_args.py b/funasr/runtime/python/websocket/parse_args.py
index d170be857..bfa5a87a9 100644
--- a/funasr/runtime/python/websocket/parse_args.py
+++ b/funasr/runtime/python/websocket/parse_args.py
@@ -35,6 +35,16 @@ parser.add_argument("--ncpu",
type=int,
default=1,
help="cpu cores")
-
+parser.add_argument("--certfile",
+ type=str,
+ default="",
+ required=False,
+ help="certfile for ssl")
+
+parser.add_argument("--keyfile",
+ type=str,
+ default="",
+ required=False,
+ help="keyfile for ssl")
args = parser.parse_args()
print(args)
\ No newline at end of file
diff --git a/funasr/runtime/python/websocket/ws_server_online.py b/funasr/runtime/python/websocket/ws_server_online.py
index a35b127cc..16a3abe56 100644
--- a/funasr/runtime/python/websocket/ws_server_online.py
+++ b/funasr/runtime/python/websocket/ws_server_online.py
@@ -7,7 +7,7 @@ import threading
import logging
import tracemalloc
import numpy as np
-
+import ssl
from parse_args import args
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
@@ -53,6 +53,9 @@ async def ws_serve(websocket, path):
if "is_speaking" in messagejson:
websocket.is_speaking = messagejson["is_speaking"]
websocket.param_dict_asr_online["is_final"] = not websocket.is_speaking
+ # need to fire engine manually if no data received any more
+ if not websocket.is_speaking:
+ await async_asr_online(websocket,b"")
if "chunk_interval" in messagejson:
websocket.chunk_interval=messagejson["chunk_interval"]
if "wav_name" in messagejson:
@@ -82,7 +85,7 @@ async def ws_serve(websocket, path):
async def async_asr_online(websocket,audio_in):
- if len(audio_in) > 0:
+ if len(audio_in) >=0:
audio_in = load_bytes(audio_in)
rec_result = inference_pipeline_asr_online(audio_in=audio_in,
param_dict=websocket.param_dict_asr_online)
@@ -94,7 +97,16 @@ async def async_asr_online(websocket,audio_in):
await websocket.send(message)
+if len(args.certfile)>0:
+ ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
-start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
+ # Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
+ ssl_cert = args.certfile
+ ssl_key = args.keyfile
+
+ ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
+ start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
+else:
+ start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
asyncio.get_event_loop().run_until_complete(start_server)
asyncio.get_event_loop().run_forever()
\ No newline at end of file