mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
This commit is contained in:
commit
eee6af2ece
@ -13,7 +13,6 @@
|
|||||||
| [**Highlights**](#highlights)
|
| [**Highlights**](#highlights)
|
||||||
| [**Installation**](#installation)
|
| [**Installation**](#installation)
|
||||||
| [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
|
| [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
|
||||||
| [**Tutorial_CN**](https://github.com/alibaba-damo-academy/FunASR/wiki#funasr%E7%94%A8%E6%88%B7%E6%89%8B%E5%86%8C)
|
|
||||||
| [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
|
| [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
|
||||||
| [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
|
| [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
|
||||||
| [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)
|
| [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)
|
||||||
|
|||||||
@ -20,11 +20,13 @@ rec_result = inference_pipeline(audio_in='https://isv-data.oss-cn-hangzhou.aliyu
|
|||||||
print(rec_result)
|
print(rec_result)
|
||||||
```
|
```
|
||||||
#### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
|
#### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
|
||||||
|
##### Streaming Decoding
|
||||||
```python
|
```python
|
||||||
inference_pipeline = pipeline(
|
inference_pipeline = pipeline(
|
||||||
task=Tasks.auto_speech_recognition,
|
task=Tasks.auto_speech_recognition,
|
||||||
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
||||||
model_revision='v1.0.6',
|
model_revision='v1.0.4',
|
||||||
|
update_model='v1.0.4',
|
||||||
mode='paraformer_streaming'
|
mode='paraformer_streaming'
|
||||||
)
|
)
|
||||||
import soundfile
|
import soundfile
|
||||||
@ -42,6 +44,23 @@ speech_chunk = speech[chunk_stride:chunk_stride+chunk_stride]
|
|||||||
rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
|
rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
|
||||||
print(rec_result)
|
print(rec_result)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### Fake Streaming Decoding
|
||||||
|
```python
|
||||||
|
from modelscope.pipelines import pipeline
|
||||||
|
from modelscope.utils.constant import Tasks
|
||||||
|
|
||||||
|
inference_pipeline = pipeline(
|
||||||
|
task=Tasks.auto_speech_recognition,
|
||||||
|
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
||||||
|
model_revision='v1.0.6',
|
||||||
|
update_model='v1.0.6',
|
||||||
|
mode="paraformer_fake_streaming"
|
||||||
|
)
|
||||||
|
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
|
||||||
|
rec_result = inference_pipeline(audio_in=audio_in)
|
||||||
|
print(rec_result)
|
||||||
|
```
|
||||||
Full code of demo, please ref to [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
|
Full code of demo, please ref to [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
|
||||||
|
|
||||||
#### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
|
#### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
|
||||||
|
|||||||
@ -5,6 +5,7 @@ inference_pipeline = pipeline(
|
|||||||
task=Tasks.auto_speech_recognition,
|
task=Tasks.auto_speech_recognition,
|
||||||
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
||||||
model_revision='v1.0.6',
|
model_revision='v1.0.6',
|
||||||
|
update_model='v1.0.6',
|
||||||
mode="paraformer_fake_streaming"
|
mode="paraformer_fake_streaming"
|
||||||
)
|
)
|
||||||
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
|
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
|
||||||
|
|||||||
@ -14,7 +14,8 @@ os.environ["MODELSCOPE_CACHE"] = "./"
|
|||||||
inference_pipeline = pipeline(
|
inference_pipeline = pipeline(
|
||||||
task=Tasks.auto_speech_recognition,
|
task=Tasks.auto_speech_recognition,
|
||||||
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
|
||||||
model_revision='v1.0.6',
|
model_revision='v1.0.4',
|
||||||
|
update_model='v1.0.4',
|
||||||
mode="paraformer_streaming"
|
mode="paraformer_streaming"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@ inference_pipeline = pipeline(
|
|||||||
task=Tasks.auto_speech_recognition,
|
task=Tasks.auto_speech_recognition,
|
||||||
model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
|
model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
|
||||||
model_revision='v1.0.6',
|
model_revision='v1.0.6',
|
||||||
|
update_model='v1.0.6',
|
||||||
mode="paraformer_fake_streaming"
|
mode="paraformer_fake_streaming"
|
||||||
)
|
)
|
||||||
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
|
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
|
||||||
|
|||||||
@ -14,7 +14,8 @@ os.environ["MODELSCOPE_CACHE"] = "./"
|
|||||||
inference_pipeline = pipeline(
|
inference_pipeline = pipeline(
|
||||||
task=Tasks.auto_speech_recognition,
|
task=Tasks.auto_speech_recognition,
|
||||||
model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
|
model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
|
||||||
model_revision='v1.0.6',
|
model_revision='v1.0.4',
|
||||||
|
update_model='v1.0.4',
|
||||||
mode="paraformer_streaming"
|
mode="paraformer_streaming"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@ from modelscope.utils.constant import Tasks
|
|||||||
inference_pipeline = pipeline(
|
inference_pipeline = pipeline(
|
||||||
task=Tasks.speech_timestamp,
|
task=Tasks.speech_timestamp,
|
||||||
model='damo/speech_timestamp_prediction-v1-16k-offline',
|
model='damo/speech_timestamp_prediction-v1-16k-offline',
|
||||||
output_dir=None)
|
model_revision='v1.1.0')
|
||||||
|
|
||||||
rec_result = inference_pipeline(
|
rec_result = inference_pipeline(
|
||||||
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
|
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
dua ribu dua puluh dua 2022
|
dua ribu dua puluh dua 2022
|
||||||
tiga ribu 300
|
tiga ribu 3000
|
||||||
sembilan ribu sembilan ratus sembilan puluh sembilan 9999
|
sembilan ribu sembilan ratus sembilan puluh sembilan 9999
|
||||||
seribu satu 100001
|
seribu satu 1001
|
||||||
ribu 100
|
ribu 1000
|
||||||
seribu 1000
|
seribu 1000
|
||||||
seribu dua ratus delapan puluh sembilan 10289
|
seribu dua ratus delapan puluh sembilan 1289
|
||||||
ribu dua ratus delapan puluh sembilan 1289
|
ribu dua ratus delapan puluh sembilan 1289
|
||||||
nol satu dua tiga empat lima enam tujuh delapan sembilan 01 2345-6789
|
nol satu dua tiga empat lima enam tujuh delapan sembilan 01 2345-6789
|
||||||
empat belas 14
|
empat belas 14
|
||||||
@ -22,8 +22,8 @@ satu miliar 1 miliar
|
|||||||
seratus dua puluh tiga 123
|
seratus dua puluh tiga 123
|
||||||
ratus dua puluh tiga 123
|
ratus dua puluh tiga 123
|
||||||
dua puluh empat maret 24 maret
|
dua puluh empat maret 24 maret
|
||||||
ribu tujuh puluh enam 10076
|
ribu tujuh puluh enam 1076
|
||||||
seribu tujuh puluh enam 100076
|
seribu tujuh puluh enam 1076
|
||||||
ribu tujuh puluh enam rupiah 10076 rupiah
|
ribu tujuh puluh enam rupiah 1076 rupiah
|
||||||
tujuh puluh enam 76
|
tujuh puluh enam 76
|
||||||
ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima +62 21 6539-0605
|
ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima +62 21 6539-0605
|
||||||
|
|||||||
|
@ -26,11 +26,10 @@ class CardinalFst(GraphFst):
|
|||||||
graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
|
graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
|
||||||
graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
|
graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
|
||||||
graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
|
graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
|
||||||
|
|
||||||
graph_cents = pynini.cross("seratus", "100") | pynini.cross("ratus", "100") | pynini.union(graph_hundreds, pynutil.insert("0"))
|
|
||||||
graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
|
graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
|
||||||
|
|
||||||
graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("00"))
|
graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("0"))
|
||||||
graph_hundred_component += delete_space
|
graph_hundred_component += delete_space
|
||||||
graph_hundred_component += pynini.union(
|
graph_hundred_component += pynini.union(
|
||||||
graph_teen | pynutil.insert("00"),
|
graph_teen | pynutil.insert("00"),
|
||||||
@ -44,8 +43,8 @@ class CardinalFst(GraphFst):
|
|||||||
(graph_ties | pynutil.insert("0")) + delete_space + (
|
(graph_ties | pynutil.insert("0")) + delete_space + (
|
||||||
graph_digit | pynutil.insert("0")),
|
graph_digit | pynutil.insert("0")),
|
||||||
)
|
)
|
||||||
graph_hundred_component = graph_hundred_component | graph_cents | graph_one_hundred_component
|
graph_hundred_component = graph_hundred_component | graph_one_hundred_component
|
||||||
|
|
||||||
graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
|
graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
|
||||||
pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
|
pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
|
||||||
)
|
)
|
||||||
@ -54,14 +53,12 @@ class CardinalFst(GraphFst):
|
|||||||
)
|
)
|
||||||
graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
|
graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
|
||||||
graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
|
graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
|
||||||
graph_thousand_cents = pynini.cross("seribu", "10") | pynini.cross("ribu","10") | pynini.union(graph_thousand, pynutil.insert(""))
|
|
||||||
graph_thousands = pynini.union(
|
graph_thousands = pynini.union(
|
||||||
graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
|
graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
|
||||||
pynutil.insert("000", weight=0.1),
|
pynutil.insert("000", weight=0.1),
|
||||||
)
|
)
|
||||||
graph_thousand_component = pynini.union(graph_digit + delete_space + graph_thousand, pynutil.insert("000"))
|
graph_thousands = graph_thousands | (pynutil.insert("00") + graph_one_thousand_component)
|
||||||
graph_thousand_component += delete_space
|
|
||||||
graph_thousands = graph_thousands | graph_thousand_cents | graph_thousand_component | graph_one_thousand_component
|
|
||||||
|
|
||||||
graph_million = pynini.union(
|
graph_million = pynini.union(
|
||||||
graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),
|
graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),
|
||||||
|
|||||||
@ -145,7 +145,9 @@ function stop() {
|
|||||||
isRec = false;
|
isRec = false;
|
||||||
info_div.innerHTML="请等候...";
|
info_div.innerHTML="请等候...";
|
||||||
btnStop.disabled = true;
|
btnStop.disabled = true;
|
||||||
setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
|
setTimeout(function(){
|
||||||
|
console.log("call stop ws!");
|
||||||
|
wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
|
||||||
rec.stop(function(blob,duration){
|
rec.stop(function(blob,duration){
|
||||||
|
|
||||||
console.log(blob);
|
console.log(blob);
|
||||||
|
|||||||
@ -28,7 +28,11 @@ function WebSocketConnectMethod( config ) { //定义socket连接方法类
|
|||||||
if ( 'WebSocket' in window ) {
|
if ( 'WebSocket' in window ) {
|
||||||
speechSokt = new WebSocket( Uri ); // 定义socket连接对象
|
speechSokt = new WebSocket( Uri ); // 定义socket连接对象
|
||||||
speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
|
speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
|
||||||
speechSokt.onclose = function(e){onClose(e);};
|
speechSokt.onclose = function(e){
|
||||||
|
console.log("onclose ws!");
|
||||||
|
speechSokt.close();
|
||||||
|
onClose(e);
|
||||||
|
};
|
||||||
speechSokt.onmessage = function(e){onMessage(e);};
|
speechSokt.onmessage = function(e){onMessage(e);};
|
||||||
speechSokt.onerror = function(e){onError(e);};
|
speechSokt.onerror = function(e){onError(e);};
|
||||||
return 1;
|
return 1;
|
||||||
@ -42,6 +46,7 @@ function WebSocketConnectMethod( config ) { //定义socket连接方法类
|
|||||||
// 定义停止与发送函数
|
// 定义停止与发送函数
|
||||||
this.wsStop = function () {
|
this.wsStop = function () {
|
||||||
if(speechSokt != undefined) {
|
if(speechSokt != undefined) {
|
||||||
|
console.log("stop ws!");
|
||||||
speechSokt.close();
|
speechSokt.close();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -58,16 +58,36 @@ inference_pipeline_asr_online = pipeline(
|
|||||||
model=args.asr_model_online,
|
model=args.asr_model_online,
|
||||||
ngpu=args.ngpu,
|
ngpu=args.ngpu,
|
||||||
ncpu=args.ncpu,
|
ncpu=args.ncpu,
|
||||||
model_revision='v1.0.6',
|
model_revision='v1.0.4',
|
||||||
|
update_model='v1.0.4',
|
||||||
mode='paraformer_streaming')
|
mode='paraformer_streaming')
|
||||||
|
|
||||||
print("model loaded")
|
print("model loaded! only support one client at the same time now!!!!")
|
||||||
|
|
||||||
|
async def ws_reset(websocket):
|
||||||
|
print("ws reset now, total num is ",len(websocket_users))
|
||||||
|
websocket.param_dict_asr_online = {"cache": dict()}
|
||||||
|
websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
|
||||||
|
websocket.param_dict_asr_online["is_final"]=True
|
||||||
|
audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
|
||||||
|
inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
|
||||||
|
inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
|
||||||
|
await websocket.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def clear_websocket():
|
||||||
|
for websocket in websocket_users:
|
||||||
|
await ws_reset(websocket)
|
||||||
|
websocket_users.clear()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def ws_serve(websocket, path):
|
async def ws_serve(websocket, path):
|
||||||
frames = []
|
frames = []
|
||||||
frames_asr = []
|
frames_asr = []
|
||||||
frames_asr_online = []
|
frames_asr_online = []
|
||||||
global websocket_users
|
global websocket_users
|
||||||
|
await clear_websocket()
|
||||||
websocket_users.add(websocket)
|
websocket_users.add(websocket)
|
||||||
websocket.param_dict_asr = {}
|
websocket.param_dict_asr = {}
|
||||||
websocket.param_dict_asr_online = {"cache": dict()}
|
websocket.param_dict_asr_online = {"cache": dict()}
|
||||||
@ -139,7 +159,8 @@ async def ws_serve(websocket, path):
|
|||||||
|
|
||||||
|
|
||||||
except websockets.ConnectionClosed:
|
except websockets.ConnectionClosed:
|
||||||
print("ConnectionClosed...", websocket_users)
|
print("ConnectionClosed...", websocket_users,flush=True)
|
||||||
|
await ws_reset(websocket)
|
||||||
websocket_users.remove(websocket)
|
websocket_users.remove(websocket)
|
||||||
except websockets.InvalidState:
|
except websockets.InvalidState:
|
||||||
print("InvalidState...")
|
print("InvalidState...")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user