From 9afc91702a77235990988916bbb2e09abe4edaa3 Mon Sep 17 00:00:00 2001 From: "shixian.shi" Date: Wed, 27 Dec 2023 17:16:07 +0800 Subject: [PATCH 1/2] seaco paraformer inference --- .../contextual_paraformer/demo.py | 12 ++++++++++++ .../infer.sh | 2 +- .../neat_contextual_paraformer/demo.py | 12 ------------ .../seaco_paraformer/demo.py | 12 ++++++++++++ .../seaco_paraformer/infer.sh | 15 +++++++++++++++ .../__init__.py | 0 .../decoder.py | 0 .../model.py | 4 ++-- .../template.yaml | 2 +- 9 files changed, 43 insertions(+), 16 deletions(-) create mode 100644 examples/industrial_data_pretraining/contextual_paraformer/demo.py rename examples/industrial_data_pretraining/{neat_contextual_paraformer => contextual_paraformer}/infer.sh (92%) delete mode 100644 examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py create mode 100644 examples/industrial_data_pretraining/seaco_paraformer/demo.py create mode 100644 examples/industrial_data_pretraining/seaco_paraformer/infer.sh rename funasr/models/{neat_contextual_paraformer => contextual_paraformer}/__init__.py (100%) rename funasr/models/{neat_contextual_paraformer => contextual_paraformer}/decoder.py (100%) rename funasr/models/{neat_contextual_paraformer => contextual_paraformer}/model.py (99%) rename funasr/models/{neat_contextual_paraformer => contextual_paraformer}/template.yaml (98%) diff --git a/examples/industrial_data_pretraining/contextual_paraformer/demo.py b/examples/industrial_data_pretraining/contextual_paraformer/demo.py new file mode 100644 index 000000000..0edbb2a4f --- /dev/null +++ b/examples/industrial_data_pretraining/contextual_paraformer/demo.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. +# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="../modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404") + +res = model(input="../modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav", + hotword='达魔院 魔搭') +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh b/examples/industrial_data_pretraining/contextual_paraformer/infer.sh similarity index 92% rename from examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh rename to examples/industrial_data_pretraining/contextual_paraformer/infer.sh index 4ae7d036f..2dbc62752 100644 --- a/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh +++ b/examples/industrial_data_pretraining/contextual_paraformer/infer.sh @@ -1,6 +1,6 @@ # download model -local_path_root=./modelscope_models +local_path_root=../modelscope_models mkdir -p ${local_path_root} local_path=${local_path_root}/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404 git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path} diff --git a/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py b/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py deleted file mode 100644 index b74aacdef..000000000 --- a/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 -# -*- encoding: utf-8 -*- -# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. -# MIT License (https://opensource.org/licenses/MIT) - -from funasr import AutoModel - -model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404") - -res = model(input="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav", - hotword='达魔院 魔搭') -print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/seaco_paraformer/demo.py b/examples/industrial_data_pretraining/seaco_paraformer/demo.py new file mode 100644 index 000000000..0edbb2a4f --- /dev/null +++ b/examples/industrial_data_pretraining/seaco_paraformer/demo.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. +# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="../modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404") + +res = model(input="../modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav", + hotword='达魔院 魔搭') +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/seaco_paraformer/infer.sh b/examples/industrial_data_pretraining/seaco_paraformer/infer.sh new file mode 100644 index 000000000..bfb4315f4 --- /dev/null +++ b/examples/industrial_data_pretraining/seaco_paraformer/infer.sh @@ -0,0 +1,15 @@ + +# download model +local_path_root=../modelscope_models +mkdir -p ${local_path_root} +local_path=${local_path_root}/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404 +git clone https://www.modelscope.cn/damo/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path} + + +python funasr/bin/inference.py \ ++model="${local_path}" \ ++input="${local_path}/example/asr_example.wav" \ ++output_dir="./outputs/debug" \ ++device="cpu" \ ++"hotword='达魔院 魔搭'" + diff --git a/funasr/models/neat_contextual_paraformer/__init__.py b/funasr/models/contextual_paraformer/__init__.py similarity index 100% rename from funasr/models/neat_contextual_paraformer/__init__.py rename to funasr/models/contextual_paraformer/__init__.py diff --git a/funasr/models/neat_contextual_paraformer/decoder.py b/funasr/models/contextual_paraformer/decoder.py similarity index 100% rename from funasr/models/neat_contextual_paraformer/decoder.py rename to funasr/models/contextual_paraformer/decoder.py diff --git a/funasr/models/neat_contextual_paraformer/model.py b/funasr/models/contextual_paraformer/model.py similarity index 99% rename from funasr/models/neat_contextual_paraformer/model.py rename to funasr/models/contextual_paraformer/model.py index 939df3162..1ca134d06 100644 --- a/funasr/models/neat_contextual_paraformer/model.py +++ b/funasr/models/contextual_paraformer/model.py @@ -55,8 +55,8 @@ from funasr.models.paraformer.model import Paraformer from funasr.register import tables -@tables.register("model_classes", "NeatContextualParaformer") -class NeatContextualParaformer(Paraformer): +@tables.register("model_classes", "ContextualParaformer") +class ContextualParaformer(Paraformer): """ Author: Speech Lab of DAMO Academy, Alibaba Group Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition diff --git a/funasr/models/neat_contextual_paraformer/template.yaml b/funasr/models/contextual_paraformer/template.yaml similarity index 98% rename from funasr/models/neat_contextual_paraformer/template.yaml rename to funasr/models/contextual_paraformer/template.yaml index 6efc62c8c..fbd15cea9 100644 --- a/funasr/models/neat_contextual_paraformer/template.yaml +++ b/funasr/models/contextual_paraformer/template.yaml @@ -6,7 +6,7 @@ # tables.print() # network architecture -model: NeatContextualParaformer +model: ContextualParaformer model_conf: ctc_weight: 0.0 lsm_weight: 0.1 From 840657e54eaa790c4c8e95e30dc3959acb679417 Mon Sep 17 00:00:00 2001 From: "shixian.shi" Date: Wed, 27 Dec 2023 17:22:59 +0800 Subject: [PATCH 2/2] paper link --- funasr/models/contextual_paraformer/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/funasr/models/contextual_paraformer/model.py b/funasr/models/contextual_paraformer/model.py index 1ca134d06..1bb2d3262 100644 --- a/funasr/models/contextual_paraformer/model.py +++ b/funasr/models/contextual_paraformer/model.py @@ -59,8 +59,8 @@ from funasr.register import tables class ContextualParaformer(Paraformer): """ Author: Speech Lab of DAMO Academy, Alibaba Group - Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition - https://arxiv.org/abs/2206.08317 + FunASR: A Fundamental End-to-End Speech Recognition Toolkit + https://arxiv.org/abs/2305.11013 """ def __init__(