From c6d6c932a047f49f80eca33954afb802136f02c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Wed, 27 Dec 2023 16:43:30 +0800 Subject: [PATCH] funasr1.0 --- .../fsmn-vad/demo.py | 11 +++++ .../fsmn-vad/infer.sh | 15 ++++--- .../neat_contextual_paraformer/demo.py | 12 +++++ .../neat_contextual_paraformer/infer.sh | 15 +++++++ .../paraformer-large-long/demo.py | 14 ++++++ .../paraformer-large-long/infer.sh | 44 ++++++++----------- .../paraformer-large/demo.py | 11 +++++ .../paraformer-large/infer.sh | 29 +++++------- .../industrial_data_pretraining/punc/demo.py | 11 +++++ .../industrial_data_pretraining/punc/infer.sh | 24 +++++----- funasr/bin/inference.py | 2 +- .../neat_contextual_paraformer/model.py | 2 +- funasr/models/paraformer/model.py | 2 +- 13 files changed, 126 insertions(+), 66 deletions(-) create mode 100644 examples/industrial_data_pretraining/fsmn-vad/demo.py create mode 100644 examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py create mode 100644 examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh create mode 100644 examples/industrial_data_pretraining/paraformer-large-long/demo.py create mode 100644 examples/industrial_data_pretraining/paraformer-large/demo.py create mode 100644 examples/industrial_data_pretraining/punc/demo.py diff --git a/examples/industrial_data_pretraining/fsmn-vad/demo.py b/examples/industrial_data_pretraining/fsmn-vad/demo.py new file mode 100644 index 000000000..b3e9bb6ea --- /dev/null +++ b/examples/industrial_data_pretraining/fsmn-vad/demo.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. 
+# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch") + +res = model(input="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav") +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/fsmn-vad/infer.sh b/examples/industrial_data_pretraining/fsmn-vad/infer.sh index 9bfd8ba1a..7662a5314 100644 --- a/examples/industrial_data_pretraining/fsmn-vad/infer.sh +++ b/examples/industrial_data_pretraining/fsmn-vad/infer.sh @@ -1,8 +1,13 @@ -cmd="funasr/bin/inference.py" +# download model +local_path_root=./modelscope_models +mkdir -p ${local_path_root} +local_path=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch +git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git ${local_path} -python $cmd \ -+model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch" \ -+input="/Users/zhifu/Downloads/asr_example.wav" \ -+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2_vad" \ + +python funasr/bin/inference.py \ ++model="${local_path}" \ ++input="${local_path}/example/vad_example.wav" \ ++output_dir="./outputs/debug" \ +device="cpu" \ diff --git a/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py b/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py new file mode 100644 index 000000000..b74aacdef --- /dev/null +++ b/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. 
+# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404") + +res = model(input="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav", + hotword='达魔院 魔搭') +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh b/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh new file mode 100644 index 000000000..4ae7d036f --- /dev/null +++ b/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh @@ -0,0 +1,15 @@ + +# download model +local_path_root=./modelscope_models +mkdir -p ${local_path_root} +local_path=${local_path_root}/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404 +git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path} + + +python funasr/bin/inference.py \ ++model="${local_path}" \ ++input="${local_path}/example/asr_example.wav" \ ++output_dir="./outputs/debug" \ ++device="cpu" \ ++"hotword='达魔院 魔搭'" + diff --git a/examples/industrial_data_pretraining/paraformer-large-long/demo.py b/examples/industrial_data_pretraining/paraformer-large-long/demo.py new file mode 100644 index 000000000..e45cae871 --- /dev/null +++ b/examples/industrial_data_pretraining/paraformer-large-long/demo.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. 
+# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", + vad_model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch", + punc_model="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", + ) + +res = model(input="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav", batch_size_s=300, batch_size_threshold_s=60) +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/paraformer-large-long/infer.sh b/examples/industrial_data_pretraining/paraformer-large-long/infer.sh index 2e6ec0dba..fc2a09aa3 100644 --- a/examples/industrial_data_pretraining/paraformer-large-long/infer.sh +++ b/examples/industrial_data_pretraining/paraformer-large-long/infer.sh @@ -1,32 +1,26 @@ -cmd="funasr/bin/inference.py" +# download model +local_path_root=./modelscope_models +mkdir -p ${local_path_root} -python $cmd \ -+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ -+vad_model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch" \ -+punc_model="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" \ -+input="/Users/zhifu/funasr_github/test_local/vad_example.wav" \ -+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \ +local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch +git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path} + +local_path_vad=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch +git clone 
https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git ${local_path_vad} + +local_path_punc=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch +git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git ${local_path_punc} + + +python funasr/bin/inference.py \ ++model="${local_path}" \ ++vad_model="${local_path_vad}" \ ++punc_model="${local_path_punc}" \ ++input="${local_path}/example/asr_example.wav" \ ++output_dir="./outputs/debug" \ +device="cpu" \ +batch_size_s=300 \ +batch_size_threshold_s=60 \ +debug="true" -#python $cmd \ -#+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \ -#+input="/Users/zhifu/Downloads/asr_example.wav" \ -#+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \ -#+device="cpu" \ -#+"hotword='达魔院 魔搭'" - -#+input="/Users/zhifu/funasr_github/test_local/wav.scp" -#+input="/Users/zhifu/funasr_github/test_local/asr_example.wav" \ -#+input="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len.jsonl" \ -#+input="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \ -#+model="/Users/zhifu/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ - -#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ -#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \ -#+"hotword='达魔院 魔搭'" - -#+vad_model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch" \ diff --git a/examples/industrial_data_pretraining/paraformer-large/demo.py b/examples/industrial_data_pretraining/paraformer-large/demo.py new file mode 100644 index 000000000..11c52f5b6 --- /dev/null +++ b/examples/industrial_data_pretraining/paraformer-large/demo.py @@ -0,0 +1,11 
@@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. +# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch") + +res = model(input="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav") +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/paraformer-large/infer.sh b/examples/industrial_data_pretraining/paraformer-large/infer.sh index 87260acfd..c7487e269 100644 --- a/examples/industrial_data_pretraining/paraformer-large/infer.sh +++ b/examples/industrial_data_pretraining/paraformer-large/infer.sh @@ -1,23 +1,14 @@ -cmd="funasr/bin/inference.py" +# download model +local_path_root=./modelscope_models +mkdir -p ${local_path_root} +local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch +git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path} -python $cmd \ -+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ -+input="/Users/zhifu/Downloads/asr_example.wav" \ -+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \ + +python funasr/bin/inference.py \ ++model="${local_path}" \ ++input="${local_path}/example/asr_example.wav" \ ++output_dir="./outputs/debug" \ +device="cpu" \ -python $cmd \ -+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \ -+input="/Users/zhifu/Downloads/asr_example.wav" \ -+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \ -+device="cpu" \ -+"hotword='达魔院 魔搭'" - -#+input="/Users/zhifu/funasr_github/test_local/asr_example.wav" \ 
-#+input="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len.jsonl" \ -#+model="/Users/zhifu/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ - -#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ -#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \ -#+"hotword='达魔院 魔搭'" \ No newline at end of file diff --git a/examples/industrial_data_pretraining/punc/demo.py b/examples/industrial_data_pretraining/punc/demo.py new file mode 100644 index 000000000..d3b63db22 --- /dev/null +++ b/examples/industrial_data_pretraining/punc/demo.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- +# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. +# MIT License (https://opensource.org/licenses/MIT) + +from funasr import AutoModel + +model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch") + +res = model(input="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/example/punc_example.txt") +print(res) \ No newline at end of file diff --git a/examples/industrial_data_pretraining/punc/infer.sh b/examples/industrial_data_pretraining/punc/infer.sh index 367581502..f6c5c2354 100644 --- a/examples/industrial_data_pretraining/punc/infer.sh +++ b/examples/industrial_data_pretraining/punc/infer.sh @@ -1,18 +1,14 @@ -cmd="funasr/bin/inference.py" +# download model +local_path_root=./modelscope_models +mkdir -p ${local_path_root} +local_path=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch +git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git ${local_path} -python $cmd \ 
-+input="/Users/zhifu/FunASR/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/data/punc_example.txt" \ -+model="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" \ -+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2_punc" \ + +python funasr/bin/inference.py \ ++model="${local_path}" \ ++input="${local_path}/example/punc_example.txt" \ ++output_dir="./outputs/debug" \ +device="cpu" \ +debug="true" - - -#+input="/Users/zhifu/FunASR/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/data/punc_example.txt" \ - -#+"input='跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益'" \ - -#+input="/Users/zhifu/FunASR/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/data/punc_example.txt" \ - -#+"input='那今天的会就到这里吧 happy new year 明年见'" \ \ No newline at end of file diff --git a/funasr/bin/inference.py b/funasr/bin/inference.py index 16ad0e2a9..c545c4d2e 100644 --- a/funasr/bin/inference.py +++ b/funasr/bin/inference.py @@ -339,7 +339,7 @@ class AutoModel: # sentences = time_stamp_sentence(model.punc_list, model.sentence_end_id, results_ret_list[i]["timestamp"], res[i]["text"]) # results_ret_list[i]["time_stamp"] = res[0]["text_postprocessed_punc"] # results_ret_list[i]["sentences"] = sentences - # results_ret_list[i]["text_with_punc"] = res[i]["text"] + results_ret_list[i]["text_with_punc"] = res[i]["text"] pbar_total.update(1) end_total = time.time() diff --git a/funasr/models/neat_contextual_paraformer/model.py b/funasr/models/neat_contextual_paraformer/model.py index d056ab980..939df3162 100644 --- a/funasr/models/neat_contextual_paraformer/model.py +++ b/funasr/models/neat_contextual_paraformer/model.py @@ -417,7 +417,7 @@ class NeatContextualParaformer(Paraformer): text = tokenizer.tokens2text(token) 
text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) - result_i = {"key": key[i], "token": token, "text": text, "text_postprocessed": text_postprocessed} + result_i = {"key": key[i], "text": text_postprocessed} if ibest_writer is not None: ibest_writer["token"][key[i]] = " ".join(token) diff --git a/funasr/models/paraformer/model.py b/funasr/models/paraformer/model.py index 1caed90f3..c54658574 100644 --- a/funasr/models/paraformer/model.py +++ b/funasr/models/paraformer/model.py @@ -535,7 +535,7 @@ class Paraformer(nn.Module): text = tokenizer.tokens2text(token) text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) - result_i = {"key": key[i], "text_postprocessed": text_postprocessed} + result_i = {"key": key[i], "text": text_postprocessed} if ibest_writer is not None: