funasr1.0

This commit is contained in:
游雁 2023-12-27 16:43:30 +08:00
parent 523e902edb
commit c6d6c932a0
13 changed files with 126 additions and 66 deletions

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# FSMN-VAD demo: load a locally downloaded ModelScope VAD model and run
# voice activity detection on the example wav shipped with the model.
vad_model_dir = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch"
example_wav = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav"

vad = AutoModel(model=vad_model_dir)
result = vad(input=example_wav)
print(result)

View File

@ -1,8 +1,13 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch
git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git ${local_path}

python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/vad_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \

View File

@ -0,0 +1,12 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Hotword-customized ASR demo: the contextual Paraformer model biases
# recognition towards the space-separated hotwords passed per call.
model_dir = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404"
wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav"

asr = AutoModel(model=model_dir)
result = asr(input=wav_path, hotword='达魔院 魔搭')
print(result)

View File

@ -0,0 +1,15 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}
python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/asr_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \
+"hotword='达魔院 魔搭'"

View File

@ -0,0 +1,14 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Full-pipeline demo: Paraformer ASR combined with FSMN-VAD segmentation
# and CT-Transformer punctuation restoration, all loaded from local dirs.
asr_dir = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
vad_dir = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch"
punc_dir = "/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav"

pipeline = AutoModel(model=asr_dir, vad_model=vad_dir, punc_model=punc_dir)
# batch_size_s / batch_size_threshold_s control dynamic batching of the
# VAD-segmented audio (values kept identical to the original script).
result = pipeline(input=wav_path, batch_size_s=300, batch_size_threshold_s=60)
print(result)

View File

@ -1,32 +1,26 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}

local_path_vad=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path_vad}

local_path_punc=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path_punc}

python funasr/bin/inference.py \
+model="${local_path}" \
+vad_model="${local_path_vad}" \
+punc_model="${local_path_punc}" \
+input="${local_path}/example/asr_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \
+batch_size_s=300 \
+batch_size_threshold_s=60 \
+debug="true"

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Plain ASR demo: transcribe the example wav bundled with a locally
# downloaded Paraformer-large model.
model_dir = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav"

asr = AutoModel(model=model_dir)
result = asr(input=wav_path)
print(result)

View File

@ -1,23 +1,14 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}

python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/asr_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Punctuation-restoration demo: run the CT-Transformer punctuation model
# on the example text file shipped with the local model directory.
model_dir = "/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
text_path = "/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/example/punc_example.txt"

punc = AutoModel(model=model_dir)
result = punc(input=text_path)
print(result)

View File

@ -1,18 +1,14 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}

python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/punc_example.txt" \
+output_dir="./outputs/debug" \
+device="cpu" \
+debug="true"

View File

@ -339,7 +339,7 @@ class AutoModel:
# sentences = time_stamp_sentence(model.punc_list, model.sentence_end_id, results_ret_list[i]["timestamp"], res[i]["text"])
# results_ret_list[i]["time_stamp"] = res[0]["text_postprocessed_punc"]
# results_ret_list[i]["sentences"] = sentences
results_ret_list[i]["text_with_punc"] = res[i]["text"]
pbar_total.update(1)
end_total = time.time()

View File

@ -417,7 +417,7 @@ class NeatContextualParaformer(Paraformer):
text = tokenizer.tokens2text(token)
text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
result_i = {"key": key[i], "text": text_postprocessed}
if ibest_writer is not None:
    ibest_writer["token"][key[i]] = " ".join(token)

View File

@ -535,7 +535,7 @@ class Paraformer(nn.Module):
text = tokenizer.tokens2text(token)
text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
result_i = {"key": key[i], "text": text_postprocessed}
if ibest_writer is not None: