funasr1.0

This commit is contained in:
游雁 2023-12-27 16:43:30 +08:00
parent 523e902edb
commit c6d6c932a0
13 changed files with 126 additions and 66 deletions

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# FSMN-VAD demo: load a locally downloaded ModelScope VAD model and run
# voice activity detection on the example wav shipped with the model.
vad_model_dir = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch"
example_wav = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav"

vad = AutoModel(model=vad_model_dir)
result = vad(input=example_wav)
print(result)

View File

@ -1,8 +1,13 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch
git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git ${local_path}

python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/vad_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \

View File

@ -0,0 +1,12 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Hotword-customized ASR demo: the contextual Paraformer model biases
# recognition towards the space-separated hotwords passed per call.
model_dir = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404"
wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav"

asr = AutoModel(model=model_dir)
result = asr(input=wav_path, hotword='达魔院 魔搭')
print(result)

View File

@ -0,0 +1,15 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}
python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/asr_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \
+"hotword='达魔院 魔搭'"

View File

@ -0,0 +1,14 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Full-pipeline demo: Paraformer ASR combined with FSMN-VAD segmentation
# and CT-Transformer punctuation restoration, all loaded from local dirs.
asr_dir = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
vad_dir = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch"
punc_dir = "/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav"

pipeline = AutoModel(model=asr_dir, vad_model=vad_dir, punc_model=punc_dir)
# batch_size_s / batch_size_threshold_s control dynamic batching of the
# VAD-segmented audio (values kept identical to the original script).
result = pipeline(input=wav_path, batch_size_s=300, batch_size_threshold_s=60)
print(result)

View File

@ -1,32 +1,26 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}

local_path_vad=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path_vad}

local_path_punc=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path_punc}

python funasr/bin/inference.py \
+model="${local_path}" \
+vad_model="${local_path_vad}" \
+punc_model="${local_path_punc}" \
+input="${local_path}/example/asr_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \
+batch_size_s=300 \
+batch_size_threshold_s=60 \
+debug="true"

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Plain ASR demo: transcribe the example wav bundled with a locally
# downloaded Paraformer-large model.
model_dir = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
wav_path = "/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav"

asr = AutoModel(model=model_dir)
result = asr(input=wav_path)
print(result)

View File

@ -1,23 +1,14 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}

python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/asr_example.wav" \
+output_dir="./outputs/debug" \
+device="cpu" \

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
from funasr import AutoModel

# Punctuation-restoration demo: run the CT-Transformer punctuation model
# on the example text file shipped with the local model directory.
model_dir = "/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
text_path = "/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/example/punc_example.txt"

punc = AutoModel(model=model_dir)
result = punc(input=text_path)
print(result)

View File

@ -1,18 +1,14 @@
# download model
local_path_root=./modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch
git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}

python funasr/bin/inference.py \
+model="${local_path}" \
+input="${local_path}/example/punc_example.txt" \
+output_dir="./outputs/debug" \
+device="cpu" \
+debug="true"

View File

@ -339,7 +339,7 @@ class AutoModel:
# sentences = time_stamp_sentence(model.punc_list, model.sentence_end_id, results_ret_list[i]["timestamp"], res[i]["text"])
# results_ret_list[i]["time_stamp"] = res[0]["text_postprocessed_punc"]
# results_ret_list[i]["sentences"] = sentences
results_ret_list[i]["text_with_punc"] = res[i]["text"]
pbar_total.update(1)
end_total = time.time()

View File

@ -417,7 +417,7 @@ class NeatContextualParaformer(Paraformer):
text = tokenizer.tokens2text(token)
text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
result_i = {"key": key[i], "text": text_postprocessed}
if ibest_writer is not None:
    ibest_writer["token"][key[i]] = " ".join(token)

View File

@ -535,7 +535,7 @@ class Paraformer(nn.Module):
text = tokenizer.tokens2text(token)
text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
result_i = {"key": key[i], "text": text_postprocessed}
if ibest_writer is not None: