mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
funasr1.0 update (#1278)
This commit is contained in:
parent
37d7764ecf
commit
1159adbca0
@ -93,7 +93,7 @@ from funasr import AutoModel
|
|||||||
# use vad, punc, spk or not as you need
|
# use vad, punc, spk or not as you need
|
||||||
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
|
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
|
||||||
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
|
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
|
||||||
punc_model="ct-punc-c", punc_model_revision="v2.0.2",
|
punc_model="ct-punc-c", punc_model_revision="v2.0.3",
|
||||||
# spk_model="cam++", spk_model_revision="v2.0.2",
|
# spk_model="cam++", spk_model_revision="v2.0.2",
|
||||||
)
|
)
|
||||||
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
|
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
|
||||||
|
|||||||
@ -89,7 +89,7 @@ from funasr import AutoModel
|
|||||||
# use vad, punc, spk or not as you need
|
# use vad, punc, spk or not as you need
|
||||||
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
|
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
|
||||||
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
|
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
|
||||||
punc_model="ct-punc-c", punc_model_revision="v2.0.2",
|
punc_model="ct-punc-c", punc_model_revision="v2.0.3",
|
||||||
# spk_model="cam++", spk_model_revision="v2.0.2",
|
# spk_model="cam++", spk_model_revision="v2.0.2",
|
||||||
)
|
)
|
||||||
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
|
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
|
||||||
|
|||||||
@ -10,7 +10,7 @@ model = AutoModel(model="damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k
|
|||||||
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
||||||
vad_model_revision="v2.0.2",
|
vad_model_revision="v2.0.2",
|
||||||
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
||||||
punc_model_revision="v2.0.2",
|
punc_model_revision="v2.0.3",
|
||||||
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
|
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
|
||||||
spk_model_revision="v2.0.2",
|
spk_model_revision="v2.0.2",
|
||||||
)
|
)
|
||||||
|
|||||||
@ -4,7 +4,7 @@ model_revision="v2.0.2"
|
|||||||
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
||||||
vad_model_revision="v2.0.2"
|
vad_model_revision="v2.0.2"
|
||||||
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
||||||
punc_model_revision="v2.0.2"
|
punc_model_revision="v2.0.3"
|
||||||
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
|
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
|
||||||
spk_model_revision="v2.0.2"
|
spk_model_revision="v2.0.2"
|
||||||
|
|
||||||
|
|||||||
@ -10,7 +10,7 @@ model = AutoModel(model="damo/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-co
|
|||||||
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
||||||
vad_model_revision="v2.0.2",
|
vad_model_revision="v2.0.2",
|
||||||
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
||||||
punc_model_revision="v2.0.2",
|
punc_model_revision="v2.0.3",
|
||||||
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
|
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
|
||||||
spk_model_revision="v2.0.2"
|
spk_model_revision="v2.0.2"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -4,7 +4,7 @@ model_revision="v2.0.2"
|
|||||||
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
||||||
vad_model_revision="v2.0.2"
|
vad_model_revision="v2.0.2"
|
||||||
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
||||||
punc_model_revision="v2.0.2"
|
punc_model_revision="v2.0.3"
|
||||||
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
|
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
|
||||||
spk_model_revision="v2.0.2"
|
spk_model_revision="v2.0.2"
|
||||||
|
|
||||||
|
|||||||
@ -5,7 +5,12 @@
|
|||||||
|
|
||||||
from funasr import AutoModel
|
from funasr import AutoModel
|
||||||
|
|
||||||
model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.2")
|
model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.3",
|
||||||
|
# vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
||||||
|
# vad_model_revision="v2.0.2",
|
||||||
|
# punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
||||||
|
# punc_model_revision="v2.0.3",
|
||||||
|
)
|
||||||
|
|
||||||
res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
|
res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
|
||||||
print(res)
|
print(res)
|
||||||
|
|||||||
@ -10,7 +10,7 @@ model = AutoModel(model="damo/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-co
|
|||||||
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
||||||
vad_model_revision="v2.0.2",
|
vad_model_revision="v2.0.2",
|
||||||
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
|
||||||
punc_model_revision="v2.0.2",
|
punc_model_revision="v2.0.3",
|
||||||
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
|
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
|
||||||
spk_model_revision="v2.0.2",
|
spk_model_revision="v2.0.2",
|
||||||
)
|
)
|
||||||
|
|||||||
@ -4,7 +4,7 @@ model_revision="v2.0.2"
|
|||||||
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
|
||||||
vad_model_revision="v2.0.2"
|
vad_model_revision="v2.0.2"
|
||||||
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
|
||||||
punc_model_revision="v2.0.2"
|
punc_model_revision="v2.0.3"
|
||||||
|
|
||||||
python funasr/bin/inference.py \
|
python funasr/bin/inference.py \
|
||||||
+model=${model} \
|
+model=${model} \
|
||||||
|
|||||||
@ -391,7 +391,7 @@ class AutoModel:
|
|||||||
if self.punc_model is not None:
|
if self.punc_model is not None:
|
||||||
self.punc_kwargs.update(cfg)
|
self.punc_kwargs.update(cfg)
|
||||||
punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
|
punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
|
||||||
result["text_with_punc"] = punc_res[0]["text"]
|
result["text"] = punc_res[0]["text"]
|
||||||
|
|
||||||
# speaker embedding cluster after resorted
|
# speaker embedding cluster after resorted
|
||||||
if self.spk_model is not None:
|
if self.spk_model is not None:
|
||||||
|
|||||||
@ -451,7 +451,7 @@ class Paraformer(torch.nn.Module):
|
|||||||
self.nbest = kwargs.get("nbest", 1)
|
self.nbest = kwargs.get("nbest", 1)
|
||||||
|
|
||||||
meta_data = {}
|
meta_data = {}
|
||||||
if isinstance(data_in, torch.Tensor): # fbank
|
if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank": # fbank
|
||||||
speech, speech_lengths = data_in, data_lengths
|
speech, speech_lengths = data_in, data_lengths
|
||||||
if len(speech.shape) < 3:
|
if len(speech.shape) < 3:
|
||||||
speech = speech[None, :, :]
|
speech = speech[None, :, :]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user