Mirror of https://github.com/modelscope/FunASR (synced 2025-09-15 14:48:36 +08:00)
update paraformer-speaker pipeline
parent 8a0930d682
commit ac6afabdd1
@@ -0,0 +1,17 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+if __name__ == '__main__':
+    audio_in = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav'
+    output_dir = "./results"
+    inference_pipeline = pipeline(
+        task=Tasks.auto_speech_recognition,
+        model='damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn',
+        vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
+        #punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
+        punc_model='damo/punc_ct-transformer_cn-en-common-vocab471067-large',
+        output_dir=output_dir,
+    )
+    rec_result = inference_pipeline(audio_in=audio_in, batch_size_token=5000, batch_size_token_threshold_s=40, max_single_segment_time=6000)
+    print(rec_result)
+
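Note (not part of the commit): a minimal sketch of how the speaker-attributed result from the script above might be consumed. It assumes rec_result exposes a 'sentences' list (a later hunk passes asr_result_list[0]['sentences'] to distribute_spk) and that each sentence dict ends up with a 'text' field and a speaker label under a 'spk' key; those field names are assumptions, not confirmed by this diff.

from collections import defaultdict

def group_by_speaker(sentences):
    # Group recognized sentences by the speaker label attached by distribute_spk().
    # The 'spk' and 'text' keys are assumed field names, not confirmed by this diff.
    turns = defaultdict(list)
    for sent in sentences:
        turns[sent.get('spk', 'unknown')].append(sent.get('text', ''))
    return dict(turns)

# Hypothetical usage after running the pipeline above:
# for spk, texts in group_by_speaker(rec_result['sentences']).items():
#     print(spk, ' '.join(texts))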
@@ -787,7 +787,7 @@ def inference_paraformer_vad_speaker(
     time_stamp_writer: bool = True,
     punc_infer_config: Optional[str] = None,
     punc_model_file: Optional[str] = None,
-    sv_model_file: Optional[str] = None,
+    sv_model_file: Optional[str] = "~/.cache/modelscope/hub/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin",
     streaming: bool = False,
     embedding_node: str = "resnet1_dense",
     sv_threshold: float = 0.9465,
@@ -933,7 +933,7 @@ def inference_paraformer_vad_speaker(
     ##### speaker_verification #####
     ##################################
     # load sv model
-    sv_model_dict = torch.load(sv_model_file, map_location=torch.device('cpu'))
+    sv_model_dict = torch.load(sv_model_file.replace("~", os.environ['HOME']), map_location=torch.device('cpu'))
     sv_model = CAMPPlus()
     sv_model.load_state_dict(sv_model_dict)
     sv_model.eval()
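Note (not part of the commit): the tilde expansion introduced in the hunk above relies on os.environ['HOME'], which raises a KeyError when HOME is unset (for example on Windows). A minimal sketch of a more portable loader using the standard-library os.path.expanduser; the helper name is hypothetical.

import os
import torch

def load_sv_state_dict(sv_model_file: str):
    # os.path.expanduser expands a leading "~" on any platform and leaves
    # absolute paths untouched, unlike replace("~", os.environ['HOME']).
    return torch.load(os.path.expanduser(sv_model_file), map_location=torch.device('cpu'))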
@@ -1084,7 +1084,6 @@ def inference_paraformer_vad_speaker(
         logging.info("decoding, utt: {}, predictions: {}".format(key, text_postprocessed_punc))
         torch.cuda.empty_cache()
         distribute_spk(asr_result_list[0]['sentences'], sv_output)
-        import pdb; pdb.set_trace()
         return asr_result_list

     return _forward
@@ -2030,7 +2029,7 @@ def inference_launch(**kwargs):
         return inference_paraformer(**kwargs)
     elif mode == "paraformer_streaming":
         return inference_paraformer_online(**kwargs)
-    elif mode == "paraformer_vad_speaker":
+    elif mode.startswith("paraformer_vad_speaker"):
         return inference_paraformer_vad_speaker(**kwargs)
     elif mode.startswith("paraformer_vad"):
         return inference_paraformer_vad_punc(**kwargs)
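Note (not part of the commit): with this change both dispatch branches use a prefix match, so the "paraformer_vad_speaker" test must keep preceding the "paraformer_vad" test, whose prefix also matches speaker modes. A minimal sketch of that ordering concern, with placeholder branch names standing in for the real inference functions:

def pick_branch(mode: str) -> str:
    # The more specific prefix has to be tested first; swapping the two
    # elif branches would route every speaker mode to the vad/punc path.
    if mode.startswith("paraformer_vad_speaker"):
        return "paraformer_vad_speaker_branch"
    elif mode.startswith("paraformer_vad"):
        return "paraformer_vad_punc_branch"
    return "other"

assert pick_branch("paraformer_vad_speaker") == "paraformer_vad_speaker_branch"
assert pick_branch("paraformer_vad") == "paraformer_vad_punc_branch"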