diff --git a/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py b/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py index 3db6f7d62..9e80d2be3 100644 --- a/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py +++ b/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py @@ -7,8 +7,9 @@ https://arxiv.org/abs/2303.05397 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -# 初始化推理 pipeline -# 当以原始音频作为输入时使用配置文件 sond.yaml,并设置 mode 为sond_demo +# initialize the pipeline for inference +# when using the raw waveform files to inference, please use the config file `sond.yaml` +# and set mode to `sond_demo` inference_diar_pipline = pipeline( mode="sond_demo", num_workers=0, @@ -19,7 +20,8 @@ inference_diar_pipline = pipeline( sv_model_revision="master", ) -# 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音 +# use audio_list as the input, where the first one is the record to be detected +# and the following files are enrollments for different speakers audio_list = [ "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav", "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav", diff --git a/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py b/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py index db101930e..dc867b006 100644 --- a/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py +++ b/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py @@ -7,8 +7,9 @@ https://arxiv.org/abs/2211.10243 from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks -# 初始化推理 pipeline -# 当以原始音频作为输入时使用配置文件 sond.yaml,并设置 mode 为sond_demo +# initialize the pipeline for inference +# when using the raw waveform files to inference, please use the config file `sond.yaml` +# and set mode to `sond_demo` inference_diar_pipline = pipeline( mode="sond_demo", num_workers=0, @@ -19,7 +20,8 @@ inference_diar_pipline = pipeline( sv_model_revision="master", ) -# 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音 +# use audio_list as the input, where the first one is the record to be detected +# and the following files are enrollments for different speakers audio_list = [ "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav", "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav", diff --git a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py index c51313db0..7a5382740 100644 --- a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py +++ b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py @@ -7,13 +7,13 @@ if __name__ == '__main__': model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch' ) - # 两个语音为相同说话人 + # the same speaker rec_result = inference_sv_pipline(audio_in=( 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav', 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav')) print("Similarity", rec_result["scores"]) - # 两个语音为不同说话人 + # different speaker rec_result = inference_sv_pipline(audio_in=( 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav', 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav')) diff --git a/funasr/bin/diar_inference_launch.py b/funasr/bin/diar_inference_launch.py index 69d37d6d3..e0d900e76 100755 --- a/funasr/bin/diar_inference_launch.py +++ b/funasr/bin/diar_inference_launch.py @@ -38,7 +38,6 @@ from typeguard import check_return_type from scipy.signal import medfilt from funasr.utils.cli_utils import get_commandline_args from funasr.tasks.diar import DiarTask -from funasr.tasks.asr import ASRTask from funasr.tasks.diar import EENDOLADiarTask from funasr.torch_utils.device_funcs import to_device from funasr.torch_utils.set_all_random_seed import set_all_random_seed @@ -187,7 +186,7 @@ def inference_sond( raise TypeError("raw_inputs must be a list or tuple in [speech, profile1, profile2, ...] ") else: # 3. Build data-iterator - loader = ASRTask.build_streaming_iterator( + loader = DiarTask.build_streaming_iterator( data_path_and_name_and_type, dtype=dtype, batch_size=batch_size, diff --git a/funasr/bin/sv_infer.py b/funasr/bin/sv_infer.py index 9761497ae..1517bfa79 100755 --- a/funasr/bin/sv_infer.py +++ b/funasr/bin/sv_infer.py @@ -23,7 +23,6 @@ from typeguard import check_return_type from funasr.utils.cli_utils import get_commandline_args from funasr.tasks.sv import SVTask -from funasr.tasks.asr import ASRTask from funasr.torch_utils.device_funcs import to_device from funasr.torch_utils.set_all_random_seed import set_all_random_seed from funasr.utils import config_argparse diff --git a/funasr/bin/sv_inference_launch.py b/funasr/bin/sv_inference_launch.py index 8e00730f5..dbddd9fc6 100755 --- a/funasr/bin/sv_inference_launch.py +++ b/funasr/bin/sv_inference_launch.py @@ -34,7 +34,6 @@ from typeguard import check_return_type from funasr.utils.cli_utils import get_commandline_args from funasr.tasks.sv import SVTask -from funasr.tasks.asr import ASRTask from funasr.torch_utils.device_funcs import to_device from funasr.torch_utils.set_all_random_seed import set_all_random_seed from funasr.utils import config_argparse @@ -115,7 +114,7 @@ def inference_sv( data_path_and_name_and_type = [raw_inputs, "speech", "waveform"] # 3. Build data-iterator - loader = ASRTask.build_streaming_iterator( + loader = SVTask.build_streaming_iterator( data_path_and_name_and_type, dtype=dtype, batch_size=batch_size,