Mirror of https://github.com/modelscope/FunASR, synced 2025-09-15 14:48:36 +08:00

Merge branch 'dev_infer' of https://github.com/alibaba/FunASR into dev_infer

This commit is contained in: commit e1ba6bc138
@@ -7,8 +7,9 @@ https://arxiv.org/abs/2303.05397
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# 初始化推理 pipeline
# 当以原始音频作为输入时使用配置文件 sond.yaml,并设置 mode 为 sond_demo
# initialize the pipeline for inference
# when using raw waveform files for inference, please use the config file `sond.yaml`
# and set mode to `sond_demo`
inference_diar_pipline = pipeline(
    mode="sond_demo",
    num_workers=0,
@@ -19,7 +20,8 @@ inference_diar_pipline = pipeline(
    sv_model_revision="master",
)

# 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音
# use audio_list as the input, where the first one is the recording to be detected
# and the following files are enrollments for different speakers
audio_list = [
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav",
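The hunk above elides the arguments that sit between `num_workers=0` and `sv_model_revision="master"`. For orientation, a minimal sketch of what the full constructor might look like: the `task` constant and the SOND model ID below are assumptions (check the model card for the real values), while the `sv_model` ID is the x-vector model that appears in the speaker-verification example later in this diff.

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

inference_diar_pipline = pipeline(
    task=Tasks.speaker_diarization,  # assumed task constant
    mode="sond_demo",
    num_workers=0,
    # hypothetical SOND model ID -- substitute the one from the model card
    model="damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch",
    # x-vector model ID taken from the speaker-verification example below
    sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
    sv_model_revision="master",
)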
@@ -7,8 +7,9 @@ https://arxiv.org/abs/2211.10243
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# 初始化推理 pipeline
# 当以原始音频作为输入时使用配置文件 sond.yaml,并设置 mode 为 sond_demo
# initialize the pipeline for inference
# when using raw waveform files for inference, please use the config file `sond.yaml`
# and set mode to `sond_demo`
inference_diar_pipline = pipeline(
    mode="sond_demo",
    num_workers=0,
@@ -19,7 +20,8 @@ inference_diar_pipline = pipeline(
    sv_model_revision="master",
)

# 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音
# use audio_list as the input, where the first one is the recording to be detected
# and the following files are enrollments for different speakers
audio_list = [
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
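A usage sketch for the snippet above: the pipeline is invoked on the enrollment list and the result printed. The keyword-argument call form mirrors the speaker-verification example further down; the exact structure of the returned result depends on the model card, and the pipeline is assumed to have been constructed as in the earlier sketch.

audio_list = [
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
    # append the remaining enrollment recordings here (one per speaker)
]
results = inference_diar_pipline(audio_in=audio_list)
print(results)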
@@ -7,13 +7,13 @@ if __name__ == '__main__':
        model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
    )

    # 两个语音为相同说话人
    # the same speaker
    rec_result = inference_sv_pipline(audio_in=(
        'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
        'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav'))
    print("Similarity", rec_result["scores"])

    # 两个语音为不同说话人
    # different speaker
    rec_result = inference_sv_pipline(audio_in=(
        'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
        'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav'))
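A small follow-up sketch turning the similarity score into an accept/reject decision. It assumes, as in the example above, that the pipeline returns a dict with a "scores" entry; the 0.75 threshold is purely illustrative and should be tuned on held-out trials.

rec_result = inference_sv_pipline(audio_in=(
    'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
    'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav'))
scores = rec_result["scores"]
# the field may hold a scalar or a list depending on the model card; handle both
score = float(scores[0]) if isinstance(scores, (list, tuple)) else float(scores)
threshold = 0.75  # illustrative value only
print("same speaker" if score >= threshold else "different speaker")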
@@ -38,7 +38,6 @@ from typeguard import check_return_type
 from scipy.signal import medfilt
 from funasr.utils.cli_utils import get_commandline_args
 from funasr.tasks.diar import DiarTask
-from funasr.tasks.asr import ASRTask
 from funasr.tasks.diar import EENDOLADiarTask
 from funasr.torch_utils.device_funcs import to_device
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
@@ -187,7 +186,7 @@ def inference_sond(
         raise TypeError("raw_inputs must be a list or tuple in [speech, profile1, profile2, ...] ")
     else:
         # 3. Build data-iterator
-        loader = ASRTask.build_streaming_iterator(
+        loader = DiarTask.build_streaming_iterator(
             data_path_and_name_and_type,
             dtype=dtype,
             batch_size=batch_size,
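For context, a sketch of how a streaming iterator like the one built above is typically consumed in FunASR's inference scripts, assuming it yields (keys, batch) pairs as the ESPnet-style iterators do; run_batches, model, and device are placeholders for illustration, not names from this diff.

import torch
from funasr.torch_utils.device_funcs import to_device

def run_batches(loader, model, device="cpu"):
    # iterate the streaming loader, move each batch to the target device,
    # and collect (utterance keys, model output) pairs
    results = []
    with torch.no_grad():
        for keys, batch in loader:
            batch = to_device(batch, device)
            results.append((keys, model(**batch)))
    return results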
@@ -23,7 +23,6 @@ from typeguard import check_return_type
 
 from funasr.utils.cli_utils import get_commandline_args
 from funasr.tasks.sv import SVTask
-from funasr.tasks.asr import ASRTask
 from funasr.torch_utils.device_funcs import to_device
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
 from funasr.utils import config_argparse
@@ -34,7 +34,6 @@ from typeguard import check_return_type
 
 from funasr.utils.cli_utils import get_commandline_args
 from funasr.tasks.sv import SVTask
-from funasr.tasks.asr import ASRTask
 from funasr.torch_utils.device_funcs import to_device
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
 from funasr.utils import config_argparse
@@ -115,7 +114,7 @@ def inference_sv(
         data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
 
     # 3. Build data-iterator
-    loader = ASRTask.build_streaming_iterator(
+    loader = SVTask.build_streaming_iterator(
         data_path_and_name_and_type,
         dtype=dtype,
         batch_size=batch_size,
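The similarity printed by the speaker-verification pipeline earlier in this diff is a score between two speaker embeddings. As a self-contained reference, the usual cosine-similarity computation; this is the generic formulation, not necessarily the exact scoring backend the model card uses.

import numpy as np

def cosine_similarity(emb_a: np.ndarray, emb_b: np.ndarray) -> float:
    # cosine similarity between two speaker embeddings (e.g. x-vectors)
    a = emb_a / (np.linalg.norm(emb_a) + 1e-12)
    b = emb_b / (np.linalg.norm(emb_b) + 1e-12)
    return float(np.dot(a, b))

# toy check with random vectors standing in for real embeddings
rng = np.random.default_rng(0)
print(cosine_similarity(rng.standard_normal(512), rng.standard_normal(512)))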