Merge branch 'dev_infer' of https://github.com/alibaba/FunASR into dev_infer

This commit is contained in:
嘉渊 2023-05-17 15:16:06 +08:00
commit e1ba6bc138
6 changed files with 14 additions and 13 deletions

View File

@ -7,8 +7,9 @@ https://arxiv.org/abs/2303.05397
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# 初始化推理 pipeline
# 当以原始音频作为输入时使用配置文件 sond.yaml并设置 mode 为sond_demo
# initialize the pipeline for inference
# when running inference on raw waveform files, use the config file `sond.yaml`
# and set mode to `sond_demo`
inference_diar_pipline = pipeline(
mode="sond_demo",
num_workers=0,
@ -19,7 +20,8 @@ inference_diar_pipline = pipeline(
sv_model_revision="master",
)
# 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音
# use audio_list as the input: the first file is the recording to be analyzed,
# and the remaining files are enrollment utterances for the different speakers
audio_list = [
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav",

View File

@ -7,8 +7,9 @@ https://arxiv.org/abs/2211.10243
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# 初始化推理 pipeline
# 当以原始音频作为输入时使用配置文件 sond.yaml并设置 mode 为sond_demo
# initialize the pipeline for inference
# when running inference on raw waveform files, use the config file `sond.yaml`
# and set mode to `sond_demo`
inference_diar_pipline = pipeline(
mode="sond_demo",
num_workers=0,
@ -19,7 +20,8 @@ inference_diar_pipline = pipeline(
sv_model_revision="master",
)
# 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音
# use audio_list as the input: the first file is the recording to be analyzed,
# and the remaining files are enrollment utterances for the different speakers
audio_list = [
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",

View File

@ -7,13 +7,13 @@ if __name__ == '__main__':
model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
)
# 两个语音为相同说话人
# the two utterances are from the same speaker
rec_result = inference_sv_pipline(audio_in=(
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav'))
print("Similarity", rec_result["scores"])
# 两个语音为不同说话人
# the two utterances are from different speakers
rec_result = inference_sv_pipline(audio_in=(
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav'))

View File

@ -38,7 +38,6 @@ from typeguard import check_return_type
from scipy.signal import medfilt
from funasr.utils.cli_utils import get_commandline_args
from funasr.tasks.diar import DiarTask
from funasr.tasks.asr import ASRTask
from funasr.tasks.diar import EENDOLADiarTask
from funasr.torch_utils.device_funcs import to_device
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
@ -187,7 +186,7 @@ def inference_sond(
raise TypeError("raw_inputs must be a list or tuple in [speech, profile1, profile2, ...] ")
else:
# 3. Build data-iterator
loader = ASRTask.build_streaming_iterator(
loader = DiarTask.build_streaming_iterator(
data_path_and_name_and_type,
dtype=dtype,
batch_size=batch_size,

View File

@ -23,7 +23,6 @@ from typeguard import check_return_type
from funasr.utils.cli_utils import get_commandline_args
from funasr.tasks.sv import SVTask
from funasr.tasks.asr import ASRTask
from funasr.torch_utils.device_funcs import to_device
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
from funasr.utils import config_argparse

View File

@ -34,7 +34,6 @@ from typeguard import check_return_type
from funasr.utils.cli_utils import get_commandline_args
from funasr.tasks.sv import SVTask
from funasr.tasks.asr import ASRTask
from funasr.torch_utils.device_funcs import to_device
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
from funasr.utils import config_argparse
@ -115,7 +114,7 @@ def inference_sv(
data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
# 3. Build data-iterator
loader = ASRTask.build_streaming_iterator(
loader = SVTask.build_streaming_iterator(
data_path_and_name_and_type,
dtype=dtype,
batch_size=batch_size,