From 3b42ace3d49c0cc66e68df5e45c06cb764b051dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BF=97=E6=B5=A9?=
Date: Mon, 27 Feb 2023 12:22:34 +0800
Subject: [PATCH] fixbug for sd and sv

Add a ModelScope unit test for the SOND speaker-diarization pipeline:
wav-scp inference on CPU and on GPU, plus profile-free "sond_demo"
inference from raw wav files. The CPU/GPU variants explicitly pass
ModelScope's `device` argument so they actually exercise different
devices. Also extend sond_inference._forward to accept `bytes` audio
input and to wrap a single non-list example into a batch of one.

---
 .../diarization/sond/unit_test_modelscope.py  | 69 +++++++++++++++++++
 funasr/bin/sond_inference.py                  |  4 +-
 2 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 egs/alimeeting/diarization/sond/unit_test_modelscope.py

diff --git a/egs/alimeeting/diarization/sond/unit_test_modelscope.py b/egs/alimeeting/diarization/sond/unit_test_modelscope.py
new file mode 100644
index 000000000..af3fae833
--- /dev/null
+++ b/egs/alimeeting/diarization/sond/unit_test_modelscope.py
@@ -0,0 +1,69 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import numpy as np
+import os
+
+
+def test_wav_cpu_infer():
+    output_dir = "./outputs"
+    data_path_and_name_and_type = [
+        "data/unit_test/test_wav.scp,speech,sound",
+        "data/unit_test/test_profile.scp,profile,kaldi_ark",
+    ]
+    diar_pipeline = pipeline(
+        task=Tasks.speaker_diarization,
+        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
+        mode="sond",
+        output_dir=output_dir,
+        num_workers=0,
+        log_level="WARNING",
+        device="cpu",
+    )
+    results = diar_pipeline(data_path_and_name_and_type)
+    print(results)
+
+
+def test_wav_gpu_infer():
+    output_dir = "./outputs"
+    data_path_and_name_and_type = [
+        "data/unit_test/test_wav.scp,speech,sound",
+        "data/unit_test/test_profile.scp,profile,kaldi_ark",
+    ]
+    diar_pipeline = pipeline(
+        task=Tasks.speaker_diarization,
+        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
+        mode="sond",
+        output_dir=output_dir,
+        num_workers=0,
+        log_level="WARNING",
+        device="gpu",
+    )
+    results = diar_pipeline(data_path_and_name_and_type)
+    print(results)
+
+
+def test_without_profile_gpu_infer():
+    raw_inputs = [
+        "data/unit_test/raw_inputs/record.wav",
+        "data/unit_test/raw_inputs/spk1.wav",
+        "data/unit_test/raw_inputs/spk2.wav",
+        "data/unit_test/raw_inputs/spk3.wav",
+        "data/unit_test/raw_inputs/spk4.wav"
+    ]
+    diar_pipeline = pipeline(
+        task=Tasks.speaker_diarization,
+        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
+        mode="sond_demo",
+        num_workers=0,
+        log_level="WARNING",
+        param_dict={},
+    )
+    results = diar_pipeline(raw_inputs=raw_inputs)
+    print(results)
+
+
+if __name__ == '__main__':
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    test_wav_cpu_infer()
+    test_wav_gpu_infer()
+    test_without_profile_gpu_infer()
diff --git a/funasr/bin/sond_inference.py b/funasr/bin/sond_inference.py
index 47675777a..7632a22fb 100755
--- a/funasr/bin/sond_inference.py
+++ b/funasr/bin/sond_inference.py
@@ -312,7 +312,7 @@ def inference_modelscope(
 
     def _forward(
             data_path_and_name_and_type: Sequence[Tuple[str, str, str]] = None,
-            raw_inputs: List[List[Union[np.ndarray, torch.Tensor, str]]] = None,
+            raw_inputs: List[List[Union[np.ndarray, torch.Tensor, str, bytes]]] = None,
             output_dir_v2: Optional[str] = None,
             param_dict: Optional[dict] = None,
     ):
@@ -321,6 +321,8 @@ def inference_modelscope(
         if isinstance(raw_inputs, (list, tuple)):
             assert all([len(example) >= 2 for example in raw_inputs]), \
                 "The length of test case in raw_inputs must larger than 1 (>=2)."
+            if not isinstance(raw_inputs, list):
+                raw_inputs = [raw_inputs]
 
         def prepare_dataset():
             for idx, example in enumerate(raw_inputs):