diff --git a/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py b/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py
index 3db6f7d62..9e80d2be3 100644
--- a/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py
+++ b/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py
@@ -7,8 +7,9 @@ https://arxiv.org/abs/2303.05397
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 
-# 初始化推理 pipeline
-# 当以原始音频作为输入时使用配置文件 sond.yaml，并设置 mode 为sond_demo
+# initialize the pipeline for inference
+# when using raw waveform files for inference, please use the config file `sond.yaml`
+# and set mode to `sond_demo`
 inference_diar_pipline = pipeline(
     mode="sond_demo",
     num_workers=0,
@@ -19,7 +20,8 @@ inference_diar_pipline = pipeline(
     sv_model_revision="master",
 )
 
-# 以 audio_list 作为输入，其中第一个音频为待检测语音，后面的音频为不同说话人的声纹注册语音
+# use audio_list as the input, where the first file is the recording to be detected
+# and the following files are the enrollment utterances of the different speakers
 audio_list = [
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav",
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav",
diff --git a/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py b/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py
index db101930e..dc867b006 100644
--- a/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py
+++ b/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py
@@ -7,8 +7,9 @@ https://arxiv.org/abs/2211.10243
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
 
-# 初始化推理 pipeline
-# 当以原始音频作为输入时使用配置文件 sond.yaml，并设置 mode 为sond_demo
+# initialize the pipeline for inference
+# when using raw waveform files for inference, please use the config file `sond.yaml`
+# and set mode to `sond_demo`
 inference_diar_pipline = pipeline(
     mode="sond_demo",
     num_workers=0,
@@ -19,7 +20,8 @@ inference_diar_pipline = pipeline(
     sv_model_revision="master",
 )
 
-# 以 audio_list 作为输入，其中第一个音频为待检测语音，后面的音频为不同说话人的声纹注册语音
+# use audio_list as the input, where the first file is the recording to be detected
+# and the following files are the enrollment utterances of the different speakers
 audio_list = [
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
diff --git a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py
index c51313db0..7a5382740 100644
--- a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py
+++ b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py
@@ -7,13 +7,13 @@ if __name__ == '__main__':
         model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
     )
 
-    # 两个语音为相同说话人
+    # the two utterances are from the same speaker
     rec_result = inference_sv_pipline(audio_in=(
         'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
         'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav'))
     print("Similarity", rec_result["scores"])
 
-    # 两个语音为不同说话人
+    # the two utterances are from different speakers
     rec_result = inference_sv_pipline(audio_in=(
         'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
         'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav'))
diff --git a/funasr/bin/diar_inference_launch.py b/funasr/bin/diar_inference_launch.py
index 69d37d6d3..e0d900e76 100755
--- a/funasr/bin/diar_inference_launch.py
+++ b/funasr/bin/diar_inference_launch.py
@@ -38,7 +38,6 @@ from typeguard import check_return_type
 from scipy.signal import medfilt
 from funasr.utils.cli_utils import get_commandline_args
 from funasr.tasks.diar import DiarTask
-from funasr.tasks.asr import ASRTask
 from funasr.tasks.diar import EENDOLADiarTask
 from funasr.torch_utils.device_funcs import to_device
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
@@ -187,7 +186,7 @@ def inference_sond(
                 raise TypeError("raw_inputs must be a list or tuple in [speech, profile1, profile2, ...] ")
         else:
             # 3. Build data-iterator
-            loader = ASRTask.build_streaming_iterator(
+            loader = DiarTask.build_streaming_iterator(
                 data_path_and_name_and_type,
                 dtype=dtype,
                 batch_size=batch_size,
diff --git a/funasr/bin/sv_infer.py b/funasr/bin/sv_infer.py
index 9761497ae..1517bfa79 100755
--- a/funasr/bin/sv_infer.py
+++ b/funasr/bin/sv_infer.py
@@ -23,7 +23,6 @@ from typeguard import check_return_type
 
 from funasr.utils.cli_utils import get_commandline_args
 from funasr.tasks.sv import SVTask
-from funasr.tasks.asr import ASRTask
 from funasr.torch_utils.device_funcs import to_device
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
 from funasr.utils import config_argparse
diff --git a/funasr/bin/sv_inference_launch.py b/funasr/bin/sv_inference_launch.py
index 8e00730f5..dbddd9fc6 100755
--- a/funasr/bin/sv_inference_launch.py
+++ b/funasr/bin/sv_inference_launch.py
@@ -34,7 +34,6 @@ from typeguard import check_return_type
 
 from funasr.utils.cli_utils import get_commandline_args
 from funasr.tasks.sv import SVTask
-from funasr.tasks.asr import ASRTask
 from funasr.torch_utils.device_funcs import to_device
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
 from funasr.utils import config_argparse
@@ -115,7 +114,7 @@ def inference_sv(
             data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
         
         # 3. Build data-iterator
-        loader = ASRTask.build_streaming_iterator(
+        loader = SVTask.build_streaming_iterator(
             data_path_and_name_and_type,
             dtype=dtype,
             batch_size=batch_size,