diff --git a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/README.md b/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/README.md
deleted file mode 100644
index 6d9cd3024..000000000
--- a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# ModelScope Model
-
-## How to finetune and infer using a pretrained ModelScope Model
-
-### Inference
-
-Or you can use the finetuned model for inference directly.
-
-- Setting parameters in `infer.py`
- - audio_in: # support wav, url, bytes, and parsed audio format.
- - output_dir: # If the input format is wav.scp, it needs to be set.
-
-- Then you can run the pipeline to infer with:
-```python
- python infer.py
-```
-
-
-Modify inference related parameters in vad.yaml.
-
-- max_end_silence_time: The end-point silence duration to judge the end of sentence, the parameter range is 500ms~6000ms, and the default value is 800ms
-- speech_noise_thres: The balance of speech and silence scores, the parameter range is (-1,1)
- - The value tends to -1, the greater probability of noise being judged as speech
- - The value tends to 1, the greater probability of speech being judged as noise
diff --git a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/infer.py b/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/infer.py
deleted file mode 100755
index e11d5d21f..000000000
--- a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/infer.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-
-if __name__ == '__main__':
- audio_in = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav'
- output_dir = None
- inference_pipline = pipeline(
- task=Tasks.auto_speech_recognition,
- model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
- model_revision=None,
- output_dir=output_dir,
- batch_size=1,
- )
- segments_result = inference_pipline(audio_in=audio_in)
- print(segments_result)