diff --git a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/README.md b/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/README.md deleted file mode 100644 index 6d9cd3024..000000000 --- a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# ModelScope Model - -## How to finetune and infer using a pretrained ModelScope Model - -### Inference - -Or you can use the finetuned model for inference directly. - -- Setting parameters in `infer.py` - - audio_in: # support wav, url, bytes, and parsed audio format. - - output_dir: # If the input format is wav.scp, it needs to be set. - -- Then you can run the pipeline to infer with: -```python - python infer.py -``` - - -Modify inference related parameters in vad.yaml. - -- max_end_silence_time: The end-point silence duration to judge the end of sentence, the parameter range is 500ms~6000ms, and the default value is 800ms -- speech_noise_thres: The balance of speech and silence scores, the parameter range is (-1,1) - - The value tends to -1, the greater probability of noise being judged as speech - - The value tends to 1, the greater probability of speech being judged as noise diff --git a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/infer.py b/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/infer.py deleted file mode 100755 index e11d5d21f..000000000 --- a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common-pytorch/infer.py +++ /dev/null @@ -1,15 +0,0 @@ -from modelscope.pipelines import pipeline -from modelscope.utils.constant import Tasks - -if __name__ == '__main__': - audio_in = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav' - output_dir = None - inference_pipline = pipeline( - task=Tasks.auto_speech_recognition, - model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", - model_revision=None, - output_dir=output_dir, - batch_size=1, - ) - segments_result = inference_pipline(audio_in=audio_in) - print(segments_result)