From 12fdd421ef0830bd8b614d04fe8f790d2c29a112 Mon Sep 17 00:00:00 2001
From: onlybetheone
Date: Thu, 9 Feb 2023 11:08:43 +0800
Subject: [PATCH] add speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline
 & speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online finetune and
 infer scripts

---
 .../finetune.py | 35 +++++++++++++++++++
 .../infer.py    | 13 +++++++
 .../finetune.py | 35 +++++++++++++++++++
 .../infer.py    | 13 +++++++
 4 files changed, 96 insertions(+)
 create mode 100644 egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/finetune.py
 create mode 100644 egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/infer.py
 create mode 100644 egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/finetune.py
 create mode 100644 egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/infer.py

diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/finetune.py
new file mode 100644
index 000000000..3a90ed21f
--- /dev/null
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/finetune.py
@@ -0,0 +1,35 @@
+import os
+from modelscope.metainfo import Trainers
+from modelscope.trainers import build_trainer
+from funasr.datasets.ms_dataset import MsDataset
+
+
+def modelscope_finetune(params):
+    if not os.path.exists(params["output_dir"]):
+        os.makedirs(params["output_dir"], exist_ok=True)
+    # dataset split ["train", "validation"]
+    ds_dict = MsDataset.load(params["data_dir"])
+    kwargs = dict(
+        model=params["model"],
+        model_revision=params["model_revision"],
+        data_dir=ds_dict,
+        dataset_type=params["dataset_type"],
+        work_dir=params["output_dir"],
+        batch_bins=params["batch_bins"],
+        max_epoch=params["max_epoch"],
+        lr=params["lr"])
+    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
+    trainer.train()
+
+
+if __name__ == '__main__':
+    params = {}
+    params["output_dir"] = "./checkpoint"
+    params["data_dir"] = "./data"
+    params["batch_bins"] = 2000
+    params["dataset_type"] = "small"
+    params["max_epoch"] = 50
+    params["lr"] = 0.00005
+    params["model"] = "damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline"
+    params["model_revision"] = None
+    modelscope_finetune(params)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/infer.py
new file mode 100644
index 000000000..b7fcd5933
--- /dev/null
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline/infer.py
@@ -0,0 +1,13 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+if __name__ == "__main__":
+    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_vi.wav"
+    output_dir = "./results"
+    inference_pipeline = pipeline(
+        task=Tasks.auto_speech_recognition,
+        model="damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline",
+        output_dir=output_dir,
+    )
+    rec_result = inference_pipeline(audio_in=audio_in)
+    print(rec_result)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/finetune.py
new file mode 100644
index 000000000..5be25850a
--- /dev/null
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/finetune.py
@@ -0,0 +1,35 @@
+import os
+from modelscope.metainfo import Trainers
+from modelscope.trainers import build_trainer
+from funasr.datasets.ms_dataset import MsDataset
+
+
+def modelscope_finetune(params):
+    if not os.path.exists(params["output_dir"]):
+        os.makedirs(params["output_dir"], exist_ok=True)
+    # dataset split ["train", "validation"]
+    ds_dict = MsDataset.load(params["data_dir"])
+    kwargs = dict(
+        model=params["model"],
+        model_revision=params["model_revision"],
+        data_dir=ds_dict,
+        dataset_type=params["dataset_type"],
+        work_dir=params["output_dir"],
+        batch_bins=params["batch_bins"],
+        max_epoch=params["max_epoch"],
+        lr=params["lr"])
+    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
+    trainer.train()
+
+
+if __name__ == '__main__':
+    params = {}
+    params["output_dir"] = "./checkpoint"
+    params["data_dir"] = "./data"
+    params["batch_bins"] = 2000
+    params["dataset_type"] = "small"
+    params["max_epoch"] = 50
+    params["lr"] = 0.00005
+    params["model"] = "damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online"
+    params["model_revision"] = None
+    modelscope_finetune(params)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/infer.py
new file mode 100644
index 000000000..869082b07
--- /dev/null
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/infer.py
@@ -0,0 +1,13 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+if __name__ == "__main__":
+    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_vi.wav"
+    output_dir = "./results"
+    inference_pipeline = pipeline(
+        task=Tasks.auto_speech_recognition,
+        model="damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online",
+        output_dir=output_dir,
+    )
+    rec_result = inference_pipeline(audio_in=audio_in)
+    print(rec_result)