From 2326aefc39d5070455eced7fa9acaadfb09eb154 Mon Sep 17 00:00:00 2001
From: Lizerui9926
Date: Fri, 2 Dec 2022 22:43:13 +0800
Subject: [PATCH] update modelscope details

---
 .../paraformer/paraformer_large_finetune.sh       |  9 +++++----
 .../aishell/paraformer/paraformer_large_infer.sh  |  2 ++
 .../paraformer/paraformer_large_finetune.sh       |  9 +++++----
 .../aishell2/paraformer/paraformer_large_infer.sh |  3 +++
 .../common/modelscope_common_finetune.sh          |  9 +++++----
 egs_modelscope/common/modelscope_common_infer.sh  | 15 ++++++---------
 .../common/modelscope_utils/download_model.py     |  6 +++++-
 .../common/modelscope_utils/modelscope_infer.sh   | 15 ++++++---------
 .../speechio/paraformer/paraformer_large_infer.sh |  2 ++
 .../paraformer/paraformer_large_infer.sh          |  2 ++
 10 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh b/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh
index a68338fb9..3c7577417 100755
--- a/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh
+++ b/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh
@@ -11,7 +11,7 @@ njob=4 # the number of jobs for each gpu
 train_cmd=utils/run.pl
 
 # general configuration
-feats_dir="." #feature output dictionary, for large data
+feats_dir="../DATA" #feature output dictionary, for large data
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -32,6 +32,7 @@ lfr_m=7
 lfr_n=6
 
 init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning
+model_revision="v1.0.3" # please do not modify the model revision
 cmvn_file=init_model/${init_model_name}/am.mvn
 seg_file=init_model/${init_model_name}/seg_dict
 vocab=init_model/${init_model_name}/tokens.txt
@@ -53,7 +54,7 @@ valid_set=dev
 test_sets="dev test"
 
 asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
-init_param="init_model/${init_model_name}/${init_model_name}"
+init_param="init_model/${init_model_name}/model.pb"
 
 inference_config=conf/decode_asr_transformer_noctc_1best.yaml
 inference_asr_model=valid.acc.ave_10best.pth
@@ -61,7 +62,7 @@ inference_asr_model=valid.acc.ave_10best.pth
 . utils/parse_options.sh || exit 1;
 
 # download model from modelscope
-python modelscope_utils/download_model.py --model_name ${init_model_name}
+python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision}
 
 if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then
     echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist"
@@ -152,7 +153,7 @@ fi
 
 world_size=$gpu_num # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # update asr train config.yaml
-    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
+    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
     finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml
     mkdir -p ${exp_dir}/exp/${model_dir}
diff --git a/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh b/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh
index 8e2c8f33d..ab65cdebd 100755
--- a/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -61,6 +62,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/aishell \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
diff --git a/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh b/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh
index d4b5dde73..b864370c1 100755
--- a/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh
+++ b/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh
@@ -11,7 +11,7 @@ njob=4 # the number of jobs for each gpu
 train_cmd=utils/run.pl
 
 # general configuration
-feats_dir="." #feature output dictionary, for large data
+feats_dir="../DATA" #feature output dictionary, for large data
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -32,6 +32,7 @@ lfr_m=7
 lfr_n=6
 
 init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning
+model_revision="v1.0.3" # please do not modify the model revision
 cmvn_file=init_model/${init_model_name}/am.mvn
 seg_file=init_model/${init_model_name}/seg_dict
 vocab=init_model/${init_model_name}/tokens.txt
@@ -54,7 +55,7 @@ valid_set=dev_ios
 test_sets="dev_ios test_android test_ios test_mic"
 
 asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
-init_param="init_model/${init_model_name}/${init_model_name}"
+init_param="init_model/${init_model_name}/model.pb"
 
 inference_config=conf/decode_asr_transformer_noctc_1best.yaml
 inference_asr_model=valid.acc.ave_10best.pth
@@ -62,7 +63,7 @@ inference_asr_model=valid.acc.ave_10best.pth
 . utils/parse_options.sh || exit 1;
 
 # download model from modelscope
-python modelscope_utils/download_model.py --model_name ${init_model_name}
+python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision}
 
 if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then
     echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist"
@@ -167,7 +168,7 @@ fi
 
 world_size=$gpu_num # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # update asr train config.yaml
-    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
+    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
     finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml
     mkdir -p ${exp_dir}/exp/${model_dir}
diff --git a/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh b/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh
index 95b32fc75..2d5afd8f7 100755
--- a/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -20,6 +21,7 @@ else
     inference_nj=$njob
 fi
 
+# LM configs
 use_lm=false
 beam_size=1
 lm_weight=0.0
@@ -47,6 +49,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/aishell2 \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
diff --git a/egs_modelscope/common/modelscope_common_finetune.sh b/egs_modelscope/common/modelscope_common_finetune.sh
index a43083f0c..39cd03e89 100755
--- a/egs_modelscope/common/modelscope_common_finetune.sh
+++ b/egs_modelscope/common/modelscope_common_finetune.sh
@@ -11,7 +11,7 @@ njob=4 # the number of jobs for each gpu
 train_cmd=utils/run.pl
 
 # general configuration
-feats_dir="." #feature output dictionary, for large data
+feats_dir="../DATA" #feature output dictionary, for large data
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -32,6 +32,7 @@ lfr_m=7
 lfr_n=6
 
 init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning
+model_revision="v1.0.3" # please do not modify the model revision
 cmvn_file=init_model/${init_model_name}/am.mvn
 seg_file=init_model/${init_model_name}/seg_dict
 vocab=init_model/${init_model_name}/tokens.txt
@@ -53,7 +54,7 @@ valid_set=dev
 test_sets="dev test"
 
 asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
-init_param="init_model/${init_model_name}/${init_model_name}"
+init_param="init_model/${init_model_name}/model.pb"
 
 inference_config=conf/decode_asr_transformer_noctc_1best.yaml
 inference_asr_model=valid.acc.ave_10best.pth
@@ -61,7 +62,7 @@ inference_asr_model=valid.acc.ave_10best.pth
 . utils/parse_options.sh || exit 1;
 
 # download model from modelscope
-python modelscope_utils/download_model.py --model_name ${init_model_name}
+python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision}
 
 if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then
     echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist"
@@ -158,7 +159,7 @@ fi
 
 world_size=$gpu_num # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # update asr train config.yaml
-    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
+    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
     finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml
     mkdir -p ${exp_dir}/exp/${model_dir}
diff --git a/egs_modelscope/common/modelscope_common_infer.sh b/egs_modelscope/common/modelscope_common_infer.sh
index 12b2cbcb2..9c607cb41 100755
--- a/egs_modelscope/common/modelscope_common_infer.sh
+++ b/egs_modelscope/common/modelscope_common_infer.sh
@@ -5,6 +5,7 @@ set -u
 set -o pipefail
 
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope
+model_revision="v1.0.3" # please do not modify the model revision
 data_dir= # wav list, ${data_dir}/wav.scp
 exp_dir="exp"
 gpuid_list="0,1"
@@ -29,7 +30,7 @@ beam_size=1
 lm_weight=0.0
 
 python modelscope_utils/download_model.py \
-    --model_name ${model_name}
+    --model_name ${model_name} --model_revision ${model_revision}
 
 if [ -d ${exp_dir} ]; then
     echo "${exp_dir} is already exists. if you want to decode again, please delete ${exp_dir} first."
@@ -50,12 +51,9 @@ done
 utils/split_scp.pl "${data_dir}/wav.scp" ${split_scps}
 
 if "${use_lm}"; then
-    cp ${exp_dir}/${model_name}/decode_asr_transformer.yaml ${exp_dir}/${model_name}/decode_asr_transformer.yaml.back
-    cp ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml.back
-    sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decode_asr_transformer.yaml
-    sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml
-    sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decode_asr_transformer.yaml
-    sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml
+    cp ${exp_dir}/${model_name}/decoding.yaml ${exp_dir}/${model_name}/decoding.yaml.back
+    sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decoding.yaml
+    sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decoding.yaml
 fi
 
 echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
@@ -73,6 +71,5 @@ ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdi
     cat ${_logdir}/text.${i}
 done | sort -k1 >${_dir}/text
 
-mv ${exp_dir}/${model_name}/decode_asr_transformer.yaml.back ${exp_dir}/${model_name}/decode_asr_transformer.yaml
-mv ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml.back ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml
+mv ${exp_dir}/${model_name}/decoding.yaml.back ${exp_dir}/${model_name}/decoding.yaml
 
diff --git a/egs_modelscope/common/modelscope_utils/download_model.py b/egs_modelscope/common/modelscope_utils/download_model.py
index 5d5f70dd1..51ba6b835 100755
--- a/egs_modelscope/common/modelscope_utils/download_model.py
+++ b/egs_modelscope/common/modelscope_utils/download_model.py
@@ -13,9 +13,13 @@ if __name__ == '__main__':
                         type=str,
                         default="speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                         help="model name in modelscope")
+    parser.add_argument("--model_revision",
+                        type=str,
+                        default="v1.0.3",
+                        help="model revision in modelscope")
     args = parser.parse_args()
 
     inference_pipeline = pipeline(
         task=Tasks.auto_speech_recognition,
         model='damo/{}'.format(args.model_name),
-        model_revision='v1.0.0')
+        model_revision=args.model_revision)
diff --git a/egs_modelscope/common/modelscope_utils/modelscope_infer.sh b/egs_modelscope/common/modelscope_utils/modelscope_infer.sh
index 1a56dce98..80f0d166b 100755
--- a/egs_modelscope/common/modelscope_utils/modelscope_infer.sh
+++ b/egs_modelscope/common/modelscope_utils/modelscope_infer.sh
@@ -7,6 +7,7 @@ set -o pipefail
 data_dir=
 exp_dir=
 model_name=
+model_revision=
 inference_nj=32
 gpuid_list="0,1,2,3"
 njob=32
@@ -30,7 +31,7 @@ fi
 
 # download model from modelscope
 python modelscope_utils/download_model.py \
-    --model_name ${model_name}
+    --model_name ${model_name} --model_revision ${model_revision}
 
 modelscope_dir=${HOME}/.cache/modelscope/hub/damo/${model_name}
 
@@ -48,12 +49,9 @@ for dset in ${test_sets}; do
     fi
 
     if "${use_lm}"; then
-        cp ${modelscope_dir}/decode_asr_transformer.yaml ${modelscope_dir}/decode_asr_transformer.yaml.back
-        cp ${modelscope_dir}/decode_asr_transformer_wav.yaml ${modelscope_dir}/decode_asr_transformer_wav.yaml.back
-        sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer.yaml
-        sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml
-        sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer.yaml
-        sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml
+        cp ${modelscope_dir}/decoding.yaml ${modelscope_dir}/decoding.yaml.back
+        sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decoding.yaml
+        sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decoding.yaml
     fi
 
     for n in $(seq "${inference_nj}"); do
@@ -85,6 +83,5 @@ for dset in ${test_sets}; do
 done
 
 if "${use_lm}"; then
-    mv ${modelscope_dir}/decode_asr_transformer.yaml.back ${modelscope_dir}/decode_asr_transformer.yaml
-    mv ${modelscope_dir}/decode_asr_transformer_wav.yaml.back ${modelscope_dir}/decode_asr_transformer_wav.yaml
+    mv ${modelscope_dir}/decoding.yaml.back ${modelscope_dir}/decoding.yaml
 fi
diff --git a/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh b/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh
index bcf8c331c..8cce7608c 100755
--- a/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -46,6 +47,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/speechio \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
diff --git a/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh b/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh
index 182a32488..88e099073 100755
--- a/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -46,6 +47,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/wenetspeech \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
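For reference, the sketch below shows how the pinned revision is consumed once this patch is applied: the pipeline() call mirrors what download_model.py builds from --model_name and --model_revision. The audio_in keyword and the wav path are assumptions taken from ModelScope's published ASR examples, not something this patch touches.

# Minimal sketch: pin the ModelScope model revision, as download_model.py does
# after this patch. The transcription call at the end is illustrative only;
# the audio_in keyword and example.wav are assumptions, not part of the patch.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

model_name = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"

inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/{}'.format(model_name),
    model_revision='v1.0.3')  # same pinned revision as model_revision in the scripts

# Assumed usage: transcribe a local 16 kHz wav file.
rec_result = inference_pipeline(audio_in='example.wav')
print(rec_result)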