From 2326aefc39d5070455eced7fa9acaadfb09eb154 Mon Sep 17 00:00:00 2001
From: Lizerui9926
Date: Fri, 2 Dec 2022 22:43:13 +0800
Subject: [PATCH] update modelscope details

---
 .../paraformer/paraformer_large_finetune.sh       |  9 +++++----
 .../aishell/paraformer/paraformer_large_infer.sh  |  2 ++
 .../paraformer/paraformer_large_finetune.sh       |  9 +++++----
 .../aishell2/paraformer/paraformer_large_infer.sh |  3 +++
 .../common/modelscope_common_finetune.sh          |  9 +++++----
 egs_modelscope/common/modelscope_common_infer.sh  | 15 ++++++---------
 .../common/modelscope_utils/download_model.py     |  6 +++++-
 .../common/modelscope_utils/modelscope_infer.sh   | 15 ++++++---------
 .../speechio/paraformer/paraformer_large_infer.sh |  2 ++
 .../paraformer/paraformer_large_infer.sh          |  2 ++
 10 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh b/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh
index a68338fb9..3c7577417 100755
--- a/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh
+++ b/egs_modelscope/aishell/paraformer/paraformer_large_finetune.sh
@@ -11,7 +11,7 @@ njob=4 # the number of jobs for each gpu
 train_cmd=utils/run.pl
 
 # general configuration
-feats_dir="." #feature output dictionary, for large data
+feats_dir="../DATA" #feature output dictionary, for large data
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -32,6 +32,7 @@ lfr_m=7
 lfr_n=6
 
 init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning
+model_revision="v1.0.3" # please do not modify the model revision
 cmvn_file=init_model/${init_model_name}/am.mvn
 seg_file=init_model/${init_model_name}/seg_dict
 vocab=init_model/${init_model_name}/tokens.txt
@@ -53,7 +54,7 @@ valid_set=dev
 test_sets="dev test"
 
 asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
-init_param="init_model/${init_model_name}/${init_model_name}"
+init_param="init_model/${init_model_name}/model.pb"
 
 inference_config=conf/decode_asr_transformer_noctc_1best.yaml
 inference_asr_model=valid.acc.ave_10best.pth
@@ -61,7 +62,7 @@ inference_asr_model=valid.acc.ave_10best.pth
 . utils/parse_options.sh || exit 1;
 
 # download model from modelscope
-python modelscope_utils/download_model.py --model_name ${init_model_name}
+python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision}
 
 if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then
     echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist"
@@ -152,7 +153,7 @@ fi
 
 world_size=$gpu_num # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # update asr train config.yaml
-    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
+    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
     finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml
     mkdir -p ${exp_dir}/exp/${model_dir}
diff --git a/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh b/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh
index 8e2c8f33d..ab65cdebd 100755
--- a/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/aishell/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -61,6 +62,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/aishell \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
diff --git a/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh b/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh
index d4b5dde73..b864370c1 100755
--- a/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh
+++ b/egs_modelscope/aishell2/paraformer/paraformer_large_finetune.sh
@@ -11,7 +11,7 @@ njob=4 # the number of jobs for each gpu
 train_cmd=utils/run.pl
 
 # general configuration
-feats_dir="." #feature output dictionary, for large data
+feats_dir="../DATA" #feature output dictionary, for large data
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -32,6 +32,7 @@ lfr_m=7
 lfr_n=6
 
 init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning
+model_revision="v1.0.3" # please do not modify the model revision
 cmvn_file=init_model/${init_model_name}/am.mvn
 seg_file=init_model/${init_model_name}/seg_dict
 vocab=init_model/${init_model_name}/tokens.txt
@@ -54,7 +55,7 @@ valid_set=dev_ios
 test_sets="dev_ios test_android test_ios test_mic"
 
 asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
-init_param="init_model/${init_model_name}/${init_model_name}"
+init_param="init_model/${init_model_name}/model.pb"
 
 inference_config=conf/decode_asr_transformer_noctc_1best.yaml
 inference_asr_model=valid.acc.ave_10best.pth
@@ -62,7 +63,7 @@ inference_asr_model=valid.acc.ave_10best.pth
 . utils/parse_options.sh || exit 1;
 
 # download model from modelscope
-python modelscope_utils/download_model.py --model_name ${init_model_name}
+python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision}
 
 if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then
     echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist"
@@ -167,7 +168,7 @@ fi
 
 world_size=$gpu_num # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # update asr train config.yaml
-    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
+    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
     finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml
     mkdir -p ${exp_dir}/exp/${model_dir}
diff --git a/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh b/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh
index 95b32fc75..2d5afd8f7 100755
--- a/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/aishell2/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -20,6 +21,7 @@ else
     inference_nj=$njob
 fi
 
+# LM configs
 use_lm=false
 beam_size=1
 lm_weight=0.0
@@ -47,6 +49,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/aishell2 \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
diff --git a/egs_modelscope/common/modelscope_common_finetune.sh b/egs_modelscope/common/modelscope_common_finetune.sh
index a43083f0c..39cd03e89 100755
--- a/egs_modelscope/common/modelscope_common_finetune.sh
+++ b/egs_modelscope/common/modelscope_common_finetune.sh
@@ -11,7 +11,7 @@ njob=4 # the number of jobs for each gpu
 train_cmd=utils/run.pl
 
 # general configuration
-feats_dir="." #feature output dictionary, for large data
+feats_dir="../DATA" #feature output dictionary, for large data
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -32,6 +32,7 @@ lfr_m=7
 lfr_n=6
 
 init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning
+model_revision="v1.0.3" # please do not modify the model revision
 cmvn_file=init_model/${init_model_name}/am.mvn
 seg_file=init_model/${init_model_name}/seg_dict
 vocab=init_model/${init_model_name}/tokens.txt
@@ -53,7 +54,7 @@ valid_set=dev
 test_sets="dev test"
 
 asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
-init_param="init_model/${init_model_name}/${init_model_name}"
+init_param="init_model/${init_model_name}/model.pb"
 
 inference_config=conf/decode_asr_transformer_noctc_1best.yaml
 inference_asr_model=valid.acc.ave_10best.pth
@@ -61,7 +62,7 @@ inference_asr_model=valid.acc.ave_10best.pth
 . utils/parse_options.sh || exit 1;
 
 # download model from modelscope
-python modelscope_utils/download_model.py --model_name ${init_model_name}
+python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision}
 
 if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then
     echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist"
@@ -158,7 +159,7 @@ fi
 
 world_size=$gpu_num # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # update asr train config.yaml
-    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
+    python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml
     finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml
     mkdir -p ${exp_dir}/exp/${model_dir}
diff --git a/egs_modelscope/common/modelscope_common_infer.sh b/egs_modelscope/common/modelscope_common_infer.sh
index 12b2cbcb2..9c607cb41 100755
--- a/egs_modelscope/common/modelscope_common_infer.sh
+++ b/egs_modelscope/common/modelscope_common_infer.sh
@@ -5,6 +5,7 @@ set -u
 set -o pipefail
 
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope
+model_revision="v1.0.3" # please do not modify the model revision
 data_dir= # wav list, ${data_dir}/wav.scp
 exp_dir="exp"
 gpuid_list="0,1"
@@ -29,7 +30,7 @@ beam_size=1
 lm_weight=0.0
 
 python modelscope_utils/download_model.py \
-    --model_name ${model_name}
+    --model_name ${model_name} --model_revision ${model_revision}
 
 if [ -d ${exp_dir} ]; then
     echo "${exp_dir} is already exists. if you want to decode again, please delete ${exp_dir} first."
@@ -50,12 +51,9 @@ done
 utils/split_scp.pl "${data_dir}/wav.scp" ${split_scps}
 
 if "${use_lm}"; then
-    cp ${exp_dir}/${model_name}/decode_asr_transformer.yaml ${exp_dir}/${model_name}/decode_asr_transformer.yaml.back
-    cp ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml.back
-    sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decode_asr_transformer.yaml
-    sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml
-    sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decode_asr_transformer.yaml
-    sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml
+    cp ${exp_dir}/${model_name}/decoding.yaml ${exp_dir}/${model_name}/decoding.yaml.back
+    sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decoding.yaml
+    sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decoding.yaml
 fi
 
 echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
@@ -73,6 +71,5 @@ ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdi
     cat ${_logdir}/text.${i}
 done | sort -k1 >${_dir}/text
 
-mv ${exp_dir}/${model_name}/decode_asr_transformer.yaml.back ${exp_dir}/${model_name}/decode_asr_transformer.yaml
-mv ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml.back ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml
+mv ${exp_dir}/${model_name}/decoding.yaml.back ${exp_dir}/${model_name}/decoding.yaml
 
diff --git a/egs_modelscope/common/modelscope_utils/download_model.py b/egs_modelscope/common/modelscope_utils/download_model.py
index 5d5f70dd1..51ba6b835 100755
--- a/egs_modelscope/common/modelscope_utils/download_model.py
+++ b/egs_modelscope/common/modelscope_utils/download_model.py
@@ -13,9 +13,13 @@ if __name__ == '__main__':
                         type=str,
                         default="speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                         help="model name in modelscope")
+    parser.add_argument("--model_revision",
+                        type=str,
+                        default="v1.0.3",
+                        help="model revision in modelscope")
     args = parser.parse_args()
 
     inference_pipeline = pipeline(
         task=Tasks.auto_speech_recognition,
         model='damo/{}'.format(args.model_name),
-        model_revision='v1.0.0')
+        model_revision=args.model_revision)
diff --git a/egs_modelscope/common/modelscope_utils/modelscope_infer.sh b/egs_modelscope/common/modelscope_utils/modelscope_infer.sh
index 1a56dce98..80f0d166b 100755
--- a/egs_modelscope/common/modelscope_utils/modelscope_infer.sh
+++ b/egs_modelscope/common/modelscope_utils/modelscope_infer.sh
@@ -7,6 +7,7 @@ set -o pipefail
 data_dir=
 exp_dir=
 model_name=
+model_revision=
 inference_nj=32
 gpuid_list="0,1,2,3"
 njob=32
@@ -30,7 +31,7 @@ fi
 
 # download model from modelscope
 python modelscope_utils/download_model.py \
-    --model_name ${model_name}
+    --model_name ${model_name} --model_revision ${model_revision}
 
 modelscope_dir=${HOME}/.cache/modelscope/hub/damo/${model_name}
 
@@ -48,12 +49,9 @@ for dset in ${test_sets}; do
     fi
 
     if "${use_lm}"; then
-        cp ${modelscope_dir}/decode_asr_transformer.yaml ${modelscope_dir}/decode_asr_transformer.yaml.back
-        cp ${modelscope_dir}/decode_asr_transformer_wav.yaml ${modelscope_dir}/decode_asr_transformer_wav.yaml.back
-        sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer.yaml
-        sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml
-        sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer.yaml
-        sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml
+        cp ${modelscope_dir}/decoding.yaml ${modelscope_dir}/decoding.yaml.back
+        sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decoding.yaml
+        sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decoding.yaml
     fi
 
     for n in $(seq "${inference_nj}"); do
@@ -85,6 +83,5 @@ for dset in ${test_sets}; do
 done
 
 if "${use_lm}"; then
-    mv ${modelscope_dir}/decode_asr_transformer.yaml.back ${modelscope_dir}/decode_asr_transformer.yaml
-    mv ${modelscope_dir}/decode_asr_transformer_wav.yaml.back ${modelscope_dir}/decode_asr_transformer_wav.yaml
+    mv ${modelscope_dir}/decoding.yaml.back ${modelscope_dir}/decoding.yaml
 fi
diff --git a/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh b/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh
index bcf8c331c..8cce7608c 100755
--- a/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/speechio/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -46,6 +47,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/speechio \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
diff --git a/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh b/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh
index 182a32488..88e099073 100755
--- a/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh
+++ b/egs_modelscope/wenetspeech/paraformer/paraformer_large_infer.sh
@@ -8,6 +8,7 @@ ori_data=
 data_dir=
 exp_dir=
 model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+model_revision="v1.0.3" # please do not modify the model revision
 inference_nj=32
 gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
 ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
@@ -46,6 +47,7 @@ modelscope_utils/modelscope_infer.sh \
     --exp_dir ${exp_dir}/wenetspeech \
     --test_sets "${test_sets}" \
     --model_name ${model_name} \
+    --model_revision ${model_revision} \
     --inference_nj ${inference_nj} \
     --gpuid_list ${gpuid_list} \
     --njob ${njob} \
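For reference, the sketch below shows how the pinned revision is consumed once this patch is applied: the pipeline() call mirrors what download_model.py builds from --model_name and --model_revision. The audio_in keyword and the wav path are assumptions taken from ModelScope's published ASR examples, not something this patch touches.

# Minimal sketch: pin the ModelScope model revision, as download_model.py does
# after this patch. The transcription call at the end is illustrative only;
# the audio_in keyword and example.wav are assumptions, not part of the patch.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

model_name = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"

inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/{}'.format(model_name),
    model_revision='v1.0.3')  # same pinned revision as model_revision in the scripts

# Assumed usage: transcribe a local 16 kHz wav file.
rec_result = inference_pipeline(audio_in='example.wav')
print(rec_result)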