update repo

This commit is contained in:
嘉渊 2023-05-23 19:37:26 +08:00
parent 6149a24866
commit 8bf745a55e
16 changed files with 28 additions and 28 deletions

View File

@ -47,7 +47,7 @@ BAC009S0002W0124 自 六 月 底 呼 和 浩 特 市 率 先 宣 布 取 消 限
Both files have two columns: the first column contains the wav IDs, and the second column contains the corresponding wav paths (in `wav.scp`) or label tokens (in `text`).
### Stage 1: Feature and CMVN Generation
This stage computes CMVN based on `train` dataset, which is used in the following stages. Users can set `nj` to control the number of jobs for computing CMVN. The generated CMVN file is saved as `$feats_dir/data/train/cmvn/cmvn.mvn`.
This stage computes CMVN based on `train` dataset, which is used in the following stages. Users can set `nj` to control the number of jobs for computing CMVN. The generated CMVN file is saved as `$feats_dir/data/train/cmvn/am.mvn`.
### Stage 2: Dictionary Preparation
This stage processes the dictionary, which is used as a mapping between label characters and integer indices during ASR training. The processed dictionary file is saved as `$feats_dir/data/${lang}_token_list/$token_type/tokens.txt`. An example of `tokens.txt` is as follows:

View File

@ -136,7 +136,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -186,7 +186,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -141,7 +141,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--init_param ${init_param} \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
--config $asr_config \
@ -190,7 +190,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \

View File

@ -141,7 +141,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--init_param ${init_param} \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -191,7 +191,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -136,7 +136,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -186,7 +186,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -147,7 +147,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text,embeds.scp" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -197,7 +197,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -136,7 +136,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -186,7 +186,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -29,6 +29,6 @@ $cmd JOB=1:$nj $logdir/cmvn.JOB.log \
python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
python utils/cmvn_converter.py --cmvn_json ${fbankdir}/cmvn/cmvn.json --am_mvn ${fbankdir}/cmvn/cmvn.mvn
python utils/cmvn_converter.py --cmvn_json ${fbankdir}/cmvn/cmvn.json --am_mvn ${fbankdir}/cmvn/am.mvn
echo "$0: Succeeded compute global cmvn"

View File

@ -138,7 +138,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--dataset_type $dataset_type \
--resume true \
@ -189,7 +189,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -109,7 +109,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--dataset_type $dataset_type \
--resume true \

View File

@ -138,7 +138,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--dataset_type $dataset_type \
--resume true \
@ -189,7 +189,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -148,7 +148,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text,embeds.scp" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--dataset_type $dataset_type \
--resume true \
@ -199,7 +199,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -138,7 +138,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--train_set ${train_set} \
--valid_set ${valid_set} \
--data_file_names "wav.scp,text" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--dataset_type $dataset_type \
--resume true \
@ -189,7 +189,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -29,6 +29,6 @@ $cmd JOB=1:$nj $logdir/cmvn.JOB.log \
python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
python utils/cmvn_converter.py --cmvn_json ${fbankdir}/cmvn/cmvn.json --am_mvn ${fbankdir}/cmvn/cmvn.mvn
python utils/cmvn_converter.py --cmvn_json ${fbankdir}/cmvn/cmvn.json --am_mvn ${fbankdir}/cmvn/am.mvn
echo "$0: Succeeded compute global cmvn"

View File

@ -150,7 +150,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--data_dir ${feats_dir}/data \
--train_set ${train_set} \
--valid_set ${valid_set} \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -201,7 +201,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \

View File

@ -146,7 +146,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
--data_dir ${feats_dir}/data \
--train_set ${train_set} \
--valid_set ${valid_set} \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--speed_perturb ${speed_perturb} \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
@ -197,7 +197,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \