This commit is contained in:
嘉渊 2023-04-25 01:29:12 +08:00
parent 4c3e502cb8
commit 70f9a8f890
2 changed files with 4 additions and 4 deletions

View File

@@ -169,12 +169,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--token_list $token_list \
--train_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${train_set}/${scp},speech,${type} \
--train_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${train_set}/text,text,text \
--train_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${train_set}/speech_shape \
--train_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${train_set}/text_shape.char \
--valid_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${valid_set}/${scp},speech,${type} \
--valid_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${valid_set}/text,text,text \
--valid_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}/speech_shape \
--valid_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}/text_shape.char \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
--config $asr_config \

View File

@@ -162,6 +162,10 @@ def prepare_data(args, distributed_option):
if args.dataset_type == "large" and args.train_data_file is not None:
return
distributed = distributed_option.distributed
if not hasattr(args, "train_set"):
args.train_set = "train"
if not hasattr(args, "dev_set"):
args.dev_set = "validation"
if not distributed or distributed_option.dist_rank == 0:
filter_wav_text(args.data_dir, args.train_set)
filter_wav_text(args.data_dir, args.dev_set)