diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer.yaml index b01fd5b07..f35c89706 100644 --- a/egs/aishell/branchformer/conf/train_asr_branchformer.yaml +++ b/egs/aishell/branchformer/conf/train_asr_branchformer.yaml @@ -97,7 +97,7 @@ dataset_conf: sort_size: 500 batch_conf: batch_type: token - batch_size: 20000 + batch_size: 10000 num_workers: 8 log_interval: 50 diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer_bs10000.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer_bs10000.yaml deleted file mode 100644 index f35c89706..000000000 --- a/egs/aishell/branchformer/conf/train_asr_branchformer_bs10000.yaml +++ /dev/null @@ -1,104 +0,0 @@ -# network architecture -# encoder related -encoder: branchformer -encoder_conf: - output_size: 256 - use_attn: true - attention_heads: 4 - attention_layer_type: rel_selfattn - pos_enc_layer_type: rel_pos - rel_pos_type: latest - use_cgmlp: true - cgmlp_linear_units: 2048 - cgmlp_conv_kernel: 31 - use_linear_after_conv: false - gate_activation: identity - merge_method: concat - cgmlp_weight: 0.5 # used only if merge_method is "fixed_ave" - attn_branch_drop_rate: 0.0 # used only if merge_method is "learned_ave" - num_blocks: 24 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d - stochastic_depth_rate: 0.0 - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0. - src_attention_dropout_rate: 0. - -# frontend related -frontend: wav_frontend -frontend_conf: - fs: 16000 - window: hamming - n_mels: 80 - frame_length: 25 - frame_shift: 10 - lfr_m: 1 - lfr_n: 1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# optimization related -accum_grad: 1 -grad_clip: 5 -max_epoch: 180 -val_scheduler_criterion: - - valid - - acc -best_model_criterion: -- - valid - - acc - - max -keep_nbest_models: 10 - -optim: adam -optim_conf: - lr: 0.001 - weight_decay: 0.000001 -scheduler: warmuplr -scheduler_conf: - warmup_steps: 35000 - -specaug: specaug -specaug_conf: - apply_time_warp: true - time_warp_window: 5 - time_warp_mode: bicubic - apply_freq_mask: true - freq_mask_width_range: - - 0 - - 27 - num_freq_mask: 2 - apply_time_mask: true - time_mask_width_ratio_range: - - 0. - - 0.05 - num_time_mask: 10 - -dataset_conf: - data_names: speech,text - data_types: sound,text - shuffle: True - shuffle_conf: - shuffle_size: 2048 - sort_size: 500 - batch_conf: - batch_type: token - batch_size: 10000 - num_workers: 8 - -log_interval: 50 -normalize: None \ No newline at end of file diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000.yaml deleted file mode 100644 index 5f889d045..000000000 --- a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000.yaml +++ /dev/null @@ -1,104 +0,0 @@ -# network architecture -# encoder related -encoder: branchformer -encoder_conf: - output_size: 256 - use_attn: true - attention_heads: 4 - attention_layer_type: rel_selfattn - pos_enc_layer_type: rel_pos - rel_pos_type: latest - use_cgmlp: true - cgmlp_linear_units: 2048 - cgmlp_conv_kernel: 31 - use_linear_after_conv: false - gate_activation: identity - merge_method: concat - cgmlp_weight: 0.5 # used only if merge_method is "fixed_ave" - attn_branch_drop_rate: 0.0 # used only if merge_method is "learned_ave" - num_blocks: 24 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d - stochastic_depth_rate: 0.0 - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0. - src_attention_dropout_rate: 0. - -# frontend related -frontend: wav_frontend -frontend_conf: - fs: 16000 - window: hamming - n_mels: 80 - frame_length: 25 - frame_shift: 10 - lfr_m: 1 - lfr_n: 1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# optimization related -accum_grad: 1 -grad_clip: 5 -max_epoch: 180 -val_scheduler_criterion: - - valid - - acc -best_model_criterion: -- - valid - - acc - - max -keep_nbest_models: 10 - -optim: adam -optim_conf: - lr: 0.001 - weight_decay: 0.000001 -scheduler: warmuplr -scheduler_conf: - warmup_steps: 35000 - -specaug: specaug -specaug_conf: - apply_time_warp: true - time_warp_window: 5 - time_warp_mode: bicubic - apply_freq_mask: true - freq_mask_width_range: - - 0 - - 27 - num_freq_mask: 2 - apply_time_mask: true - time_mask_width_ratio_range: - - 0. - - 0.05 - num_time_mask: 10 - -dataset_conf: - data_names: speech,text - data_types: sound,text - shuffle: True - shuffle_conf: - shuffle_size: 2048 - sort_size: 500 - batch_conf: - batch_type: token - batch_size: 16000 - num_workers: 8 - -log_interval: 50 -normalize: None \ No newline at end of file diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000_gc2.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000_gc2.yaml deleted file mode 100644 index bd5d934f9..000000000 --- a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000_gc2.yaml +++ /dev/null @@ -1,104 +0,0 @@ -# network architecture -# encoder related -encoder: branchformer -encoder_conf: - output_size: 256 - use_attn: true - attention_heads: 4 - attention_layer_type: rel_selfattn - pos_enc_layer_type: rel_pos - rel_pos_type: latest - use_cgmlp: true - cgmlp_linear_units: 2048 - cgmlp_conv_kernel: 31 - use_linear_after_conv: false - gate_activation: identity - merge_method: concat - cgmlp_weight: 0.5 # used only if merge_method is "fixed_ave" - attn_branch_drop_rate: 0.0 # used only if merge_method is "learned_ave" - num_blocks: 24 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d - stochastic_depth_rate: 0.0 - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0. - src_attention_dropout_rate: 0. - -# frontend related -frontend: wav_frontend -frontend_conf: - fs: 16000 - window: hamming - n_mels: 80 - frame_length: 25 - frame_shift: 10 - lfr_m: 1 - lfr_n: 1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# optimization related -accum_grad: 2 -grad_clip: 5 -max_epoch: 180 -val_scheduler_criterion: - - valid - - acc -best_model_criterion: -- - valid - - acc - - max -keep_nbest_models: 10 - -optim: adam -optim_conf: - lr: 0.001 - weight_decay: 0.000001 -scheduler: warmuplr -scheduler_conf: - warmup_steps: 35000 - -specaug: specaug -specaug_conf: - apply_time_warp: true - time_warp_window: 5 - time_warp_mode: bicubic - apply_freq_mask: true - freq_mask_width_range: - - 0 - - 27 - num_freq_mask: 2 - apply_time_mask: true - time_mask_width_ratio_range: - - 0. - - 0.05 - num_time_mask: 10 - -dataset_conf: - data_names: speech,text - data_types: sound,text - shuffle: True - shuffle_conf: - shuffle_size: 2048 - sort_size: 500 - batch_conf: - batch_type: token - batch_size: 16000 - num_workers: 8 - -log_interval: 50 -normalize: None \ No newline at end of file diff --git a/egs/aishell/branchformer/run.sh b/egs/aishell/branchformer/run.sh index 37336ea4c..6bb4a0cc5 100755 --- a/egs/aishell/branchformer/run.sh +++ b/egs/aishell/branchformer/run.sh @@ -46,10 +46,7 @@ train_set=train valid_set=dev test_sets="dev test" -#asr_config=conf/train_asr_branchformer.yaml -#asr_config=conf/train_asr_branchformer_bs16000.yaml -asr_config=conf/train_asr_branchformer_bs16000_gc2.yaml -#asr_config=conf/train_asr_branchformer_bs10000.yaml +asr_config=conf/train_asr_branchformer.yaml model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}" inference_config=conf/decode_asr_transformer.yaml diff --git a/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml b/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml deleted file mode 100644 index 6e81f4830..000000000 --- a/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml +++ /dev/null @@ -1,101 +0,0 @@ -# network architecture -# encoder related -encoder: e_branchformer -encoder_conf: - output_size: 256 - attention_heads: 4 - attention_layer_type: rel_selfattn - pos_enc_layer_type: rel_pos - rel_pos_type: latest - cgmlp_linear_units: 1024 - cgmlp_conv_kernel: 31 - use_linear_after_conv: false - gate_activation: identity - num_blocks: 12 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - input_layer: conv2d - layer_drop_rate: 0.0 - linear_units: 1024 - positionwise_layer_type: linear - use_ffn: true - macaron_ffn: true - merge_conv_kernel: 31 - -# decoder related -decoder: transformer -decoder_conf: - attention_heads: 4 - linear_units: 2048 - num_blocks: 6 - dropout_rate: 0.1 - positional_dropout_rate: 0.1 - self_attention_dropout_rate: 0. - src_attention_dropout_rate: 0. - -# frontend related -frontend: wav_frontend -frontend_conf: - fs: 16000 - window: hamming - n_mels: 80 - frame_length: 25 - frame_shift: 10 - lfr_m: 1 - lfr_n: 1 - -# hybrid CTC/attention -model_conf: - ctc_weight: 0.3 - lsm_weight: 0.1 # label smoothing option - length_normalized_loss: false - -# optimization related -accum_grad: 1 -grad_clip: 5 -max_epoch: 180 -best_model_criterion: -- - valid - - acc - - max -keep_nbest_models: 10 - -optim: adam -optim_conf: - lr: 0.001 - weight_decay: 0.000001 -scheduler: warmuplr -scheduler_conf: - warmup_steps: 35000 - -specaug: specaug -specaug_conf: - apply_time_warp: true - time_warp_window: 5 - time_warp_mode: bicubic - apply_freq_mask: true - freq_mask_width_range: - - 0 - - 27 - num_freq_mask: 2 - apply_time_mask: true - time_mask_width_ratio_range: - - 0. - - 0.05 - num_time_mask: 10 - -dataset_conf: - data_names: speech,text - data_types: sound,text - shuffle: True - shuffle_conf: - shuffle_size: 2048 - sort_size: 500 - batch_conf: - batch_type: token - batch_size: 16000 - num_workers: 8 - -log_interval: 50 -normalize: None \ No newline at end of file diff --git a/egs/aishell/e_branchformer/run.sh b/egs/aishell/e_branchformer/run.sh index 8290ebf58..bcba2d75f 100755 --- a/egs/aishell/e_branchformer/run.sh +++ b/egs/aishell/e_branchformer/run.sh @@ -46,8 +46,7 @@ train_set=train valid_set=dev test_sets="dev test" -#asr_config=conf/train_asr_e_branchformer.yaml -asr_config=conf/train_asr_e_branchformer_bs16000.yaml +asr_config=conf/train_asr_e_branchformer.yaml model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}" inference_config=conf/decode_asr_transformer.yaml