diff --git a/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml b/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml
new file mode 100644
index 000000000..6e81f4830
--- /dev/null
+++ b/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml
@@ -0,0 +1,101 @@
+# network architecture (e_branchformer encoder + transformer decoder)
+# encoder related
+encoder: e_branchformer
+encoder_conf:
+    output_size: 256
+    attention_heads: 4
+    attention_layer_type: rel_selfattn
+    pos_enc_layer_type: rel_pos
+    rel_pos_type: latest
+    cgmlp_linear_units: 1024
+    cgmlp_conv_kernel: 31
+    use_linear_after_conv: false
+    gate_activation: identity
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d
+    layer_drop_rate: 0.0
+    linear_units: 1024
+    positionwise_layer_type: linear
+    use_ffn: true
+    macaron_ffn: true
+    merge_conv_kernel: 31
+
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.
+    src_attention_dropout_rate: 0.
+
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 1
+    lfr_n: 1
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+
+# optimization related
+accum_grad: 1
+grad_clip: 5
+max_epoch: 180
+best_model_criterion:   # list of [split, metric, mode] triples; presumably ranks checkpoints by validation accuracy -- confirm
+-   - valid
+    - acc
+    - max
+keep_nbest_models: 10
+
+optim: adam
+optim_conf:
+    lr: 0.001
+    weight_decay: 0.000001
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 35000
+
+specaug: specaug   # specaug related
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 27
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_ratio_range:
+    - 0.
+    - 0.05
+    num_time_mask: 10
+
+dataset_conf:   # dataset / batching related
+    data_names: speech,text
+    data_types: sound,text
+    shuffle: True
+    shuffle_conf:
+        shuffle_size: 2048
+        sort_size: 500
+    batch_conf:
+        batch_type: token
+        batch_size: 16000   # the value the "bs16000" suffix of this file's name refers to
+    num_workers: 8
+
+log_interval: 50
+normalize: None   # NOTE(review): plain YAML reads this as the string "None", not null -- confirm the config loader expects that
diff --git a/egs/aishell/e_branchformer/run.sh b/egs/aishell/e_branchformer/run.sh
index bcba2d75f..8290ebf58 100755
--- a/egs/aishell/e_branchformer/run.sh
+++ b/egs/aishell/e_branchformer/run.sh
@@ -46,7 +46,8 @@ train_set=train
 valid_set=dev
 test_sets="dev test"
 
-asr_config=conf/train_asr_e_branchformer.yaml
+#asr_config=conf/train_asr_e_branchformer.yaml
+asr_config=conf/train_asr_e_branchformer_bs16000.yaml
 model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}"
 
 inference_config=conf/decode_asr_transformer.yaml