From 001083ba31a8105ba3e377c5108ae892f562c97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=98=89=E6=B8=8A?= Date: Wed, 17 May 2023 16:40:22 +0800 Subject: [PATCH] update repo --- .../conf/train_asr_paraformer_transformer_12e_6d_3072_768.yaml | 2 ++ .../conf/train_asr_transformer_12e_6d_3072_768.yaml | 2 ++ .../conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml | 2 ++ egs/aishell/paraformerbert/local/extract_embeds.sh | 2 ++ egs/aishell/transformer/conf/train_asr_transformer.yaml | 2 ++ egs/aishell2/conformer/conf/train_asr_conformer.yaml | 1 + ...train_asr_paraformer_conformer_20e_1280_320_6d_1280_320.yaml | 1 + egs/aishell2/paraformerbert/local/extract_embeds.sh | 2 ++ egs/aishell2/transformer/conf/train_asr_transformer.yaml | 1 + 9 files changed, 15 insertions(+) diff --git a/egs/aishell/data2vec_paraformer_finetune/conf/train_asr_paraformer_transformer_12e_6d_3072_768.yaml b/egs/aishell/data2vec_paraformer_finetune/conf/train_asr_paraformer_transformer_12e_6d_3072_768.yaml index 287b08891..1e1acee21 100644 --- a/egs/aishell/data2vec_paraformer_finetune/conf/train_asr_paraformer_transformer_12e_6d_3072_768.yaml +++ b/egs/aishell/data2vec_paraformer_finetune/conf/train_asr_paraformer_transformer_12e_6d_3072_768.yaml @@ -105,6 +105,8 @@ predictor_conf: r_order: 1 dataset_conf: + data_names: speech,text + data_types: sound,text shuffle: True shuffle_conf: shuffle_size: 2048 diff --git a/egs/aishell/data2vec_transformer_finetune/conf/train_asr_transformer_12e_6d_3072_768.yaml b/egs/aishell/data2vec_transformer_finetune/conf/train_asr_transformer_12e_6d_3072_768.yaml index ad3ad2e12..32a7b5bb0 100644 --- a/egs/aishell/data2vec_transformer_finetune/conf/train_asr_transformer_12e_6d_3072_768.yaml +++ b/egs/aishell/data2vec_transformer_finetune/conf/train_asr_transformer_12e_6d_3072_768.yaml @@ -96,6 +96,8 @@ specaug_conf: num_time_mask: 2 dataset_conf: + data_names: speech,text + data_types: sound,text shuffle: True shuffle_conf: shuffle_size: 2048 diff --git a/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml b/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml index bac8d0497..6a14b7fcf 100644 --- a/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml +++ b/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml @@ -93,6 +93,8 @@ predictor_conf: tail_threshold: 0.45 dataset_conf: + data_names: speech,text + data_types: sound,text shuffle: True shuffle_conf: shuffle_size: 2048 diff --git a/egs/aishell/paraformerbert/local/extract_embeds.sh b/egs/aishell/paraformerbert/local/extract_embeds.sh index c07e528fa..ca0c878ec 100755 --- a/egs/aishell/paraformerbert/local/extract_embeds.sh +++ b/egs/aishell/paraformerbert/local/extract_embeds.sh @@ -54,6 +54,8 @@ for data_set in train dev test;do cat ${local_records_dir}/embeds.${JOB}.shape || exit 1; done > ${local_scp_dir_raw}/embeds.shape fi + + cp ${local_scp_dir_raw}/embeds.scp ${raw_dataset_path}/data/${data_set}/embeds.scp done echo "embeds is in: ${local_scp_dir_raw}" diff --git a/egs/aishell/transformer/conf/train_asr_transformer.yaml b/egs/aishell/transformer/conf/train_asr_transformer.yaml index 22e651bd9..b38656514 100644 --- a/egs/aishell/transformer/conf/train_asr_transformer.yaml +++ b/egs/aishell/transformer/conf/train_asr_transformer.yaml @@ -73,6 +73,8 @@ scheduler_conf: warmup_steps: 25000 dataset_conf: + data_names: speech,text + data_types: sound,text shuffle: True shuffle_conf: shuffle_size: 2048 diff --git a/egs/aishell2/conformer/conf/train_asr_conformer.yaml b/egs/aishell2/conformer/conf/train_asr_conformer.yaml index 3fd034f4f..8183378fb 100644 --- a/egs/aishell2/conformer/conf/train_asr_conformer.yaml +++ b/egs/aishell2/conformer/conf/train_asr_conformer.yaml @@ -84,6 +84,7 @@ specaug_conf: num_time_mask: 2 dataset_conf: + data_names: speech,text data_types: sound,text shuffle: True shuffle_conf: diff --git a/egs/aishell2/paraformer/conf/train_asr_paraformer_conformer_20e_1280_320_6d_1280_320.yaml b/egs/aishell2/paraformer/conf/train_asr_paraformer_conformer_20e_1280_320_6d_1280_320.yaml index 7fc9794a1..3ecf44e31 100644 --- a/egs/aishell2/paraformer/conf/train_asr_paraformer_conformer_20e_1280_320_6d_1280_320.yaml +++ b/egs/aishell2/paraformer/conf/train_asr_paraformer_conformer_20e_1280_320_6d_1280_320.yaml @@ -94,6 +94,7 @@ predictor_conf: r_order: 1 dataset_conf: + data_names: speech,text data_types: sound,text shuffle: True shuffle_conf: diff --git a/egs/aishell2/paraformerbert/local/extract_embeds.sh b/egs/aishell2/paraformerbert/local/extract_embeds.sh index ee899248f..d7dd4f20b 100755 --- a/egs/aishell2/paraformerbert/local/extract_embeds.sh +++ b/egs/aishell2/paraformerbert/local/extract_embeds.sh @@ -54,6 +54,8 @@ for data_set in train dev_ios;do cat ${local_records_dir}/embeds.${JOB}.shape || exit 1; done > ${local_scp_dir_raw}/embeds.shape fi + + cp ${local_scp_dir_raw}/embeds.scp ${raw_dataset_path}/data/${data_set}/embeds.scp done echo "embeds is in: ${local_scp_dir_raw}" diff --git a/egs/aishell2/transformer/conf/train_asr_transformer.yaml b/egs/aishell2/transformer/conf/train_asr_transformer.yaml index 2d16c1890..1b76e2a51 100644 --- a/egs/aishell2/transformer/conf/train_asr_transformer.yaml +++ b/egs/aishell2/transformer/conf/train_asr_transformer.yaml @@ -78,6 +78,7 @@ specaug_conf: num_time_mask: 2 dataset_conf: + data_names: speech,text data_types: sound,text shuffle: True shuffle_conf: