# Review notes (text before the first "diff --git" is ignored by patch tools)
#
# Purpose of the patch: compute_cmvn.py reads the frontend settings (n_mels,
# frame_length, frame_shift, window, fs) from the training YAML, and
# compute_cmvn.sh can restrict the statistics to a leading fraction (--scale)
# of wav.scp.
#
# Changes made during review:
#   * The data directory is again the LAST argument of utils/compute_cmvn.sh and
#     is read after option parsing. Assumption to confirm: utils/parse_options.sh
#     is the Kaldi implementation, which stops at the first non-option word, so
#     options placed after the directory are never applied.
#   * The shell option is named --config_file instead of --config. Assumption to
#     confirm: as in Kaldi, parse_options.sh reserves --config for a shell
#     config file and sources it, which must not happen to a YAML file.
#   * compute_cmvn.sh fails early when the directory or the YAML is missing or
#     when --scale selects no utterance, quotes the new expansions, and no
#     longer ends the python command with a dangling line continuation.
#
# Caveats: leading whitespace and blank context lines were reconstructed from a
# whitespace-collapsed copy of the patch, and the blob ids on the "index" lines
# of the run.sh and compute_cmvn.sh entries are those of the submitted patch.
# Regenerate the patch with "git diff" before applying it.

diff --git a/egs/aishell/conformer/run.sh b/egs/aishell/conformer/run.sh
index f73605cfa..cdcd766d8 100755
--- a/egs/aishell/conformer/run.sh
+++ b/egs/aishell/conformer/run.sh
@@ -85,7 +85,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0 "${feats_dir}/data/${train_set}"
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell/data2vec_paraformer_finetune/run.sh b/egs/aishell/data2vec_paraformer_finetune/run.sh
index ff747147f..b322f8a6d 100755
--- a/egs/aishell/data2vec_paraformer_finetune/run.sh
+++ b/egs/aishell/data2vec_paraformer_finetune/run.sh
@@ -88,7 +88,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0 "${feats_dir}/data/${train_set}"
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell/data2vec_transformer_finetune/run.sh b/egs/aishell/data2vec_transformer_finetune/run.sh
index 6ac4fa905..8e694e09f 100755
--- a/egs/aishell/data2vec_transformer_finetune/run.sh
+++ b/egs/aishell/data2vec_transformer_finetune/run.sh
@@ -88,7 +88,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0 "${feats_dir}/data/${train_set}"
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell/paraformer/run.sh b/egs/aishell/paraformer/run.sh
index 474c199dd..dfb542c6c 100755
--- a/egs/aishell/paraformer/run.sh
+++ b/egs/aishell/paraformer/run.sh
@@ -85,7 +85,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0 "${feats_dir}/data/${train_set}"
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell/paraformerbert/run.sh b/egs/aishell/paraformerbert/run.sh
index 90d9aee12..23f7e9b3b 100755
--- a/egs/aishell/paraformerbert/run.sh
+++ b/egs/aishell/paraformerbert/run.sh
@@ -89,7 +89,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0 "${feats_dir}/data/${train_set}"
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell/transformer/run.sh b/egs/aishell/transformer/run.sh
index 011852f5e..ca3137ba8 100755
--- a/egs/aishell/transformer/run.sh
+++ b/egs/aishell/transformer/run.sh
@@ -85,7 +85,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0 "${feats_dir}/data/${train_set}"
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell/transformer/utils/compute_cmvn.py b/egs/aishell/transformer/utils/compute_cmvn.py
index 949cc084c..6c9b44512 100755
--- a/egs/aishell/transformer/utils/compute_cmvn.py
+++ b/egs/aishell/transformer/utils/compute_cmvn.py
@@ -5,6 +5,7 @@ import os
 import numpy as np
 import torchaudio
 import torchaudio.compliance.kaldi as kaldi
+import yaml
 
 
 def get_parser():
@@ -25,6 +26,11 @@ def get_parser():
         type=str,
         help="the path of wav scps",
     )
+    parser.add_argument(
+        "--config",
+        type=str,
+        help="the config file for computing cmvn",
+    )
     parser.add_argument(
         "--idx",
         default=1,
@@ -82,11 +88,27 @@ def main():
         #     mean_stats += np.sum(mat, axis=0)
         #     var_stats += np.sum(np.square(mat), axis=0)
         #     total_frames += mat.shape[0]
+
+    with open(args.config) as f:
+        configs = yaml.safe_load(f)
+        frontend_configs = configs.get("frontend_conf", {})
+        num_mel_bins = frontend_configs.get("n_mels", 80)
+        frame_length = frontend_configs.get("frame_length", 25)
+        frame_shift = frontend_configs.get("frame_shift", 10)
+        window_type = frontend_configs.get("window", "hamming")
+        resample_rate = frontend_configs.get("fs", 16000)
+        assert num_mel_bins == args.dim
+
     with open(wav_scp_file) as f:
         lines = f.readlines()
         for line in lines:
             _, wav_file = line.strip().split()
-            fbank = compute_fbank(wav_file, num_mel_bins=args.dim)
+            fbank = compute_fbank(wav_file,
+                                  num_mel_bins=args.dim,
+                                  frame_length=frame_length,
+                                  frame_shift=frame_shift,
+                                  resample_rate=resample_rate,
+                                  window_type=window_type)
             mean_stats += np.sum(fbank, axis=0)
             var_stats += np.sum(np.square(fbank), axis=0)
             total_frames += fbank.shape[0]
diff --git a/egs/aishell/transformer/utils/compute_cmvn.sh b/egs/aishell/transformer/utils/compute_cmvn.sh
index 75d88a266..ad8813d90 100755
--- a/egs/aishell/transformer/utils/compute_cmvn.sh
+++ b/egs/aishell/transformer/utils/compute_cmvn.sh
@@ -5,12 +5,27 @@
 nj=32
 cmd=./utils/run.pl
 feats_dim=80
+# YAML training config forwarded to compute_cmvn.py. Not named "config" on
+# purpose: Kaldi-style parse_options.sh sources the value of --config as a
+# shell script (assumed to apply to utils/parse_options.sh - TODO confirm).
+config_file=
+# Fraction of wav.scp, counted from its first line, used for the statistics.
+scale=1.0
 
 echo "$0 $@"
 
 . utils/parse_options.sh || exit 1;
 
 fbankdir=$1
+if [ -z "$fbankdir" ] || [ -z "$config_file" ]; then
+    echo "Usage: $0 [--cmd <cmd>] [--nj <n>] [--feats_dim <dim>] [--scale <fraction>] --config_file <yaml> <data-dir>" >&2
+    exit 1;
+fi
+
+# Use only the first round(scale * N) of the N entries in wav.scp.
+num_utts=$(awk -v scale="$scale" 'END { printf "%.0f\n", NR * scale }' "${fbankdir}/wav.scp") || exit 1;
+[ "$num_utts" -gt 0 ] || { echo "$0: --scale $scale selects no utterance from ${fbankdir}/wav.scp" >&2; exit 1; }
+head -n "$num_utts" "${fbankdir}/wav.scp" > "${fbankdir}/wav.scp.scale" || exit 1;
 
 split_dir=${fbankdir}/cmvn/split_${nj};
 mkdir -p $split_dir
@@ -18,14 +33,15 @@ split_scps=""
 for n in $(seq $nj); do
     split_scps="$split_scps $split_dir/wav.$n.scp"
 done
-utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
+utils/split_scp.pl ${fbankdir}/wav.scp.scale $split_scps || exit 1;
 
 logdir=${fbankdir}/cmvn/log
 $cmd JOB=1:$nj $logdir/cmvn.JOB.log \
     python utils/compute_cmvn.py \
       --dim ${feats_dim} \
       --wav_path $split_dir \
+      --config "$config_file" \
       --idx JOB
 
 python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
 