From 3aad0e15ecf53aa22e89c82f48fcf356df16df20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=98=89=E6=B8=8A?= Date: Sun, 23 Jul 2023 00:12:57 +0800 Subject: [PATCH] update --- egs/callhome/eend_ola/local/dump_feature.py | 2 +- egs/callhome/eend_ola/run_test.sh | 92 ++++++++++++++------- 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/egs/callhome/eend_ola/local/dump_feature.py b/egs/callhome/eend_ola/local/dump_feature.py index 8549c31fa..5d7a0610c 100644 --- a/egs/callhome/eend_ola/local/dump_feature.py +++ b/egs/callhome/eend_ola/local/dump_feature.py @@ -34,7 +34,7 @@ class KaldiData: utt2spk_file = os.path.join(self.data_dir, 'utt2spk.{}'.format(idx)) self.utt2spk = load_utt2spk(utt2spk_file) - wav_file = os.path.join(self.data_dir, 'wav.{}.scp'.format(idx)) + wav_file = os.path.join(self.data_dir, 'wav.scp.{}'.format(idx)) self.wavs = load_wav_scp(wav_file) reco2dur_file = os.path.join(self.data_dir, 'reco2dur.{}'.format(idx)) diff --git a/egs/callhome/eend_ola/run_test.sh b/egs/callhome/eend_ola/run_test.sh index a824a6832..188b61ed5 100644 --- a/egs/callhome/eend_ola/run_test.sh +++ b/egs/callhome/eend_ola/run_test.sh @@ -3,7 +3,7 @@ . ./path.sh || exit 1; # machines configuration -CUDA_VISIBLE_DEVICES="7" +CUDA_VISIBLE_DEVICES="0" gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') count=1 @@ -12,7 +12,7 @@ dump_cmd=utils/run.pl nj=64 # feature configuration -data_dir="/nfs/wangjiaming.wjm/EEND_DATA_sad30_snr10n15n20/convert_test/data" +data_dir="./data" simu_feats_dir="/nfs/wangjiaming.wjm/EEND_ARK_DATA/dump/simu_data/data" simu_feats_dir_chunk2000="/nfs/wangjiaming.wjm/EEND_ARK_DATA/dump/simu_data_chunk2000/data" callhome_feats_dir_chunk2000="/nfs/wangjiaming.wjm/EEND_ARK_DATA/dump/callhome_chunk2000/data" @@ -74,36 +74,68 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then simu_opts_sil_scale_array=(2 2 5 9) simu_opts_num_train=100000 - # for simulated data of chunk500 and chunk2000 - for dset in swb_sre_cv swb_sre_tr; do - if [ "$dset" == "swb_sre_tr" ]; then - n_mixtures=${simu_opts_num_train} - dataset=train - else - n_mixtures=500 - dataset=dev - fi - simu_data_dir=${dset}_ns"$(IFS="n"; echo "${simu_opts_num_speaker_array[*]}")"_beta"$(IFS="n"; echo "${simu_opts_sil_scale_array[*]}")"_${n_mixtures} - mkdir -p ${data_dir}/simu/data/${simu_data_dir}/.work - split_scps= - for n in $(seq $nj); do - split_scps="$split_scps ${data_dir}/simu/data/${simu_data_dir}/.work/wav.$n.scp" - done - utils/split_scp.pl "${data_dir}/simu/data/${simu_data_dir}/wav.scp" $split_scps || exit 1 - python local/split.py ${data_dir}/simu/data/${simu_data_dir} - # for chunk_size=500 - output_dir=${data_dir}/ark_data/dump/simu_data/$dataset - mkdir -p $output_dir/.logs - $dump_cmd --max-jobs-run $nj JOB=1:$nj $output_dir/.logs/dump.JOB.log \ +# # for simulated data of chunk500 and chunk2000 +# for dset in swb_sre_cv swb_sre_tr; do +# if [ "$dset" == "swb_sre_tr" ]; then +# n_mixtures=${simu_opts_num_train} +# dataset=train +# else +# n_mixtures=500 +# dataset=dev +# fi +# simu_data_dir=${dset}_ns"$(IFS="n"; echo "${simu_opts_num_speaker_array[*]}")"_beta"$(IFS="n"; echo "${simu_opts_sil_scale_array[*]}")"_${n_mixtures} +# mkdir -p ${data_dir}/simu/data/${simu_data_dir}/.work +# split_scps= +# for n in $(seq $nj); do +# split_scps="$split_scps ${data_dir}/simu/data/${simu_data_dir}/.work/wav.scp.$n" +# done +# utils/split_scp.pl "${data_dir}/simu/data/${simu_data_dir}/wav.scp" $split_scps || exit 1 +# python local/split.py ${data_dir}/simu/data/${simu_data_dir} +# # for chunk_size=500 +# output_dir=${data_dir}/ark_data/dump/simu_data/$dataset +# mkdir -p $output_dir/.logs +# $dump_cmd --max-jobs-run $nj JOB=1:$nj $output_dir/.logs/dump.JOB.log \ +# python local/dump_feature.py \ +# --data_dir ${data_dir}/simu/data/${simu_data_dir}/.work \ +# --output_dir $output_dir \ +# --index JOB +# mkdir -p ${data_dir}/ark_data/dump/simu_data/data/$dataset +# python local/gen_feats_scp.py \ +# --root_path ${data_dir}/ark_data/dump/simu_data/$dataset \ +# --out_path ${data_dir}/ark_data/dump/simu_data/data/$dataset \ +# --split_num $nj +# grep "ns2" ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats.scp > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats_2spkr.scp +# # for chunk_size=2000 +# output_dir=${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset +# mkdir -p $output_dir/.logs +# $dump_cmd --max-jobs-run $nj JOB=1:$nj $output_dir/.logs/dump.JOB.log \ +# python local/dump_feature.py \ +# --data_dir ${data_dir}/simu/data/${simu_data_dir}/.work \ +# --output_dir $output_dir \ +# --index JOB \ +# --num_frames 2000 +# mkdir -p ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset +# python local/gen_feats_scp.py \ +# --root_path ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset \ +# --out_path ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset \ +# --split_num $nj +# grep "ns2" ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats.scp > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats_2spkr.scp +# done + + # for callhome data + for dset in callhome1_spkall callhome2_spkall; do + find $data_dir/eval/$dset -maxdepth 1 -type f -exec cp {} {}.1 \; + output_dir=${data_dir}/ark_data/dump/callhome/$dset python local/dump_feature.py \ - --data_dir ${data_dir}/simu/data/${simu_data_dir}/.work \ - --output_dir ${data_dir}/ark_data/dump/simu_data/$dataset \ - --index JOB - mkdir -p ${data_dir}/ark_data/dump/simu_data/data/$dataset + --data_dir $data_dir/eval/$dset \ + --output_dir $output_dir \ + --index 1 \ + --num_frames 2000 + mkdir -p ${data_dir}/ark_data/dump/callhome/data/$dset python local/gen_feats_scp.py \ - --root_path ${data_dir}/ark_data/dump/simu_data/$dataset \ - --out_path ${data_dir}/ark_data/dump/simu_data/data/$dataset \ - --split_num $nj + --root_path ${data_dir}/ark_data/dump/callhome/$dset \ + --out_path ${data_dir}/ark_data/dump/callhome/data/$dset \ + --split_num 1 done fi