From e4550565f0569e8520180e0d1382fbc3843cdd60 Mon Sep 17 00:00:00 2001 From: yhliang <429259365@qq.com> Date: Wed, 17 May 2023 14:21:36 +0800 Subject: [PATCH 1/3] update gen profile padding --- egs/alimeeting/sa-asr/local/gen_oracle_profile_padding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/alimeeting/sa-asr/local/gen_oracle_profile_padding.py b/egs/alimeeting/sa-asr/local/gen_oracle_profile_padding.py index b70a32a19..186f1de9f 100644 --- a/egs/alimeeting/sa-asr/local/gen_oracle_profile_padding.py +++ b/egs/alimeeting/sa-asr/local/gen_oracle_profile_padding.py @@ -42,8 +42,8 @@ if __name__=="__main__": global_spk_list_tmp = global_spk_list[: ] for spk in meeting_map_tmp[meeting]: global_spk_list_tmp.remove(spk) - padding_spk = random.sample(global_spk_list_tmp, 4 - num) - meeting_map_tmp[meeting] = meeting_map_tmp[meeting] + padding_spk + padding_spk = random.sample(global_spk_list_tmp, 4 - num) + meeting_map_tmp[meeting] = meeting_map_tmp[meeting] + padding_spk meeting_map = {} os.system('mkdir -p ' + path + '/oracle_profile_padding') From db77a41e29d5c190d1bbc0d7698e384f14fa8168 Mon Sep 17 00:00:00 2001 From: yhliang <429259365@qq.com> Date: Thu, 18 May 2023 11:11:19 +0800 Subject: [PATCH 2/3] fix m2met2 baseline bugs --- egs/alimeeting/sa-asr/asr_local.sh | 8 ++++---- egs/alimeeting/sa-asr/local/alimeeting_data_prep.sh | 11 ++++++----- egs/alimeeting/sa-asr/path.sh | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/egs/alimeeting/sa-asr/asr_local.sh b/egs/alimeeting/sa-asr/asr_local.sh index 30401b91f..05599b722 100755 --- a/egs/alimeeting/sa-asr/asr_local.sh +++ b/egs/alimeeting/sa-asr/asr_local.sh @@ -1153,10 +1153,10 @@ if ! "${skip_train}"; then mkdir -p ${sa_asr_exp}/log INIT_FILE=${sa_asr_exp}/ddp_init - if [ ! -f "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth" ]; then + if [ ! -f "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb" ]; then # download xvector extractor model file python local/download_xvector_model.py exp - log "Successfully download the pretrained xvector extractor to exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth" + log "Successfully download the pretrained xvector extractor to exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb" fi if [ -f $INIT_FILE ];then @@ -1195,8 +1195,8 @@ if ! "${skip_train}"; then --init_param "${asr_exp}/valid.acc.ave.pb:decoder.decoders.3:decoder.decoder4.2" \ --init_param "${asr_exp}/valid.acc.ave.pb:decoder.decoders.4:decoder.decoder4.3" \ --init_param "${asr_exp}/valid.acc.ave.pb:decoder.decoders.5:decoder.decoder4.4" \ - --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth:encoder:spk_encoder" \ - --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth:decoder:spk_encoder:decoder.output_dense" \ + --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb:encoder:spk_encoder" \ + --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb:decoder:spk_encoder:decoder.output_dense" \ --valid_data_path_and_name_and_type "${_asr_valid_dir}/${_scp},speech,${_type}" \ --valid_data_path_and_name_and_type "${_asr_valid_dir}/text,text,text" \ --valid_data_path_and_name_and_type "${_asr_valid_dir}/oracle_profile_nopadding.scp,profile,npy" \ diff --git a/egs/alimeeting/sa-asr/local/alimeeting_data_prep.sh b/egs/alimeeting/sa-asr/local/alimeeting_data_prep.sh index 7d39cdc14..c13ee429e 100755 --- a/egs/alimeeting/sa-asr/local/alimeeting_data_prep.sh +++ b/egs/alimeeting/sa-asr/local/alimeeting_data_prep.sh @@ -61,9 +61,9 @@ mkdir -p $near_dir if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then log "stage 1:process alimeeting near dir" - find -L $near_raw_dir/audio_dir -iname "*.wav" > $near_dir/wavlist + find -L $near_raw_dir/audio_dir -iname "*.wav" | sort > $near_dir/wavlist awk -F '/' '{print $NF}' $near_dir/wavlist | awk -F '.' '{print $1}' > $near_dir/uttid - find -L $near_raw_dir/textgrid_dir -iname "*.TextGrid" > $near_dir/textgrid.flist + find -L $near_raw_dir/textgrid_dir -iname "*.TextGrid" | sort > $near_dir/textgrid.flist n1_wav=$(wc -l < $near_dir/wavlist) n2_text=$(wc -l < $near_dir/textgrid.flist) log near file found $n1_wav wav and $n2_text text. @@ -90,9 +90,9 @@ fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then log "stage 2:process alimeeting far dir" - find -L $far_raw_dir/audio_dir -iname "*.wav" > $far_dir/wavlist + find -L $far_raw_dir/audio_dir -iname "*.wav" | sort > $far_dir/wavlist awk -F '/' '{print $NF}' $far_dir/wavlist | awk -F '.' '{print $1}' > $far_dir/uttid - find -L $far_raw_dir/textgrid_dir -iname "*.TextGrid" > $far_dir/textgrid.flist + find -L $far_raw_dir/textgrid_dir -iname "*.TextGrid" | sort > $far_dir/textgrid.flist n1_wav=$(wc -l < $far_dir/wavlist) n2_text=$(wc -l < $far_dir/textgrid.flist) log far file found $n1_wav wav and $n2_text text. @@ -120,7 +120,8 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then log "stage 3: finali data process" - + local/fix_data_dir.sh $near_dir + local/fix_data_dir.sh $far_dir local/copy_data_dir.sh $near_dir data/${tgt}_Ali_near local/copy_data_dir.sh $far_dir data/${tgt}_Ali_far diff --git a/egs/alimeeting/sa-asr/path.sh b/egs/alimeeting/sa-asr/path.sh index 5721f3f48..dfc2b78e0 100755 --- a/egs/alimeeting/sa-asr/path.sh +++ b/egs/alimeeting/sa-asr/path.sh @@ -2,4 +2,4 @@ export FUNASR_DIR=$PWD/../../.. # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C export PYTHONIOENCODING=UTF-8 -export PATH=$FUNASR_DIR/funasr/bin:$PATH \ No newline at end of file +export PATH=$FUNASR_DIR/funasr/bin:./utils:$PATH \ No newline at end of file From 1e650fac78bf03ed40051cc41fad9fa16c236e3d Mon Sep 17 00:00:00 2001 From: yhliang <429259365@qq.com> Date: Thu, 18 May 2023 11:27:24 +0800 Subject: [PATCH 3/3] fix bug --- funasr/models/frontend/default.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/funasr/models/frontend/default.py b/funasr/models/frontend/default.py index 2e1b0c454..c4dd7c537 100644 --- a/funasr/models/frontend/default.py +++ b/funasr/models/frontend/default.py @@ -102,8 +102,8 @@ class DefaultFrontend(AbsFrontend): if input_stft.dim() == 4: # h: (B, T, C, F) -> h: (B, T, F) if self.training: - if self.use_channel == None: - input_stft = input_stft[:, :, 0, :] + if self.use_channel is not None: + input_stft = input_stft[:, :, self.use_channel, :] else: # Select 1ch randomly ch = np.random.randint(input_stft.size(2))