mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
TOLD/SOND: add utt2num_frame script
This commit is contained in:
parent
7664f364e6
commit
a8701ad5df
@ -180,9 +180,11 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
||||
git lfs install
|
||||
git clone https://www.modelscope.cn/damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch.git
|
||||
mv speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch ${expdir}/
|
||||
echo "Done."
|
||||
fi
|
||||
|
||||
for dset in callhome1/nonoverlap_0s callhome2/nonoverlap_0s; do
|
||||
echo "Start to extract speaker embeddings for ${dset}"
|
||||
key_file=${datadir}/${dset}/wav.scp
|
||||
num_scp_file="$(<${key_file} wc -l)"
|
||||
_nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file")
|
||||
@ -207,6 +209,9 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
||||
--sv_model_file ${sv_exp_dir}/sv.pth \
|
||||
--output_dir "${_logdir}"/output.JOB
|
||||
cat ${_logdir}/output.*/xvector.scp | sort > ${datadir}/${dset}/utt2xvec
|
||||
|
||||
python script/calc_num_frames.py ${key_file} ${datadir}/${dset}/utt2num_frames
|
||||
echo "Done."
|
||||
done
|
||||
|
||||
fi
|
||||
@ -219,7 +224,7 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
|
||||
python -Wignore script/calc_real_meeting_frame_labels.py \
|
||||
${datadir}/${dset} ${dumpdir}/${dset}/labels \
|
||||
--n_spk 8 --frame_shift 0.01 --nj 16 --sr 8000
|
||||
find `pwd`/${dumpdir}/${dset}/labels -iname "*.lbl.mat" | awk -F'[/.]' '{print $(NF-2),$0}' | sort > ${datadir}/${dset}/labels.scp
|
||||
find `pwd`/${dumpdir}/${dset}/labels/ -iname "*.lbl.mat" | awk -F'[/.]' '{print $(NF-2),$0}' | sort > ${datadir}/${dset}/labels.scp
|
||||
done
|
||||
|
||||
fi
|
||||
|
||||
21
egs/callhome/diarization/sond/script/calc_num_frames.py
Normal file
21
egs/callhome/diarization/sond/script/calc_num_frames.py
Normal file
@ -0,0 +1,21 @@
|
||||
import os
|
||||
import sys
|
||||
import soundfile as sf
|
||||
from funasr.utils.misc import load_scp_as_list
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: calc_num_frames.py <wav_scp> <out_file>
    #
    # For every (uttid, wav_path) entry returned by load_scp_as_list(wav_scp),
    # write one line "<uttid> <num_frames>" to <out_file>.
    wav_scp = sys.argv[1]
    out_path = sys.argv[2]
    # Multiplied by the sample rate below to get samples per frame,
    # i.e. this is the frame shift in seconds.
    frame_shift = 0.01

    # os.path.dirname() returns "" for a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The context manager closes (and flushes) the file even if reading one
    # of the audio files fails part-way through the list.
    with open(out_path, "wt") as out_file:
        for uttid, wav_path in load_scp_as_list(wav_scp):
            # Only the length and sample rate are needed, so read the header
            # metadata instead of decoding the whole waveform into memory.
            info = sf.info(wav_path)
            num_frame = info.frames // int(info.samplerate * frame_shift)
            out_file.write(f"{uttid} {num_frame}\n")
|
||||
@ -1,6 +1,6 @@
|
||||
import numpy as np
|
||||
from opennmt.utils.job_runner import MultiProcessRunnerV3
|
||||
from opennmt.utils.misc import load_scp_as_list, load_scp_as_dict
|
||||
from funasr.utils.job_runner import MultiProcessRunnerV3
|
||||
from funasr.utils.misc import load_scp_as_list, load_scp_as_dict
|
||||
import os
|
||||
import librosa
|
||||
import scipy.io as sio
|
||||
@ -90,7 +90,7 @@ def process(task_args):
|
||||
for mid, wav_path, rttms in task_list:
|
||||
meeting_labels, spk_list = build_labels(wav_path, rttms, args.n_spk, args.remove_sil,
|
||||
args.sr, args.frame_shift)
|
||||
save_path = os.path.join(args.out_dir, "{}.lbl".format(mid))
|
||||
save_path = os.path.join(args.out_dir, "{}.lbl.mat".format(mid))
|
||||
sio.savemat(save_path, {"labels": meeting_labels.astype(bool), "spk_list": spk_list})
|
||||
# print mid
|
||||
return None
|
||||
|
||||
Loading…
Reference in New Issue
Block a user