mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
update
This commit is contained in:
parent
91425c670b
commit
6494a503f4
73
egs/callhome/eend_ola/local/make_callhome.sh
Normal file
73
egs/callhome/eend_ola/local/make_callhome.sh
Normal file
@ -0,0 +1,73 @@
|
||||
#!/bin/bash
# Copyright 2017 David Snyder
# Apache 2.0.
#
# This script prepares the Callhome portion of the NIST SRE 2000
# corpus (LDC2001S97). It is the evaluation dataset used in the
# callhome_diarization recipe.
#
# It writes <out-data-dir>/callhome (wav.scp, segments, utt2spk, spk2utt,
# reco2num_spk, fullref.rttm) and then splits the recordings into two
# halves, <out-data-dir>/callhome1 and <out-data-dir>/callhome2.
#
# All path expansions are quoted so that directories containing whitespace
# do not get word-split.

if [ $# -ne 2 ]; then
  echo "Usage: $0 <callhome-speech> <out-data-dir>"
  echo "e.g.: $0 /mnt/data/LDC2001S97 data/"
  exit 1;
fi

src_dir=$1
data_dir=$2

tmp_dir=$data_dir/callhome/.tmp/
mkdir -p "$tmp_dir" || exit 1

# Download some metadata that wasn't provided in the LDC release.
# Stop immediately if the download or extraction fails; everything below
# depends on the extracted key files.
if [ ! -d "$tmp_dir/sre2000-key" ]; then
  wget --no-check-certificate -P "$tmp_dir/" \
    http://www.openslr.org/resources/10/sre2000-key.tar.gz || exit 1
  tar -xvf "$tmp_dir/sre2000-key.tar.gz" -C "$tmp_dir/" || exit 1
fi

# The list of 500 recordings
awk '{print $1}' "$tmp_dir/sre2000-key/reco2num" > "$tmp_dir/reco.list"

# Create wav.scp file
count=0
missing=0
while read -r reco; do
  # Keep only the first hit: several matches would otherwise be joined by
  # newlines into a single, malformed wav.scp entry.
  path=$(find "$src_dir" -name "$reco.sph" | head -n 1)
  if [ -z "${path// }" ]; then
    >&2 echo "$0: Missing Sphere file for $reco"
    missing=$((missing+1))
  else
    echo "$reco sph2pipe -f wav -p $path |"
  fi
  count=$((count+1))
done < "$tmp_dir/reco.list" > "$data_dir/callhome/wav.scp"

if [ $missing -gt 0 ]; then
  echo "$0: Missing $missing out of $count recordings"
fi

cp "$tmp_dir/sre2000-key/segments" "$data_dir/callhome/"
awk '{print $1, $2}' "$data_dir/callhome/segments" > "$data_dir/callhome/utt2spk"
utils/utt2spk_to_spk2utt.pl "$data_dir/callhome/utt2spk" > "$data_dir/callhome/spk2utt"
cp "$tmp_dir/sre2000-key/reco2num" "$data_dir/callhome/reco2num_spk"
cp "$tmp_dir/sre2000-key/fullref.rttm" "$data_dir/callhome/"

utils/validate_data_dir.sh --no-text --no-feats "$data_dir/callhome"
utils/fix_data_dir.sh "$data_dir/callhome"

# Start both halves as full copies, then restrict each one by its wav.scp.
utils/copy_data_dir.sh "$data_dir/callhome" "$data_dir/callhome1"
utils/copy_data_dir.sh "$data_dir/callhome" "$data_dir/callhome2"

# callhome1: the first 250 entries of the shuffled recording list.
utils/shuffle_list.pl "$data_dir/callhome/wav.scp" | head -n 250 \
  | utils/filter_scp.pl - "$data_dir/callhome/wav.scp" \
  > "$data_dir/callhome1/wav.scp"
utils/fix_data_dir.sh "$data_dir/callhome1"
# callhome2: every recording that did not go into callhome1.
utils/filter_scp.pl --exclude "$data_dir/callhome1/wav.scp" \
  "$data_dir/callhome/wav.scp" > "$data_dir/callhome2/wav.scp"
utils/fix_data_dir.sh "$data_dir/callhome2"
utils/filter_scp.pl "$data_dir/callhome1/wav.scp" "$data_dir/callhome/reco2num_spk" \
  > "$data_dir/callhome1/reco2num_spk"
utils/filter_scp.pl "$data_dir/callhome2/wav.scp" "$data_dir/callhome/reco2num_spk" \
  > "$data_dir/callhome2/reco2num_spk"

rm -rf "$tmp_dir" 2> /dev/null
|
||||
120
egs/callhome/eend_ola/local/make_mixture.py
Normal file
120
egs/callhome/eend_ola/local/make_mixture.py
Normal file
@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita)
|
||||
# Licensed under the MIT license.
|
||||
#
|
||||
# This script generates simulated multi-talker mixtures for diarization
|
||||
#
|
||||
# common/make_mixture.py \
|
||||
# mixture.scp \
|
||||
# data/mixture \
|
||||
# wav/mixture
|
||||
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from eend import kaldi_data
|
||||
import numpy as np
|
||||
import math
|
||||
import soundfile as sf
|
||||
import json
|
||||
|
||||
parser = argparse.ArgumentParser()
parser.add_argument('script',
                    help='list of json')
parser.add_argument('out_data_dir',
                    help='output data dir of mixture')
parser.add_argument('out_wav_dir',
                    help='output mixture wav files are stored here')
parser.add_argument('--rate', type=int, default=16000,
                    help='sampling rate')
args = parser.parse_args()

# "-R" forces the default random seed for reproducibility
resample_cmd = "sox -R -t wav - -t wav - rate {}".format(args.rate)

# Open the output data files (and the input script) in a single ``with`` so
# that every handle is flushed and closed even if a mixture fails midway.
with open(args.out_data_dir + '/segments', 'w') as segments_f, \
        open(args.out_data_dir + '/utt2spk', 'w') as utt2spk_f, \
        open(args.out_data_dir + '/wav.scp', 'w') as wav_scp_f, \
        open(args.script) as script_f:
    for line in script_f:
        # each non-empty line is "<recid> <json description of the mixture>"
        if not line.strip():
            continue
        recid, jsonstr = line.strip().split(None, 1)
        indata = json.loads(jsonstr)
        wavfn = indata['recid']
        # recid now include out_wav_dir
        recid = os.path.join(args.out_wav_dir, wavfn).replace('/', '_')
        noise = indata['noise']
        noise_snr = indata['snr']
        mixture = []
        for speaker in indata['speakers']:
            spkid = speaker['spkid']
            utts = speaker['utts']
            intervals = speaker['intervals']
            rir = speaker['rir']
            # utterances are reverberated using the speaker's room impulse
            # response; the command only depends on the speaker, so build
            # it once per speaker instead of once per utterance
            preprocess = "wav-reverberate --print-args=false " \
                         " --impulse-response={} - -".format(rir)
            data = []
            pos = 0
            for interval, utt in zip(intervals, utts):
                # append silence interval data
                silence = np.zeros(int(interval * args.rate))
                data.append(silence)
                if isinstance(utt, list):
                    # [recording, start sec, end sec] -> sample offsets
                    rec, st, et = utt
                    st = np.rint(st * args.rate).astype(int)
                    et = np.rint(et * args.rate).astype(int)
                else:
                    # a bare recording name means "use the whole file"
                    rec = utt
                    st = 0
                    et = None
                if rir is not None:
                    wav_rxfilename = kaldi_data.process_wav(rec, preprocess)
                else:
                    wav_rxfilename = rec
                wav_rxfilename = kaldi_data.process_wav(
                    wav_rxfilename, resample_cmd)
                speech, _ = kaldi_data.load_wav(wav_rxfilename, st, et)
                data.append(speech)
                # calculate start/end position in samples
                startpos = pos + len(silence)
                endpos = startpos + len(speech)
                # write segments and utt2spk
                uttid = '{}_{}_{:07d}_{:07d}'.format(
                    spkid, recid, int(startpos / args.rate * 100),
                    int(endpos / args.rate * 100))
                print(uttid, recid,
                      startpos / args.rate, endpos / args.rate,
                      file=segments_f)
                print(uttid, spkid, file=utt2spk_f)
                # update position for next utterance
                pos = endpos
            data = np.concatenate(data)
            mixture.append(data)

        # fitting to the maximum-length speaker data, then mix all speakers
        maxlen = max(len(x) for x in mixture)
        mixture = [np.pad(x, (0, maxlen - len(x)), 'constant')
                   for x in mixture]
        mixture = np.sum(mixture, axis=0)
        # noise is repeated or cut to fit the mixture data length
        noise_resampled = kaldi_data.process_wav(noise, resample_cmd)
        noise_data, _ = kaldi_data.load_wav(noise_resampled)
        if maxlen > len(noise_data):
            noise_data = np.pad(
                noise_data, (0, maxlen - len(noise_data)), 'wrap')
        else:
            noise_data = noise_data[:maxlen]
        # noise power is scaled according to selected SNR, then mixed
        signal_power = np.sum(mixture**2) / len(mixture)
        noise_power = np.sum(noise_data**2) / len(noise_data)
        scale = math.sqrt(
            math.pow(10, - noise_snr / 10) * signal_power / noise_power)
        mixture += noise_data * scale
        # output the wav file and write wav.scp
        outfname = '{}.wav'.format(wavfn)
        outpath = os.path.join(args.out_wav_dir, outfname)
        sf.write(outpath, mixture, args.rate)
        print(recid, os.path.abspath(outpath), file=wav_scp_f)
|
||||
123
egs/callhome/eend_ola/local/make_musan.py
Normal file
123
egs/callhome/eend_ola/local/make_musan.py
Normal file
@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2015 David Snyder
|
||||
# 2018 Ewald Enzinger
|
||||
# Apache 2.0.
|
||||
#
|
||||
# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
|
||||
# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz.
|
||||
#
|
||||
# This file is meant to be invoked by make_musan.sh.
|
||||
|
||||
import os, sys
|
||||
|
||||
def process_music_annotations(path):
    """Parse one MUSAN music ANNOTATIONS file.

    Each non-empty line must hold at least four whitespace-separated
    fields: utterance id, genres, vocals flag and musician.

    Args:
        path: path of the ANNOTATIONS file to read.

    Returns:
        A tuple ``(utt2spk, utt2vocals)``; ``utt2spk`` maps every utterance
        id to itself and ``utt2vocals`` maps it to ``True`` when the vocals
        field is ``"Y"``.

    Raises:
        ValueError: if a non-empty line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    # The context manager closes the file even if a line is malformed.
    with open(path, 'r') as annotations:
        for line in annotations:
            fields = line.split()
            if not fields:
                # tolerate blank lines instead of failing on the unpack
                continue
            utt, _genres, vocals, _musician = fields[:4]
            # For this application, the musician ID isn't important
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals
|
||||
|
||||
def prepare_music(root_dir, use_vocals):
    """Collect utt2spk / wav.scp entries for the ``music`` subdirectory.

    Walks ``<root_dir>/music``, recording every ``*.wav`` file and merging
    every ``ANNOTATIONS`` file found. Only annotated utterances are
    emitted; annotated utterances without a wav file are reported and
    counted as missing.

    Args:
        root_dir: directory that contains the ``music`` subdirectory.
        use_vocals: when false, utterances annotated as containing vocals
            are left out.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        ``"<utt> <value>"`` lines.
    """
    wav_suffix = ".wav"
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    num_good_files = 0
    num_bad_files = 0
    music_dir = os.path.join(root_dir, "music")
    for root, _dirs, files in os.walk(music_dir):
        for filename in files:
            file_path = os.path.join(root, filename)
            if filename.endswith(wav_suffix):
                # strip only the extension, not every ".wav" in the name
                utt = filename[:-len(wav_suffix)]
                utt2wav[utt] = file_path
            elif filename == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)
    # Collect lines in lists and join once (avoids quadratic string growth).
    utt2spk_lines = []
    utt2wav_lines = []
    for utt in utt2vocals:
        if utt in utt2wav:
            if use_vocals or not utt2vocals[utt]:
                utt2spk_lines.append(utt + " " + utt2spk[utt] + "\n")
                utt2wav_lines.append(utt + " " + utt2wav[utt] + "\n")
                num_good_files += 1
        else:
            print("Missing file {}".format(utt))
            num_bad_files += 1
    print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return "".join(utt2spk_lines), "".join(utt2wav_lines)
|
||||
|
||||
def prepare_speech(root_dir):
    """Collect utt2spk / wav.scp entries for the ``speech`` subdirectory.

    Every ``*.wav`` file found under ``<root_dir>/speech`` becomes one
    utterance whose id (and speaker id) is the file name without its
    ``.wav`` extension.

    Args:
        root_dir: directory that contains the ``speech`` subdirectory.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        ``"<utt> <value>"`` lines.
    """
    wav_suffix = ".wav"
    utt2wav = {}
    speech_dir = os.path.join(root_dir, "speech")
    for root, _dirs, files in os.walk(speech_dir):
        for filename in files:
            if filename.endswith(wav_suffix):
                # strip only the extension, not every ".wav" in the name
                utt = filename[:-len(wav_suffix)]
                utt2wav[utt] = os.path.join(root, filename)
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " " + path + "\n" for utt, path in utt2wav.items())
    # Every utterance comes from an existing wav file, so none can be missing.
    num_good_files = len(utt2wav)
    num_bad_files = 0
    print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return utt2spk_str, utt2wav_str
|
||||
|
||||
def prepare_noise(root_dir):
    """Collect utt2spk / wav.scp entries for the ``noise`` subdirectory.

    Every ``*.wav`` file found under ``<root_dir>/noise`` becomes one
    utterance whose id (and speaker id) is the file name without its
    ``.wav`` extension.

    Args:
        root_dir: directory that contains the ``noise`` subdirectory.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        ``"<utt> <value>"`` lines.
    """
    wav_suffix = ".wav"
    utt2wav = {}
    noise_dir = os.path.join(root_dir, "noise")
    for root, _dirs, files in os.walk(noise_dir):
        for filename in files:
            if filename.endswith(wav_suffix):
                # strip only the extension, not every ".wav" in the name
                utt = filename[:-len(wav_suffix)]
                utt2wav[utt] = os.path.join(root, filename)
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " " + path + "\n" for utt, path in utt2wav.items())
    # Every utterance comes from an existing wav file, so none can be missing.
    num_good_files = len(utt2wav)
    num_bad_files = 0
    print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return utt2spk_str, utt2wav_str
|
||||
|
||||
def main():
    """Write ``wav.scp`` and ``utt2spk`` for the MUSAN corpus.

    Command-line arguments (read from ``sys.argv``):
        1. input directory holding the music / speech / noise folders
        2. output directory for ``wav.scp`` and ``utt2spk``
        3. ``"Y"`` to keep music that contains vocals, anything else to
           drop it
    """
    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    use_vocals = sys.argv[3] == "Y"
    utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
    # Context managers make sure both files are flushed and closed before
    # the calling shell script reads them.
    with open(os.path.join(out_dir, "wav.scp"), 'w') as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), 'w') as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


if __name__=="__main__":
    main()
|
||||
37
egs/callhome/eend_ola/local/make_musan.sh
Normal file
37
egs/callhome/eend_ola/local/make_musan.sh
Normal file
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# This script, called by ../run.sh, creates the MUSAN
# data directory. The required dataset is freely available at
# http://www.openslr.org/17/
#
# Usage: make_musan.sh <musan-dir> <out-data-dir>
# Produces <out-data-dir>/musan plus the per-type subsets musan_music,
# musan_speech and musan_noise.

set -e

if [ $# -lt 2 ]; then
  echo "Usage: $0 <musan-dir> <out-data-dir>" >&2
  exit 1
fi

in_dir=$1
data_dir=$2
use_vocals='Y'

mkdir -p local/musan.tmp

echo "Preparing ${data_dir}/musan..."
mkdir -p "${data_dir}/musan"
local/make_musan.py "${in_dir}" "${data_dir}/musan" "${use_vocals}"

utils/fix_data_dir.sh "${data_dir}/musan"

# Split the utterance list by type, keyed on the type name appearing in
# each utt2spk line.
grep "music" "${data_dir}/musan/utt2spk" > local/musan.tmp/utt2spk_music
grep "speech" "${data_dir}/musan/utt2spk" > local/musan.tmp/utt2spk_speech
grep "noise" "${data_dir}/musan/utt2spk" > local/musan.tmp/utt2spk_noise
utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
  "${data_dir}/musan" "${data_dir}/musan_music"
utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
  "${data_dir}/musan" "${data_dir}/musan_speech"
utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
  "${data_dir}/musan" "${data_dir}/musan_noise"

utils/fix_data_dir.sh "${data_dir}/musan_music"
utils/fix_data_dir.sh "${data_dir}/musan_speech"
utils/fix_data_dir.sh "${data_dir}/musan_noise"

rm -rf local/musan.tmp
|
||||
|
||||
63
egs/callhome/eend_ola/local/make_sre.pl
Normal file
63
egs/callhome/eend_ola/local/make_sre.pl
Normal file
@ -0,0 +1,63 @@
|
||||
#!/usr/bin/perl
#
# Copyright 2015 David Snyder
# Apache 2.0.
# Usage: make_sre.pl <path-to-data> <name-of-source> <sre-ref> <output-dir>
#
# Writes wav.scp, utt2spk and spk2gender into <output-dir> for every line of
# <sre-ref> whose source name equals <name-of-source> and whose utterance id
# has a matching .sph file somewhere below <path-to-data>.

use strict;

if (@ARGV != 4) {
  print STDERR "Usage: $0 <path-to-data> <name-of-source> <sre-ref> <output-dir>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2006S44 sre2004 sre_ref data/sre2004\n";
  exit(1);
}

my ($db_base, $sre_name, $sre_ref_filename, $out_dir) = @ARGV;
my %utt2sph = ();
my %spk2gender = ();

my $tmp_dir = "$out_dir/tmp";
if (system("mkdir -p $tmp_dir") != 0) {
  die "Error making directory $tmp_dir";
}

if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}
open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list";

# Index every sphere file by its base name (text before the first '.').
while (my $sph = <WAVLIST>) {
  chomp($sph);
  my @path_parts = split("/", $sph);
  my @name_parts = split("[./]", $path_parts[$#path_parts]);
  my $uttId = $name_parts[0];
  $utt2sph{$uttId} = $sph;
}
# The list is fully loaded into %utt2sph; release the handle.
close(WAVLIST) || die;

open(GNDR, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
open(SRE_REF, "<", $sre_ref_filename) or die "Cannot open SRE reference.";
while (my $ref_line = <SRE_REF>) {
  chomp($ref_line);
  my ($speaker, $gender, $other_sre_name, $utt_id, $channel) = split(" ", $ref_line);
  # Channel "A" is sph2pipe channel 1; anything else is channel 2.
  my $channel_num = ($channel eq "A") ? "1" : "2";
  if (($other_sre_name eq $sre_name) and (exists $utt2sph{$utt_id})) {
    my $full_utt_id = "$speaker-$gender-$sre_name-$utt_id-$channel";
    $spk2gender{"$speaker-$gender"} = $gender;
    print WAV "$full_utt_id"," sph2pipe -f wav -p -c $channel_num $utt2sph{$utt_id} |\n";
    print SPKR "$full_utt_id $speaker-$gender","\n";
  }
}
foreach my $speaker (keys %spk2gender) {
  print GNDR "$speaker $spk2gender{$speaker}\n";
}

close(GNDR) || die;
close(SPKR) || die;
close(WAV) || die;
close(SRE_REF) || die;
|
||||
48
egs/callhome/eend_ola/local/make_sre.sh
Normal file
48
egs/callhome/eend_ola/local/make_sre.sh
Normal file
@ -0,0 +1,48 @@
|
||||
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# See README.txt for more info on data required.
#
# Usage: make_sre.sh <data-root> <out-data-dir>
# Downloads the speaker list, builds one data directory per SRE corpus
# found below <data-root> and combines them into <out-data-dir>/sre.

set -e

if [ $# -lt 2 ]; then
  echo "Usage: $0 <data-root> <out-data-dir>" >&2
  exit 1
fi

data_root=$1
data_dir=$2

wget -P data/local/ http://www.openslr.org/resources/15/speaker_list.tgz
tar -C data/local/ -xvf data/local/speaker_list.tgz
sre_ref=data/local/speaker_list

local/make_sre.pl "$data_root/LDC2006S44/" \
  sre2004 "$sre_ref" "$data_dir/sre2004"

local/make_sre.pl "$data_root/LDC2011S01" \
  sre2005 "$sre_ref" "$data_dir/sre2005_train"

local/make_sre.pl "$data_root/LDC2011S04" \
  sre2005 "$sre_ref" "$data_dir/sre2005_test"

local/make_sre.pl "$data_root/LDC2011S09" \
  sre2006 "$sre_ref" "$data_dir/sre2006_train"

local/make_sre.pl "$data_root/LDC2011S10" \
  sre2006 "$sre_ref" "$data_dir/sre2006_test_1"

local/make_sre.pl "$data_root/LDC2012S01" \
  sre2006 "$sre_ref" "$data_dir/sre2006_test_2"

local/make_sre.pl "$data_root/LDC2011S05" \
  sre2008 "$sre_ref" "$data_dir/sre2008_train"

local/make_sre.pl "$data_root/LDC2011S08" \
  sre2008 "$sre_ref" "$data_dir/sre2008_test"

utils/combine_data.sh "$data_dir/sre" \
  "$data_dir/sre2004" "$data_dir/sre2005_train" \
  "$data_dir/sre2005_test" "$data_dir/sre2006_train" \
  "$data_dir/sre2006_test_1" "$data_dir/sre2006_test_2" \
  "$data_dir/sre2008_train" "$data_dir/sre2008_test"

utils/validate_data_dir.sh --no-text --no-feats "$data_dir/sre"
utils/fix_data_dir.sh "$data_dir/sre"
# Remove the downloaded speaker list archive and its extracted copy.
rm data/local/speaker_list.*
|
||||
106
egs/callhome/eend_ola/local/make_swbd2_phase1.pl
Normal file
106
egs/callhome/eend_ola/local/make_swbd2_phase1.pl
Normal file
@ -0,0 +1,106 @@
|
||||
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2017 David Snyder
# Apache 2.0
#
# Builds spk2gender, utt2spk, wav.scp (and, through the utils scripts,
# spk2utt) in <path-to-output> from <path-to-LDC98S75>/doc/callstat.tbl and
# the .sph files found below <path-to-LDC98S75>. Each call yields two
# utterances, one per sph2pipe channel.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC98S75> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora3/LDC/LDC98S75 data/swbd2_phase1_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

if (system("mkdir -p $out_dir")) {
  die "Error making directory $out_dir";
}

open(CS, "<$db_base/doc/callstat.tbl") || die "Could not open $db_base/doc/callstat.tbl";
open(GNDR, ">$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

$tmp_dir = "$out_dir/tmp";
if (system("mkdir -p $tmp_dir") != 0) {
  die "Error making directory $tmp_dir";
}

if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<$tmp_dir/sph.list") or die "cannot open wav list";

# Map each sphere file's base name (text before the first '.') to its path.
%wavs = ();
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId = $t1[0];
  $wavs{$uttId} = $sph;
}

# One comma-separated callstat.tbl line per call.
while (<CS>) {
  $line = $_ ;
  @A = split(",", $line);
  # The first field, cut at the first '.' or '/', is the key into %wavs.
  @A1 = split("[./]",$A[0]);
  $wav = $A1[0];
  # NOTE(review): the left operand is a pattern built from $wav, not $wav
  # itself, so this does not look like a membership test of $wav in
  # @badAudio and probably never succeeds -- confirm the intended filter.
  # The smartmatch operator is also deprecated in recent Perl releases.
  if (/$wav/i ~~ @badAudio) {
    # do nothing
    print "Bad Audio = $wav";
  } else {
    # Fields 2/3 are the two speaker ids, fields 5/6 their genders.
    $spkr1= "sw_" . $A[2];
    $spkr2= "sw_" . $A[3];
    $gender1 = $A[5];
    $gender2 = $A[6];
    if ($gender1 eq "M") {
      $gender1 = "m";
    } elsif ($gender1 eq "F") {
      $gender1 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if ($gender2 eq "M") {
      $gender2 = "m";
    } elsif ($gender2 eq "F") {
      $gender2 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if (-e "$wavs{$wav}") {
      # First speaker: sph2pipe channel 1, utterance suffix "_1".
      $uttId = $spkr1 ."_" . $wav ."_1";
      # Write each speaker's gender only the first time the speaker is seen.
      if (!$spk2gender{$spkr1}) {
        $spk2gender{$spkr1} = $gender1;
        print GNDR "$spkr1"," $gender1\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wavs{$wav} |\n";
      print SPKR "$uttId"," $spkr1","\n";

      # Second speaker: sph2pipe channel 2, utterance suffix "_2".
      $uttId = $spkr2 . "_" . $wav ."_2";
      if (!$spk2gender{$spkr2}) {
        $spk2gender{$spkr2} = $gender2;
        print GNDR "$spkr2"," $gender2\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 2 $wavs{$wav} |\n";
      print SPKR "$uttId"," $spkr2","\n";
    } else {
      print STDERR "Missing $wavs{$wav} for $wav\n";
    }
  }
}

close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
# Derive spk2utt, then let the utils scripts fix and validate the directory.
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
|
||||
107
egs/callhome/eend_ola/local/make_swbd2_phase2.pl
Normal file
107
egs/callhome/eend_ola/local/make_swbd2_phase2.pl
Normal file
@ -0,0 +1,107 @@
|
||||
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Builds spk2gender, utt2spk, wav.scp (and, through the utils scripts,
# spk2utt) in <path-to-output> from the callstat.tbl / callinfo.tbl tables
# under <path-to-LDC99S79>/DISC1/doc and the .sph files found below
# <path-to-LDC99S79>. Each call yields two utterances, one per sph2pipe
# channel.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC99S79> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC99S79 data/swbd2_phase2_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

if (system("mkdir -p $out_dir")) {
  die "Error making directory $out_dir";
}

open(CS, "<$db_base/DISC1/doc/callstat.tbl") || die "Could not open $db_base/DISC1/doc/callstat.tbl";
open(CI, "<$db_base/DISC1/doc/callinfo.tbl") || die "Could not open $db_base/DISC1/doc/callinfo.tbl";
open(GNDR, ">$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

$tmp_dir = "$out_dir/tmp";
if (system("mkdir -p $tmp_dir") != 0) {
  die "Error making directory $tmp_dir";
}

if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<$tmp_dir/sph.list") or die "cannot open wav list";

# Map each sphere file's base name (text before the first '.') to its path.
# Note that the hash %wav and the scalar $wav used below are distinct
# variables.
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId=$t1[0];
  $wav{$uttId} = $sph;
}

# One comma-separated callstat.tbl line per call.
while (<CS>) {
  $line = $_ ;
  # Two callinfo.tbl lines are consumed per callstat.tbl line and only the
  # second one is kept; its first field is the key into %wav.
  # (presumably callinfo.tbl has one line per call side -- verify)
  $ci = <CI>;
  $ci = <CI>;
  @ci = split(",",$ci);
  $wav = $ci[0];
  @A = split(",", $line);
  # NOTE(review): the left operand is a pattern built from $wav, not $wav
  # itself, so this does not look like a membership test of $wav in
  # @badAudio and probably never succeeds -- confirm the intended filter.
  # The smartmatch operator is also deprecated in recent Perl releases.
  if (/$wav/i ~~ @badAudio) {
    # do nothing
  } else {
    # Fields 2/3 are the two speaker ids, fields 4/5 their genders.
    $spkr1= "sw_" . $A[2];
    $spkr2= "sw_" . $A[3];
    $gender1 = $A[4];
    $gender2 = $A[5];
    if ($gender1 eq "M") {
      $gender1 = "m";
    } elsif ($gender1 eq "F") {
      $gender1 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if ($gender2 eq "M") {
      $gender2 = "m";
    } elsif ($gender2 eq "F") {
      $gender2 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if (-e "$wav{$wav}") {
      # First speaker: sph2pipe channel 1, utterance suffix "_1".
      $uttId = $spkr1 ."_" . $wav ."_1";
      # Write each speaker's gender only the first time the speaker is seen.
      if (!$spk2gender{$spkr1}) {
        $spk2gender{$spkr1} = $gender1;
        print GNDR "$spkr1"," $gender1\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav{$wav} |\n";
      print SPKR "$uttId"," $spkr1","\n";

      # Second speaker: sph2pipe channel 2, utterance suffix "_2".
      $uttId = $spkr2 . "_" . $wav ."_2";
      if (!$spk2gender{$spkr2}) {
        $spk2gender{$spkr2} = $gender2;
        print GNDR "$spkr2"," $gender2\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 2 $wav{$wav} |\n";
      print SPKR "$uttId"," $spkr2","\n";
    } else {
      print STDERR "Missing $wav{$wav} for $wav\n";
    }
  }
}

close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
# Derive spk2utt, then let the utils scripts fix and validate the directory.
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
|
||||
102
egs/callhome/eend_ola/local/make_swbd2_phase3.pl
Normal file
102
egs/callhome/eend_ola/local/make_swbd2_phase3.pl
Normal file
@ -0,0 +1,102 @@
|
||||
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Builds spk2gender, utt2spk, wav.scp (and, through the utils scripts,
# spk2utt) in <path-to-output> from
# <path-to-LDC2002S06>/DISC1/docs/callstat.tbl and the .sph files found
# below <path-to-LDC2002S06>. Each call yields two utterances, one per
# sph2pipe channel.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC2002S06> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2002S06 data/swbd2_phase3_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

if (system("mkdir -p $out_dir")) {
  die "Error making directory $out_dir";
}

open(CS, "<$db_base/DISC1/docs/callstat.tbl") || die "Could not open $db_base/DISC1/docs/callstat.tbl";
open(GNDR, ">$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

$tmp_dir = "$out_dir/tmp";
if (system("mkdir -p $tmp_dir") != 0) {
  die "Error making directory $tmp_dir";
}

if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<$tmp_dir/sph.list") or die "cannot open wav list";
# Map each sphere file's base name (text before the first '.') to its path.
# Note that the hash %wav and the scalar $wav used below are distinct
# variables.
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId=$t1[0];
  $wav{$uttId} = $sph;
}

# One comma-separated callstat.tbl line per call.
while (<CS>) {
  $line = $_ ;
  @A = split(",", $line);
  # The key into %wav is the first field prefixed with "sw_".
  $wav = "sw_" . $A[0] ;
  # NOTE(review): the left operand is a pattern built from $wav, not $wav
  # itself, so this does not look like a membership test of $wav in
  # @badAudio and probably never succeeds -- confirm the intended filter.
  # The smartmatch operator is also deprecated in recent Perl releases.
  if (/$wav/i ~~ @badAudio) {
    # do nothing
  } else {
    # Fields 3/4 are the two speaker ids, fields 5/6 their genders.
    $spkr1= "sw_" . $A[3];
    $spkr2= "sw_" . $A[4];
    $gender1 = $A[5];
    $gender2 = $A[6];
    if ($gender1 eq "M") {
      $gender1 = "m";
    } elsif ($gender1 eq "F") {
      $gender1 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if ($gender2 eq "M") {
      $gender2 = "m";
    } elsif ($gender2 eq "F") {
      $gender2 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if (-e "$wav{$wav}") {
      # First speaker: sph2pipe channel 1, utterance suffix "_1".
      $uttId = $spkr1 ."_" . $wav ."_1";
      # Write each speaker's gender only the first time the speaker is seen.
      if (!$spk2gender{$spkr1}) {
        $spk2gender{$spkr1} = $gender1;
        print GNDR "$spkr1"," $gender1\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav{$wav} |\n";
      print SPKR "$uttId"," $spkr1","\n";

      # Second speaker: sph2pipe channel 2, utterance suffix "_2".
      $uttId = $spkr2 . "_" . $wav ."_2";
      if (!$spk2gender{$spkr2}) {
        $spk2gender{$spkr2} = $gender2;
        print GNDR "$spkr2"," $gender2\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 2 $wav{$wav} |\n";
      print SPKR "$uttId"," $spkr2","\n";
    } else {
      print STDERR "Missing $wav{$wav} for $wav\n";
    }
  }
}

close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
# Derive spk2utt, then let the utils scripts fix and validate the directory.
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
|
||||
83
egs/callhome/eend_ola/local/make_swbd_cellular1.pl
Normal file
83
egs/callhome/eend_ola/local/make_swbd_cellular1.pl
Normal file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Builds spk2gender, utt2spk, wav.scp (and, through the utils scripts,
# spk2utt) in <path-to-output> from
# <path-to-LDC2001S13>/doc/swb_callstats.tbl. Audio is expected at
# <path-to-LDC2001S13>/sw_<call-id>.sph; each call yields two utterances,
# one per sph2pipe channel.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC2001S13> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2001S13 data/swbd_cellular1_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

if (system("mkdir -p $out_dir")) {
  die "Error making directory $out_dir";
}

open(CS, "<$db_base/doc/swb_callstats.tbl") || die "Could not open $db_base/doc/swb_callstats.tbl";
open(GNDR, ">$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("40019", "45024", "40022");

# One comma-separated swb_callstats.tbl line per call.
while (<CS>) {
  $line = $_ ;
  @A = split(",", $line);
  # NOTE(review): the left operand is a pattern built from $A[0], not $A[0]
  # itself, so this does not look like a membership test of the call id in
  # @badAudio and probably never succeeds -- confirm the intended filter.
  # The smartmatch operator is also deprecated in recent Perl releases.
  if (/$A[0]/i ~~ @badAudio) {
    # do nothing
  } else {
    # Field 0 is the call id, fields 1/2 the two speaker ids and
    # fields 3/4 their genders.
    $wav = "sw_" . $A[0];
    $spkr1= "sw_" . $A[1];
    $spkr2= "sw_" . $A[2];
    $gender1 = $A[3];
    $gender2 = $A[4];
    if ($A[3] eq "M") {
      $gender1 = "m";
    } elsif ($A[3] eq "F") {
      $gender1 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if ($A[4] eq "M") {
      $gender2 = "m";
    } elsif ($A[4] eq "F") {
      $gender2 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if (-e "$db_base/$wav.sph") {
      # First speaker: sph2pipe channel 1, utterance suffix "_1".
      $uttId = $spkr1 . "-swbdc_" . $wav ."_1";
      # Write each speaker's gender only the first time the speaker is seen.
      if (!$spk2gender{$spkr1}) {
        $spk2gender{$spkr1} = $gender1;
        print GNDR "$spkr1"," $gender1\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 1 $db_base/$wav.sph |\n";
      print SPKR "$uttId"," $spkr1","\n";

      # Second speaker: sph2pipe channel 2, utterance suffix "_2".
      $uttId = $spkr2 . "-swbdc_" . $wav ."_2";
      if (!$spk2gender{$spkr2}) {
        $spk2gender{$spkr2} = $gender2;
        print GNDR "$spkr2"," $gender2\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 2 $db_base/$wav.sph |\n";
      print SPKR "$uttId"," $spkr2","\n";
    } else {
      print STDERR "Missing $db_base/$wav.sph\n";
    }
  }
}

close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
# Derive spk2utt, then let the utils scripts fix and validate the directory.
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
|
||||
83
egs/callhome/eend_ola/local/make_swbd_cellular2.pl
Normal file
83
egs/callhome/eend_ola/local/make_swbd_cellular2.pl
Normal file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/perl
|
||||
use warnings; #sed replacement for -w perl parameter
|
||||
#
|
||||
# Copyright 2013 Daniel Povey
|
||||
# Apache 2.0
|
||||
|
||||
if (@ARGV != 2) {
|
||||
print STDERR "Usage: $0 <path-to-LDC2004S07> <path-to-output>\n";
|
||||
print STDERR "e.g. $0 /export/corpora5/LDC/LDC2004S07 data/swbd_cellular2_train\n";
|
||||
exit(1);
|
||||
}
|
||||
($db_base, $out_dir) = @ARGV;
|
||||
|
||||
if (system("mkdir -p $out_dir")) {
|
||||
die "Error making directory $out_dir";
|
||||
}
|
||||
|
||||
open(CS, "<$db_base/docs/swb_callstats.tbl") || die "Could not open $db_base/docs/swb_callstats.tbl";
|
||||
open(GNDR, ">$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
|
||||
open(SPKR, ">$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
|
||||
open(WAV, ">$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";
|
||||
|
||||
@badAudio=("45024", "40022");
|
||||
|
||||
while (<CS>) {
|
||||
$line = $_ ;
|
||||
@A = split(",", $line);
|
||||
# Skip conversations whose ID (first CSV field) is listed in @badAudio.
# The previous smartmatch compared the regex match result (1) with the list,
# so it never matched; smartmatch is also deprecated in recent Perl releases.
if (grep { $_ eq $A[0] } @badAudio) {
|
||||
# do nothing
|
||||
} else {
|
||||
$wav = "sw_" . $A[0];
|
||||
$spkr1= "sw_" . $A[1];
|
||||
$spkr2= "sw_" . $A[2];
|
||||
$gender1 = $A[3];
|
||||
$gender2 = $A[4];
|
||||
if ($A[3] eq "M") {
|
||||
$gender1 = "m";
|
||||
} elsif ($A[3] eq "F") {
|
||||
$gender1 = "f";
|
||||
} else {
|
||||
die "Unknown Gender in $line";
|
||||
}
|
||||
if ($A[4] eq "M") {
|
||||
$gender2 = "m";
|
||||
} elsif ($A[4] eq "F") {
|
||||
$gender2 = "f";
|
||||
} else {
|
||||
die "Unknown Gender in $line";
|
||||
}
|
||||
if (-e "$db_base/data/$wav.sph") {
|
||||
$uttId = $spkr1 . "-swbdc_" . $wav ."_1";
|
||||
if (!$spk2gender{$spkr1}) {
|
||||
$spk2gender{$spkr1} = $gender1;
|
||||
print GNDR "$spkr1"," $gender1\n";
|
||||
}
|
||||
print WAV "$uttId"," sph2pipe -f wav -p -c 1 $db_base/data/$wav.sph |\n";
|
||||
print SPKR "$uttId"," $spkr1","\n";
|
||||
|
||||
$uttId = $spkr2 . "-swbdc_" . $wav ."_2";
|
||||
if (!$spk2gender{$spkr2}) {
|
||||
$spk2gender{$spkr2} = $gender2;
|
||||
print GNDR "$spkr2"," $gender2\n";
|
||||
}
|
||||
print WAV "$uttId"," sph2pipe -f wav -p -c 2 $db_base/data/$wav.sph |\n";
|
||||
print SPKR "$uttId"," $spkr2","\n";
|
||||
} else {
|
||||
print STDERR "Missing $db_base/data/$wav.sph\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
close(WAV) || die;
|
||||
close(SPKR) || die;
|
||||
close(GNDR) || die;
|
||||
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
|
||||
die "Error creating spk2utt file in directory $out_dir";
|
||||
}
|
||||
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
|
||||
die "Error fixing data dir $out_dir";
|
||||
}
|
||||
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
|
||||
die "Error validating directory $out_dir";
|
||||
}
|
||||
145
egs/callhome/eend_ola/local/random_mixture.py
Normal file
145
egs/callhome/eend_ola/local/random_mixture.py
Normal file
@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita)
|
||||
# Licensed under the MIT license.
|
||||
|
||||
"""
|
||||
This script generates random multi-talker mixtures for diarization.
|
||||
It generates a scp-like outputs: lines of "[recid] [json]".
|
||||
recid: recording id of mixture
|
||||
serial numbers like mix_0000001, mix_0000002, ...
|
||||
json: mixture configuration formatted in "one-line"
|
||||
The json format is as following:
|
||||
{
|
||||
'speakers':[ # list of speakers
|
||||
{
|
||||
'spkid': 'Name', # speaker id
|
||||
'rir': '/rirdir/rir.wav', # wav_rxfilename of room impulse response
|
||||
'utts': [ # list of wav_rxfilenames of utterances
|
||||
'/wavdir/utt1.wav',
|
||||
'/wavdir/utt2.wav',...],
|
||||
'intervals': [1.2, 3.4, ...] # list of silence durations before utterances
|
||||
}, ... ],
|
||||
'noise': '/noisedir/noise.wav', # wav_rxfilename of background noise
|
||||
'snr': 15.0, # SNR for mixing background noise
|
||||
'recid': 'mix_0000001' # recording id of the mixture
|
||||
}
|
||||
|
||||
Usage:
|
||||
common/random_mixture.py \
|
||||
--n_mixtures=10000 \ # number of mixtures
|
||||
data/voxceleb1_train \ # kaldi-style data dir of utterances
|
||||
data/musan_noise_bg \ # background noises
|
||||
data/simu_rirs \ # room impulse responses
|
||||
> mixture.scp # output scp-like file
|
||||
|
||||
The actual data dir and wav files are generated using make_mixture.py:
|
||||
common/make_mixture.py \
|
||||
mixture.scp \ # scp-like file for mixture
|
||||
data/mixture \ # output data dir
|
||||
wav/mixture # output wav dir
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from eend import kaldi_data
|
||||
import random
|
||||
import numpy as np
|
||||
import json
|
||||
import itertools
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('data_dir',
|
||||
help='data dir of single-speaker recordings')
|
||||
parser.add_argument('noise_dir',
|
||||
help='data dir of background noise recordings')
|
||||
parser.add_argument('rir_dir',
|
||||
help='data dir of room impulse responses')
|
||||
parser.add_argument('--n_mixtures', type=int, default=10,
|
||||
help='number of mixture recordings')
|
||||
parser.add_argument('--n_speakers', type=int, default=4,
|
||||
help='number of speakers in a mixture')
|
||||
parser.add_argument('--min_utts', type=int, default=10,
|
||||
help='minimum number of uttenraces per speaker')
|
||||
parser.add_argument('--max_utts', type=int, default=20,
|
||||
help='maximum number of utterances per speaker')
|
||||
parser.add_argument('--sil_scale', type=float, default=10.0,
|
||||
help='average silence time')
|
||||
parser.add_argument('--noise_snrs', default="10:15:20",
|
||||
help='colon-delimited SNRs for background noises')
|
||||
parser.add_argument('--random_seed', type=int, default=777,
|
||||
help='random seed')
|
||||
parser.add_argument('--speech_rvb_probability', type=float, default=1,
|
||||
help='reverb probability')
|
||||
args = parser.parse_args()
|
||||
|
||||
random.seed(args.random_seed)
|
||||
np.random.seed(args.random_seed)
|
||||
|
||||
# load list of wav files from kaldi-style data dirs
|
||||
wavs = kaldi_data.load_wav_scp(
|
||||
os.path.join(args.data_dir, 'wav.scp'))
|
||||
noises = kaldi_data.load_wav_scp(
|
||||
os.path.join(args.noise_dir, 'wav.scp'))
|
||||
rirs = kaldi_data.load_wav_scp(
|
||||
os.path.join(args.rir_dir, 'wav.scp'))
|
||||
|
||||
# spk2utt is used for counting number of utterances per speaker
|
||||
spk2utt = kaldi_data.load_spk2utt(
|
||||
os.path.join(args.data_dir, 'spk2utt'))
|
||||
|
||||
segments = kaldi_data.load_segments_hash(
|
||||
os.path.join(args.data_dir, 'segments'))
|
||||
|
||||
# choice lists for random sampling
|
||||
all_speakers = list(spk2utt.keys())
|
||||
all_noises = list(noises.keys())
|
||||
all_rirs = list(rirs.keys())
|
||||
noise_snrs = [float(x) for x in args.noise_snrs.split(':')]
|
||||
|
||||
mixtures = []
|
||||
for it in range(args.n_mixtures):
|
||||
# recording ids are mix_0000001, mix_0000002, ...
|
||||
recid = 'mix_{:07d}'.format(it + 1)
|
||||
# randomly select speakers, a background noise and a SNR
|
||||
speakers = random.sample(all_speakers, args.n_speakers)
|
||||
noise = random.choice(all_noises)
|
||||
noise_snr = random.choice(noise_snrs)
|
||||
mixture = {'speakers': []}
|
||||
for speaker in speakers:
|
||||
# randomly select the number of utterances
|
||||
n_utts = np.random.randint(args.min_utts, args.max_utts + 1)
|
||||
# utts = spk2utt[speaker][:n_utts]
|
||||
cycle_utts = itertools.cycle(spk2utt[speaker])
|
||||
# random start utterance
|
||||
roll = np.random.randint(0, len(spk2utt[speaker]))
|
||||
for i in range(roll):
|
||||
next(cycle_utts)
|
||||
utts = [next(cycle_utts) for i in range(n_utts)]
|
||||
# randomly select wait time before appending utterance
|
||||
intervals = np.random.exponential(args.sil_scale, size=n_utts)
|
||||
# randomly select a room impulse response
|
||||
if random.random() < args.speech_rvb_probability:
|
||||
rir = rirs[random.choice(all_rirs)]
|
||||
else:
|
||||
rir = None
|
||||
if segments is not None:
|
||||
utts = [segments[utt] for utt in utts]
|
||||
utts = [(wavs[rec], st, et) for (rec, st, et) in utts]
|
||||
mixture['speakers'].append({
|
||||
'spkid': speaker,
|
||||
'rir': rir,
|
||||
'utts': utts,
|
||||
'intervals': intervals.tolist()
|
||||
})
|
||||
else:
|
||||
mixture['speakers'].append({
|
||||
'spkid': speaker,
|
||||
'rir': rir,
|
||||
'utts': [wavs[utt] for utt in utts],
|
||||
'intervals': intervals.tolist()
|
||||
})
|
||||
mixture['noise'] = noises[noise]
|
||||
mixture['snr'] = noise_snr
|
||||
mixture['recid'] = recid
|
||||
print(recid, json.dumps(mixture))
|
||||
9
egs/callhome/eend_ola/local/run_blstm.sh
Normal file
9
egs/callhome/eend_ola/local/run_blstm.sh
Normal file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita)
|
||||
# Licensed under the MIT license.
|
||||
#
|
||||
# BLSTM-based model experiment
|
||||
./run.sh --train-config conf/blstm/train.yaml --average-start 20 --average-end 20 \
|
||||
--adapt-config conf/blstm/adapt.yaml --adapt-average-start 10 --adapt-average-end 10 \
|
||||
--infer-config conf/blstm/infer.yaml "$@"
|
||||
235
egs/callhome/eend_ola/local/run_prepare_shared_eda.sh
Normal file
235
egs/callhome/eend_ola/local/run_prepare_shared_eda.sh
Normal file
@ -0,0 +1,235 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita, Shota Horiguchi)
|
||||
# Licensed under the MIT license.
|
||||
#
|
||||
# This script prepares kaldi-style data sets shared with different experiments
|
||||
# - data/xxxx
|
||||
# callhome, sre, swb2, and swb_cellular datasets
|
||||
# - data/simu_${simu_outputs}
|
||||
# simulation mixtures generated with various options
|
||||
|
||||
stage=0
|
||||
|
||||
# Modify corpus directories
|
||||
# - callhome_dir
|
||||
# CALLHOME (LDC2001S97)
|
||||
# - swb2_phase1_train
|
||||
# Switchboard-2 Phase 1 (LDC98S75)
|
||||
# - data_root
|
||||
# LDC99S79, LDC2002S06, LDC2001S13, LDC2004S07,
|
||||
# LDC2006S44, LDC2011S01, LDC2011S04, LDC2011S09,
|
||||
# LDC2011S10, LDC2012S01, LDC2011S05, LDC2011S08
|
||||
# - musan_root
|
||||
# MUSAN corpus (https://www.openslr.org/17/)
|
||||
callhome_dir=/export/corpora/NIST/LDC2001S97
|
||||
swb2_phase1_train=/export/corpora/LDC/LDC98S75
|
||||
data_root=/export/corpora5/LDC
|
||||
musan_root=/export/corpora/JHU/musan
|
||||
# Modify simulated data storage area.
|
||||
# This script distributes simulated data under these directories
|
||||
simu_actual_dirs=(
|
||||
/export/c05/$USER/diarization-data
|
||||
/export/c08/$USER/diarization-data
|
||||
/export/c09/$USER/diarization-data
|
||||
)
|
||||
|
||||
# data preparation options
|
||||
max_jobs_run=4
|
||||
sad_num_jobs=30
|
||||
sad_opts="--extra-left-context 79 --extra-right-context 21 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3"
|
||||
sad_graph_opts="--min-silence-duration=0.03 --min-speech-duration=0.3 --max-speech-duration=10.0"
|
||||
sad_priors_opts="--sil-scale=0.1"
|
||||
|
||||
# simulation options
|
||||
simu_opts_overlap=yes
|
||||
simu_opts_num_speaker_array=(1 2 3 4)
|
||||
simu_opts_sil_scale_array=(2 2 5 9)
|
||||
simu_opts_rvb_prob=0.5
|
||||
simu_opts_num_train=100000
|
||||
simu_opts_min_utts=10
|
||||
simu_opts_max_utts=20
|
||||
|
||||
simu_cmd="run.pl"
|
||||
train_cmd="run.pl"
|
||||
random_mixture_cmd="run.pl"
|
||||
make_mixture_cmd="run.pl"
|
||||
|
||||
. parse_options.sh || exit
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
echo "prepare kaldi-style datasets"
|
||||
# Prepare CALLHOME dataset. This will be used for evaluation.
|
||||
if ! validate_data_dir.sh --no-text --no-feats data/callhome1_spkall \
|
||||
|| ! validate_data_dir.sh --no-text --no-feats data/callhome2_spkall; then
|
||||
# imported from https://github.com/kaldi-asr/kaldi/blob/master/egs/callhome_diarization/v1
|
||||
local/make_callhome.sh $callhome_dir data
|
||||
# Generate two-speaker subsets
|
||||
for dset in callhome1 callhome2; do
|
||||
# Extract two-speaker recordings in wav.scp
|
||||
copy_data_dir.sh data/${dset} data/${dset}_spkall
|
||||
# Regenerate segments file from fullref.rttm
|
||||
# $2: recid, $4: start_time, $5: duration, $8: speakerid
|
||||
awk '{printf "%s_%s_%07d_%07d %s %.2f %.2f\n", \
|
||||
$2, $8, $4*100, ($4+$5)*100, $2, $4, $4+$5}' \
|
||||
data/callhome/fullref.rttm | sort > data/${dset}_spkall/segments
|
||||
utils/fix_data_dir.sh data/${dset}_spkall
|
||||
# Speaker ID is '[recid]_[speakerid]'
|
||||
awk '{split($1,A,"_"); printf "%s %s_%s\n", $1, A[1], A[2]}' \
|
||||
data/${dset}_spkall/segments > data/${dset}_spkall/utt2spk
|
||||
utils/fix_data_dir.sh data/${dset}_spkall
|
||||
# Generate rttm files for scoring
|
||||
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
|
||||
data/${dset}_spkall/utt2spk data/${dset}_spkall/segments \
|
||||
data/${dset}_spkall/rttm
|
||||
utils/data/get_reco2dur.sh data/${dset}_spkall
|
||||
done
|
||||
fi
|
||||
# Prepare a collection of NIST SRE and SWB data. This will be used for training.
|
||||
if ! validate_data_dir.sh --no-text --no-feats data/swb_sre_comb; then
|
||||
local/make_sre.sh $data_root data
|
||||
# Prepare SWB for x-vector DNN training.
|
||||
local/make_swbd2_phase1.pl $swb2_phase1_train \
|
||||
data/swbd2_phase1_train
|
||||
local/make_swbd2_phase2.pl $data_root/LDC99S79 \
|
||||
data/swbd2_phase2_train
|
||||
local/make_swbd2_phase3.pl $data_root/LDC2002S06 \
|
||||
data/swbd2_phase3_train
|
||||
local/make_swbd_cellular1.pl $data_root/LDC2001S13 \
|
||||
data/swbd_cellular1_train
|
||||
local/make_swbd_cellular2.pl $data_root/LDC2004S07 \
|
||||
data/swbd_cellular2_train
|
||||
# Combine swb and sre data
|
||||
utils/combine_data.sh data/swb_sre_comb \
|
||||
data/swbd_cellular1_train data/swbd_cellular2_train \
|
||||
data/swbd2_phase1_train \
|
||||
data/swbd2_phase2_train data/swbd2_phase3_train data/sre
|
||||
fi
|
||||
# MUSAN data: "background" noise subset
|
||||
if ! validate_data_dir.sh --no-text --no-feats data/musan_noise_bg; then
|
||||
local/make_musan.sh $musan_root data
|
||||
utils/copy_data_dir.sh data/musan_noise data/musan_noise_bg
|
||||
awk '{if(NR>1) print $1,$1}' $musan_root/noise/free-sound/ANNOTATIONS > data/musan_noise_bg/utt2spk
|
||||
utils/fix_data_dir.sh data/musan_noise_bg
|
||||
fi
|
||||
# simu rirs 8k
|
||||
if ! validate_data_dir.sh --no-text --no-feats data/simu_rirs_8k; then
|
||||
mkdir -p data/simu_rirs_8k
|
||||
if [ ! -e sim_rir_8k.zip ]; then
|
||||
wget --no-check-certificate http://www.openslr.org/resources/26/sim_rir_8k.zip
|
||||
fi
|
||||
unzip sim_rir_8k.zip -d data/sim_rir_8k
|
||||
find $PWD/data/sim_rir_8k -iname "*.wav" \
|
||||
| awk '{n=split($1,A,/[\/\.]/); print A[n-3]"_"A[n-1], $1}' \
|
||||
| sort > data/simu_rirs_8k/wav.scp
|
||||
awk '{print $1, $1}' data/simu_rirs_8k/wav.scp > data/simu_rirs_8k/utt2spk
|
||||
utils/fix_data_dir.sh data/simu_rirs_8k
|
||||
fi
|
||||
# Automatic segmentation using pretrained SAD model
|
||||
# it will take one day using 30 CPU jobs:
|
||||
# make_mfcc: 1 hour, compute_output: 18 hours, decode: 0.5 hours
|
||||
sad_nnet_dir=exp/segmentation_1a/tdnn_stats_asr_sad_1a
|
||||
sad_work_dir=exp/segmentation_1a/tdnn_stats_asr_sad_1a
|
||||
if ! validate_data_dir.sh --no-text $sad_work_dir/swb_sre_comb_seg; then
|
||||
if [ ! -d exp/segmentation_1a ]; then
|
||||
wget http://kaldi-asr.org/models/4/0004_tdnn_stats_asr_sad_1a.tar.gz
|
||||
tar zxf 0004_tdnn_stats_asr_sad_1a.tar.gz
|
||||
fi
|
||||
steps/segmentation/detect_speech_activity.sh \
|
||||
--nj $sad_num_jobs \
|
||||
--graph-opts "$sad_graph_opts" \
|
||||
--transform-probs-opts "$sad_priors_opts" $sad_opts \
|
||||
data/swb_sre_comb $sad_nnet_dir mfcc_hires $sad_work_dir \
|
||||
$sad_work_dir/swb_sre_comb || exit 1
|
||||
fi
|
||||
# Extract >1.5 sec segments and split into train/valid sets
|
||||
if ! validate_data_dir.sh --no-text --no-feats data/swb_sre_cv; then
|
||||
copy_data_dir.sh data/swb_sre_comb data/swb_sre_comb_seg
|
||||
awk '$4-$3>1.5{print;}' $sad_work_dir/swb_sre_comb_seg/segments > data/swb_sre_comb_seg/segments
|
||||
cp $sad_work_dir/swb_sre_comb_seg/{utt2spk,spk2utt} data/swb_sre_comb_seg
|
||||
fix_data_dir.sh data/swb_sre_comb_seg
|
||||
utils/subset_data_dir_tr_cv.sh data/swb_sre_comb_seg data/swb_sre_tr data/swb_sre_cv
|
||||
fi
|
||||
fi
|
||||
|
||||
simudir=data/simu
|
||||
if [ $stage -le 1 ]; then
|
||||
echo "simulation of mixture"
|
||||
mkdir -p $simudir/.work
|
||||
# Script that samples the random mixture configurations (lives under local/).
random_mixture_cmd=local/random_mixture.py
|
||||
# Script that renders the mixture wav files and data dirs (lives under local/).
make_mixture_cmd=local/make_mixture.py
|
||||
|
||||
for ((i=0; i<${#simu_opts_sil_scale_array[@]}; ++i)); do
|
||||
simu_opts_num_speaker=${simu_opts_num_speaker_array[i]}
|
||||
simu_opts_sil_scale=${simu_opts_sil_scale_array[i]}
|
||||
for dset in swb_sre_tr swb_sre_cv; do
|
||||
if [ "$dset" == "swb_sre_tr" ]; then
|
||||
n_mixtures=${simu_opts_num_train}
|
||||
else
|
||||
n_mixtures=500
|
||||
fi
|
||||
simuid=${dset}_ns${simu_opts_num_speaker}_beta${simu_opts_sil_scale}_${n_mixtures}
|
||||
# check if you have the simulation
|
||||
if ! validate_data_dir.sh --no-text --no-feats $simudir/data/$simuid; then
|
||||
# random mixture generation
|
||||
$train_cmd $simudir/.work/random_mixture_$simuid.log \
|
||||
$random_mixture_cmd --n_speakers $simu_opts_num_speaker --n_mixtures $n_mixtures \
|
||||
--speech_rvb_probability $simu_opts_rvb_prob \
|
||||
--sil_scale $simu_opts_sil_scale \
|
||||
data/$dset data/musan_noise_bg data/simu_rirs_8k \
|
||||
\> $simudir/.work/mixture_$simuid.scp
|
||||
nj=64
|
||||
mkdir -p $simudir/wav/$simuid
|
||||
# distribute simulated data to $simu_actual_dir
|
||||
split_scps=
|
||||
for n in $(seq $nj); do
|
||||
split_scps="$split_scps $simudir/.work/mixture_$simuid.$n.scp"
|
||||
mkdir -p $simudir/.work/data_$simuid.$n
|
||||
actual=${simu_actual_dirs[($n-1)%${#simu_actual_dirs[@]}]}/$simudir/wav/$simuid/$n
|
||||
mkdir -p $actual
|
||||
ln -nfs $actual $simudir/wav/$simuid/$n
|
||||
done
|
||||
utils/split_scp.pl $simudir/.work/mixture_$simuid.scp $split_scps || exit 1
|
||||
|
||||
$simu_cmd --max-jobs-run 64 JOB=1:$nj $simudir/.work/make_mixture_$simuid.JOB.log \
|
||||
$make_mixture_cmd --rate=8000 \
|
||||
$simudir/.work/mixture_$simuid.JOB.scp \
|
||||
$simudir/.work/data_$simuid.JOB $simudir/wav/$simuid/JOB
|
||||
utils/combine_data.sh $simudir/data/$simuid $simudir/.work/data_$simuid.*
|
||||
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
|
||||
$simudir/data/$simuid/utt2spk $simudir/data/$simuid/segments \
|
||||
$simudir/data/$simuid/rttm
|
||||
utils/data/get_reco2dur.sh $simudir/data/$simuid
|
||||
fi
|
||||
simuid_concat=${dset}_ns"$(IFS="n"; echo "${simu_opts_num_speaker_array[*]}")"_beta"$(IFS="n"; echo "${simu_opts_sil_scale_array[*]}")"_${n_mixtures}
|
||||
mkdir -p $simudir/data/$simuid_concat
|
||||
for f in `ls -F $simudir/data/$simuid | grep -v "/"`; do
|
||||
cat $simudir/data/$simuid/$f >> $simudir/data/$simuid_concat/$f
|
||||
done
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ]; then
|
||||
# compose eval/callhome2_spkall
|
||||
eval_set=data/eval/callhome2_spkall
|
||||
if ! validate_data_dir.sh --no-text --no-feats $eval_set; then
|
||||
utils/copy_data_dir.sh data/callhome2_spkall $eval_set
|
||||
cp data/callhome2_spkall/rttm $eval_set/rttm
|
||||
awk -v dstdir=wav/eval/callhome2_spkall '{print $1, dstdir"/"$1".wav"}' data/callhome2_spkall/wav.scp > $eval_set/wav.scp
|
||||
mkdir -p wav/eval/callhome2_spkall
|
||||
wav-copy scp:data/callhome2_spkall/wav.scp scp:$eval_set/wav.scp
|
||||
utils/data/get_reco2dur.sh $eval_set
|
||||
fi
|
||||
|
||||
# compose eval/callhome1_spkall
|
||||
adapt_set=data/eval/callhome1_spkall
|
||||
if ! validate_data_dir.sh --no-text --no-feats $adapt_set; then
|
||||
utils/copy_data_dir.sh data/callhome1_spkall $adapt_set
|
||||
cp data/callhome1_spkall/rttm $adapt_set/rttm
|
||||
awk -v dstdir=wav/eval/callhome1_spkall '{print $1, dstdir"/"$1".wav"}' data/callhome1_spkall/wav.scp > $adapt_set/wav.scp
|
||||
mkdir -p wav/eval/callhome1_spkall
|
||||
wav-copy scp:data/callhome1_spkall/wav.scp scp:$adapt_set/wav.scp
|
||||
utils/data/get_reco2dur.sh $adapt_set
|
||||
fi
|
||||
fi
|
||||
@ -1,5 +1,12 @@
|
||||
export FUNASR_DIR=$PWD/../../..
|
||||
|
||||
# kaldi-related
|
||||
export KALDI_ROOT=
|
||||
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
|
||||
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
|
||||
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
|
||||
. $KALDI_ROOT/tools/config/common_path.sh
|
||||
|
||||
# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
|
||||
export PYTHONIOENCODING=UTF-8
|
||||
export PYTHONPATH=../../../:$PYTHONPATH
|
||||
|
||||
@ -27,8 +27,8 @@ callhome_average_end=100
|
||||
|
||||
exp_dir="."
|
||||
input_size=345
|
||||
stage=1
|
||||
stop_stage=4
|
||||
stage=-1
|
||||
stop_stage=-1
|
||||
|
||||
# exp tag
|
||||
tag="exp_fix"
|
||||
@ -50,11 +50,26 @@ simu_allspkr_model_dir="baseline_$(basename "${simu_allspkr_diar_config}" .yaml)
|
||||
simu_allspkr_chunk2000_model_dir="baseline_$(basename "${simu_allspkr_chunk2000_diar_config}" .yaml)_${tag}"
|
||||
callhome_model_dir="baseline_$(basename "${callhome_diar_config}" .yaml)_${tag}"
|
||||
|
||||
# Prepare data for training and inference
|
||||
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
||||
echo "stage 0: Prepare data for training and inference"
|
||||
# simulate mixture data for training and inference
|
||||
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
|
||||
echo "stage -1: Simulate mixture data for training and inference"
|
||||
echo "The detail can be found in https://github.com/hitachi-speech/EEND"
|
||||
echo "Before running this step, you should download and compile kaldi and set KALDI_ROOT in this script and path.sh"
|
||||
echo "This stage may take a long time, please waiting..."
|
||||
KALDI_ROOT=
|
||||
ln -s $KALDI_ROOT/egs/wsj/s5/steps steps
|
||||
ln -s $KALDI_ROOT/egs/wsj/s5/utils utils
|
||||
. local/run_prepare_shared_eda.sh
|
||||
fi
|
||||
|
||||
## Prepare data for training and inference
|
||||
#if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
|
||||
# echo "stage 0: Prepare data for training and inference"
|
||||
# echo "The detail can be found in https://github.com/hitachi-speech/EEND"
|
||||
# . ./local/
|
||||
#fi
|
||||
#
|
||||
|
||||
# Training on simulated two-speaker data
|
||||
world_size=$gpu_num
|
||||
simu_2spkr_ave_id=avg${simu_average_2spkr_start}-${simu_average_2spkr_end}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user