mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
update repo
This commit is contained in:
parent
4c87b59319
commit
57b47cb826
@@ -34,14 +34,14 @@ fi
|
||||
# validate utt-key list, IC0803W0380 is a bad utterance
|
||||
awk '{print $1}' $corpus/wav.scp | grep -v 'IC0803W0380' > $tmp/wav_utt.list
|
||||
awk '{print $1}' $corpus/trans.txt > $tmp/trans_utt.list
|
||||
tools/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
|
||||
utils/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
|
||||
|
||||
# wav.scp
|
||||
awk -F'\t' -v path_prefix=$corpus '{printf("%s\t%s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp
|
||||
tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
|
||||
utils/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
|
||||
|
||||
# text
|
||||
tools/filter_scp.pl -f 1 $tmp/utt.list $corpus/trans.txt | sort -k 1 | uniq > $tmp/text
|
||||
utils/filter_scp.pl -f 1 $tmp/utt.list $corpus/trans.txt | sort -k 1 | uniq > $tmp/text
|
||||
|
||||
# copy prepared resources from tmp_dir to target dir
|
||||
mkdir -p $dir
|
||||
|
||||
Loading…
Reference in New Issue
Block a user