mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
update
This commit is contained in:
parent
9f270ead3d
commit
e81eef957f
@ -1,8 +1,10 @@
|
||||
from kaldiio import ReadHelper
|
||||
|
||||
import argparse
|
||||
import numpy as np
|
||||
import json
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import torchaudio
|
||||
import torchaudio.compliance.kaldi as kaldi
|
||||
|
||||
|
||||
def get_parser():
|
||||
@ -11,55 +13,83 @@ def get_parser():
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dims",
|
||||
"-d",
|
||||
"--dim",
|
||||
default=80,
|
||||
type=int,
|
||||
help="feature dims",
|
||||
help="feature dimension",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ark-file",
|
||||
"-a",
|
||||
"--wav_path",
|
||||
default=False,
|
||||
required=True,
|
||||
type=str,
|
||||
help="fbank ark file",
|
||||
help="the path of wav scps",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ark-index",
|
||||
"-i",
|
||||
"--idx",
|
||||
default=1,
|
||||
required=True,
|
||||
type=int,
|
||||
help="ark index",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
"-o",
|
||||
default=False,
|
||||
required=True,
|
||||
type=str,
|
||||
help="output dir",
|
||||
help="index",
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def compute_fbank(wav_file,
                  num_mel_bins=80,
                  frame_length=25,
                  frame_shift=10,
                  dither=0.0,
                  resample_rate=16000,
                  speed=1.0,
                  window_type="hamming"):
    """Load an audio file and return its Kaldi-compatible fbank features.

    The audio is read with ``torchaudio.load``, resampled to
    ``resample_rate`` when the file's own rate differs, optionally
    speed-perturbed through sox effects, multiplied by ``1 << 15`` and then
    handed to ``torchaudio.compliance.kaldi.fbank``.

    Args:
        wav_file: Audio source passed straight to ``torchaudio.load``.
        num_mel_bins: Forwarded to ``kaldi.fbank`` as ``num_mel_bins``.
        frame_length: Forwarded to ``kaldi.fbank`` as ``frame_length``.
        frame_shift: Forwarded to ``kaldi.fbank`` as ``frame_shift``.
        dither: Forwarded to ``kaldi.fbank`` as ``dither``.
        resample_rate: Target sampling rate; also given to ``kaldi.fbank``
            as ``sample_frequency``.
        speed: Speed factor; sox effects are applied only when it is not
            exactly ``1.0``.
        window_type: Forwarded to ``kaldi.fbank`` as ``window_type``.

    Returns:
        The tensor produced by ``kaldi.fbank`` converted with ``.numpy()``.
    """
    samples, source_rate = torchaudio.load(wav_file)

    # Bring the audio to the requested rate before any further processing.
    if source_rate != resample_rate:
        resampler = torchaudio.transforms.Resample(orig_freq=source_rate,
                                                   new_freq=resample_rate)
        samples = resampler(samples)

    # Speed perturbation: the trailing 'rate' effect keeps the output at
    # resample_rate after the 'speed' effect has been applied.
    if speed != 1.0:
        sox_effects = [['speed', str(speed)], ['rate', str(resample_rate)]]
        samples, _ = torchaudio.sox_effects.apply_effects_tensor(
            samples, resample_rate, sox_effects
        )

    # NOTE(review): presumably rescales float samples to the 16-bit integer
    # range that Kaldi-style feature extraction expects -- confirm.
    samples = samples * (1 << 15)

    fbank_options = dict(
        num_mel_bins=num_mel_bins,
        frame_length=frame_length,
        frame_shift=frame_shift,
        dither=dither,
        energy_floor=0.0,
        window_type=window_type,
        sample_frequency=resample_rate,
    )
    features = kaldi.fbank(samples, **fbank_options)

    return features.numpy()
|
||||
|
||||
|
||||
def main():
|
||||
parser = get_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
ark_file = args.ark_file + "/feats." + str(args.ark_index) + ".ark"
|
||||
cmvn_file = args.output_dir + "/cmvn." + str(args.ark_index) + ".json"
|
||||
wav_scp_file = os.path.join(args.wav_path + "{}/wav.scp".format(args.idx))
|
||||
cmvn_file = os.path.join(args.wav_path + "{}/cmvn.json".format(args.idx))
|
||||
|
||||
mean_stats = np.zeros(args.dims)
|
||||
var_stats = np.zeros(args.dims)
|
||||
total_frames = 0
|
||||
|
||||
with ReadHelper('ark:{}'.format(ark_file)) as ark_reader:
|
||||
for key, mat in ark_reader:
|
||||
mean_stats += np.sum(mat, axis=0)
|
||||
var_stats += np.sum(np.square(mat), axis=0)
|
||||
total_frames += mat.shape[0]
|
||||
# with ReadHelper('ark:{}'.format(ark_file)) as ark_reader:
|
||||
# for key, mat in ark_reader:
|
||||
# mean_stats += np.sum(mat, axis=0)
|
||||
# var_stats += np.sum(np.square(mat), axis=0)
|
||||
# total_frames += mat.shape[0]
|
||||
with open(wav_scp_file) as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
_, wav_file = line.strip().split()
|
||||
fbank = compute_fbank(wav_file, num_mel_bins=args.dims)
|
||||
mean_stats += np.sum(fbank, axis=0)
|
||||
var_stats += np.sum(np.square(fbank), axis=0)
|
||||
total_frames += fbank.shape[0]
|
||||
|
||||
cmvn_info = {
|
||||
'mean_stats': list(mean_stats.tolist()),
|
||||
|
||||
@ -23,9 +23,8 @@ utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
|
||||
output_dir=${fbankdir}/cmvn
|
||||
logdir=${fbankdir}/cmvn/log
|
||||
$cmd JOB=1:$nj $logdir/cmvn.JOB.log \
|
||||
python utils/compute_cmvn.py -d ${feats_dim} -a $fbankdir/ark -i JOB -o ${output_dir} \
|
||||
|| exit 1;
|
||||
python utils/compute_cmvn.py -dim ${feats_dim} -wav_path $split_dir -idx JOB
|
||||
|
||||
python utils/combine_cmvn_file.py -d ${feats_dim} -c ${output_dir} -n $nj -o $fbankdir
|
||||
|
||||
echo "$0: Succeeded compute global cmvn"
|
||||
#python utils/combine_cmvn_file.py -d ${feats_dim} -c ${output_dir} -n $nj -o $fbankdir
|
||||
#
|
||||
#echo "$0: Succeeded compute global cmvn"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user