mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Merge branch 'main' of github.com:alibaba-damo-academy/FunASR
add
This commit is contained in:
commit
7f46849cad
@ -174,90 +174,94 @@ class IterableESPnetDataset(IterableDataset):
|
||||
def __iter__(self) -> Iterator[Tuple[Union[str, int], Dict[str, np.ndarray]]]:
|
||||
count = 0
|
||||
if len(self.path_name_type_list) != 0 and (self.path_name_type_list[0][2] == "bytes" or self.path_name_type_list[0][2] == "waveform"):
|
||||
linenum = len(self.path_name_type_list)
|
||||
data = {}
|
||||
value = self.path_name_type_list[0][0]
|
||||
uid = 'utt_id'
|
||||
name = self.path_name_type_list[0][1]
|
||||
_type = self.path_name_type_list[0][2]
|
||||
func = DATA_TYPES[_type]
|
||||
array = func(value)
|
||||
if self.fs is not None and name == "speech":
|
||||
audio_fs = self.fs["audio_fs"]
|
||||
model_fs = self.fs["model_fs"]
|
||||
if audio_fs is not None and model_fs is not None:
|
||||
array = torch.from_numpy(array)
|
||||
array = array.unsqueeze(0)
|
||||
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
|
||||
new_freq=model_fs)(array)
|
||||
array = array.squeeze(0).numpy()
|
||||
data[name] = array
|
||||
for i in range(linenum):
|
||||
value = self.path_name_type_list[i][0]
|
||||
uid = 'utt_id'
|
||||
name = self.path_name_type_list[i][1]
|
||||
_type = self.path_name_type_list[i][2]
|
||||
func = DATA_TYPES[_type]
|
||||
array = func(value)
|
||||
if self.fs is not None and (name == "speech" or name == "ref_speech"):
|
||||
audio_fs = self.fs["audio_fs"]
|
||||
model_fs = self.fs["model_fs"]
|
||||
if audio_fs is not None and model_fs is not None:
|
||||
array = torch.from_numpy(array)
|
||||
array = array.unsqueeze(0)
|
||||
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
|
||||
new_freq=model_fs)(array)
|
||||
array = array.squeeze(0).numpy()
|
||||
data[name] = array
|
||||
|
||||
if self.preprocess is not None:
|
||||
data = self.preprocess(uid, data)
|
||||
for name in data:
|
||||
count += 1
|
||||
value = data[name]
|
||||
if not isinstance(value, np.ndarray):
|
||||
raise RuntimeError(
|
||||
f'All values must be converted to np.ndarray object '
|
||||
f'by preprocessing, but "{name}" is still {type(value)}.')
|
||||
# Cast to desired type
|
||||
if value.dtype.kind == 'f':
|
||||
value = value.astype(self.float_dtype)
|
||||
elif value.dtype.kind == 'i':
|
||||
value = value.astype(self.int_dtype)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f'Not supported dtype: {value.dtype}')
|
||||
data[name] = value
|
||||
if self.preprocess is not None:
|
||||
data = self.preprocess(uid, data)
|
||||
for name in data:
|
||||
count += 1
|
||||
value = data[name]
|
||||
if not isinstance(value, np.ndarray):
|
||||
raise RuntimeError(
|
||||
f'All values must be converted to np.ndarray object '
|
||||
f'by preprocessing, but "{name}" is still {type(value)}.')
|
||||
# Cast to desired type
|
||||
if value.dtype.kind == 'f':
|
||||
value = value.astype(self.float_dtype)
|
||||
elif value.dtype.kind == 'i':
|
||||
value = value.astype(self.int_dtype)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f'Not supported dtype: {value.dtype}')
|
||||
data[name] = value
|
||||
|
||||
yield uid, data
|
||||
|
||||
elif len(self.path_name_type_list) != 0 and self.path_name_type_list[0][2] == "sound" and not self.path_name_type_list[0][0].lower().endswith(".scp"):
|
||||
linenum = len(self.path_name_type_list)
|
||||
data = {}
|
||||
value = self.path_name_type_list[0][0]
|
||||
uid = os.path.basename(self.path_name_type_list[0][0]).split(".")[0]
|
||||
name = self.path_name_type_list[0][1]
|
||||
_type = self.path_name_type_list[0][2]
|
||||
if _type == "sound":
|
||||
audio_type = os.path.basename(value).split(".")[1].lower()
|
||||
if audio_type not in SUPPORT_AUDIO_TYPE_SETS:
|
||||
raise NotImplementedError(
|
||||
f'Not supported audio type: {audio_type}')
|
||||
if audio_type == "pcm":
|
||||
_type = "pcm"
|
||||
for i in range(linenum):
|
||||
value = self.path_name_type_list[i][0]
|
||||
uid = os.path.basename(self.path_name_type_list[i][0]).split(".")[0]
|
||||
name = self.path_name_type_list[i][1]
|
||||
_type = self.path_name_type_list[i][2]
|
||||
if _type == "sound":
|
||||
audio_type = os.path.basename(value).split(".")[1].lower()
|
||||
if audio_type not in SUPPORT_AUDIO_TYPE_SETS:
|
||||
raise NotImplementedError(
|
||||
f'Not supported audio type: {audio_type}')
|
||||
if audio_type == "pcm":
|
||||
_type = "pcm"
|
||||
|
||||
func = DATA_TYPES[_type]
|
||||
array = func(value)
|
||||
if self.fs is not None and name == "speech":
|
||||
audio_fs = self.fs["audio_fs"]
|
||||
model_fs = self.fs["model_fs"]
|
||||
if audio_fs is not None and model_fs is not None:
|
||||
array = torch.from_numpy(array)
|
||||
array = array.unsqueeze(0)
|
||||
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
|
||||
new_freq=model_fs)(array)
|
||||
array = array.squeeze(0).numpy()
|
||||
data[name] = array
|
||||
func = DATA_TYPES[_type]
|
||||
array = func(value)
|
||||
if self.fs is not None and (name == "speech" or name == "ref_speech"):
|
||||
audio_fs = self.fs["audio_fs"]
|
||||
model_fs = self.fs["model_fs"]
|
||||
if audio_fs is not None and model_fs is not None:
|
||||
array = torch.from_numpy(array)
|
||||
array = array.unsqueeze(0)
|
||||
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
|
||||
new_freq=model_fs)(array)
|
||||
array = array.squeeze(0).numpy()
|
||||
data[name] = array
|
||||
|
||||
if self.preprocess is not None:
|
||||
data = self.preprocess(uid, data)
|
||||
for name in data:
|
||||
count += 1
|
||||
value = data[name]
|
||||
if not isinstance(value, np.ndarray):
|
||||
raise RuntimeError(
|
||||
f'All values must be converted to np.ndarray object '
|
||||
f'by preprocessing, but "{name}" is still {type(value)}.')
|
||||
# Cast to desired type
|
||||
if value.dtype.kind == 'f':
|
||||
value = value.astype(self.float_dtype)
|
||||
elif value.dtype.kind == 'i':
|
||||
value = value.astype(self.int_dtype)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f'Not supported dtype: {value.dtype}')
|
||||
data[name] = value
|
||||
if self.preprocess is not None:
|
||||
data = self.preprocess(uid, data)
|
||||
for name in data:
|
||||
count += 1
|
||||
value = data[name]
|
||||
if not isinstance(value, np.ndarray):
|
||||
raise RuntimeError(
|
||||
f'All values must be converted to np.ndarray object '
|
||||
f'by preprocessing, but "{name}" is still {type(value)}.')
|
||||
# Cast to desired type
|
||||
if value.dtype.kind == 'f':
|
||||
value = value.astype(self.float_dtype)
|
||||
elif value.dtype.kind == 'i':
|
||||
value = value.astype(self.int_dtype)
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
f'Not supported dtype: {value.dtype}')
|
||||
data[name] = value
|
||||
|
||||
yield uid, data
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user