diff --git a/funasr/datasets/iterable_dataset.py b/funasr/datasets/iterable_dataset.py index 44366183f..bd8da7a57 100644 --- a/funasr/datasets/iterable_dataset.py +++ b/funasr/datasets/iterable_dataset.py @@ -244,10 +244,14 @@ class IterableESPnetDataset(IterableDataset): array = torchaudio.transforms.Resample(orig_freq=audio_fs, new_freq=model_fs)(array) array = array.numpy() - if self.mc: - data[name] = array.transpose(0, 1) + + if _type == "sound": + if self.mc: + data[name] = array.transpose(0, 1) + else: + data[name] = array[0] else: - data[name] = array[0] + data[name] = array if self.preprocess is not None: data = self.preprocess(uid, data)