diff --git a/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml b/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml index 9842fa4f1..a9658b8eb 100644 --- a/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml +++ b/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml @@ -90,7 +90,7 @@ specaug_conf: dataset_conf: data_names: speech,text - data_types: sound,text + data_types: sound,text_nospace shuffle: True shuffle_conf: shuffle_size: 2048 diff --git a/funasr/datasets/large_datasets/dataset.py b/funasr/datasets/large_datasets/dataset.py index 5df61fdb0..68b63e137 100644 --- a/funasr/datasets/large_datasets/dataset.py +++ b/funasr/datasets/large_datasets/dataset.py @@ -148,6 +148,12 @@ class AudioDataset(IterableDataset): if "key" not in sample_dict: sample_dict["key"] = segs[0] sample_dict['hw_tag'] = 1 + elif data_type == "text_nospace": + text = item + segs = text.strip().split(maxsplit=1) + sample_dict[data_name] = [x for x in segs[1]] + if "key" not in sample_dict: + sample_dict["key"] = segs[0] else: text = item segs = text.strip().split()