update repo

This commit is contained in:
嘉渊 2023-05-26 11:51:07 +08:00
parent 3a15e5392b
commit 167bab54bb
2 changed files with 7 additions and 1 deletion

View File

@@ -90,7 +90,7 @@ specaug_conf:
dataset_conf:
data_names: speech,text
-data_types: sound,text
+data_types: sound,text_nospace
shuffle: True
shuffle_conf:
shuffle_size: 2048

View File

@@ -148,6 +148,12 @@ class AudioDataset(IterableDataset):
if "key" not in sample_dict:
sample_dict["key"] = segs[0]
sample_dict['hw_tag'] = 1
+elif data_type == "text_nospace":
+text = item
+segs = text.strip().split(maxsplit=1)
+sample_dict[data_name] = [x for x in segs[1]]
+if "key" not in sample_dict:
+sample_dict["key"] = segs[0]
else:
text = item
segs = text.strip().split()