This commit is contained in:
haoneng.lhn 2023-10-11 17:19:38 +08:00
parent 3d17f80b99
commit 191b8018f2

View File

@@ -108,7 +108,7 @@ class AudioDataset(IterableDataset):
ark_reader = ReadHelper('ark:{}'.format(data_file))
reader_list.append(ark_reader)
elif data_type == "text" or data_type == "sound" or data_type == 'text_hotword':
-text_reader = open(data_file, "r")
+text_reader = open(data_file, "r", encoding="utf-8")
reader_list.append(text_reader)
elif data_type == "none":
continue
@@ -205,7 +205,7 @@ def Dataset(data_list_file,
# pre_prob = conf.get("pre_prob", 0) # unused yet
if pre_hwfile is not None:
pre_hwlist = []
-with open(pre_hwfile, 'r') as fin:
+with open(pre_hwfile, 'r', encoding="utf-8") as fin:
for line in fin.readlines():
pre_hwlist.append(line.strip())
else: