diff --git a/funasr/datasets/audio_datasets/index_ds.py b/funasr/datasets/audio_datasets/index_ds.py index 06bd4de16..2677d3348 100644 --- a/funasr/datasets/audio_datasets/index_ds.py +++ b/funasr/datasets/audio_datasets/index_ds.py @@ -34,7 +34,7 @@ class IndexDSJsonlRankFull(torch.utils.data.Dataset): with open(path, encoding='utf-8') as fin: file_list_all = fin.readlines() - num_per_slice = len(file_list_all) // data_split_num + num_per_slice = (len(file_list_all)-1) // data_split_num + 1 file_list = file_list_all[data_split_i * num_per_slice:(data_split_i + 1) * num_per_slice] logging.info( f"is_training: {is_training}, data_split_num: {data_split_num}, data_split_i: {data_split_i}, \nfile_list: {file_list}, \nfile_list_all: {file_list_all}")