diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py index c4a078931..10aa29f7d 100644 --- a/funasr/utils/postprocess_utils.py +++ b/funasr/utils/postprocess_utils.py @@ -125,9 +125,11 @@ def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]: if time_stamp is not None: end = time_stamp[ts_nums[num]][1] ts_lists.append([begin, end]) - else: - word_lists.append(words[num]) - if time_stamp is not None and words[num] != " ": + else: + # length of time_stamp may not equal to length of words because of the (somehow improper) threshold set in timestamp_tools.py line 46, e.g., length of time_stamp can be zero but length of words is not. + # Moreover, move "word_lists.append(words[num])" into if clause, to keep length of word_lists and length of ts_lists equal. + if time_stamp is not None and ts_nums[num]") else: - timestamp_list[-1][1] = num_frames * TIME_RATE + if len(timestamp_list)>0: + timestamp_list[-1][1] = num_frames * TIME_RATE if vad_offset: # add offset time in model with vad for i in range(len(timestamp_list)): timestamp_list[i][0] = timestamp_list[i][0] + vad_offset / 1000.0