mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Add sentence timestamp support
Added support for sentence-level timestamps, which are particularly useful for applications such as lyrics and subtitles.
This commit is contained in:
parent
837dc3758a
commit
bea5d98423
@ -47,6 +47,8 @@ from funasr.utils.timestamp_tools import time_stamp_lfr6_pl
|
||||
from funasr.bin.punctuation_infer import Text2Punc
|
||||
from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer
|
||||
|
||||
# Import through the `funasr` package name, like the other funasr imports in
# this module; the `FunASR.` prefix only resolves when the parent of the
# repository checkout happens to be on sys.path.
from funasr.utils.timestamp_tools import time_stamp_sentence
|
||||
|
||||
header_colors = '\033[95m'
|
||||
end_colors = '\033[0m'
|
||||
|
||||
@ -720,6 +722,7 @@ def inference_modelscope(
|
||||
text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
|
||||
|
||||
text_postprocessed_punc = text_postprocessed
|
||||
punc_id_list = []
|
||||
if len(word_lists) > 0 and text2punc is not None:
|
||||
text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
|
||||
|
||||
@ -729,6 +732,8 @@ def inference_modelscope(
|
||||
if time_stamp_postprocessed != "":
|
||||
item['time_stamp'] = time_stamp_postprocessed
|
||||
|
||||
item['sentences'] = time_stamp_sentence(punc_id_list, time_stamp_postprocessed, text_postprocessed)
|
||||
|
||||
asr_result_list.append(item)
|
||||
finish_count += 1
|
||||
# asr_utils.print_progress(finish_count / file_count)
|
||||
|
||||
@ -54,3 +54,55 @@ def time_stamp_lfr6_pl(us_alphas, us_cif_peak, char_list, begin_time=0.0, end_ti
|
||||
res.append([int(timestamp[0] * 1000), int(timestamp[1] * 1000)])
|
||||
return res
|
||||
|
||||
def time_stamp_sentence(punc_id_list, time_stamp_postprocessed, text_postprocessed):
    """Group per-token timestamps into punctuation-delimited segments.

    Args:
        punc_id_list: One punctuation id per token. Id ``2`` closes the
            current segment with ``','`` and id ``3`` closes it with ``'.'``;
            any other id leaves the segment open.
        time_stamp_postprocessed: Sequence of ``[start, end]`` pairs, one per
            token (an empty value such as ``""`` is accepted and yields no
            segments).
        text_postprocessed: Recognized text with tokens separated by
            whitespace.

    Returns:
        A list of dicts with the keys ``'text'``, ``'start'`` and ``'end'``.

        * Empty list when the text or the timestamps are missing/empty.
        * A single entry spanning the first start to the last end when the
          punctuation ids are missing or when the number of tokens,
          punctuation ids and timestamps do not all agree. In this fallback
          ``'text'`` is the list of tokens (kept as a list for backward
          compatibility), not a joined string.
        * Otherwise one entry per segment, where ``'text'`` is the
          concatenated tokens followed by the closing punctuation mark.
          Tokens left over after the last closing punctuation are returned
          as a final segment without a punctuation mark.
    """
    res = []
    if text_postprocessed is None or time_stamp_postprocessed is None:
        return res
    if len(time_stamp_postprocessed) == 0 or len(text_postprocessed) == 0:
        return res

    texts = text_postprocessed.split()
    first_start = time_stamp_postprocessed[0][0]
    last_end = time_stamp_postprocessed[-1][1]

    # Without punctuation, or when tokens / punctuation ids / timestamps
    # cannot be aligned one-to-one, report the whole utterance as one entry
    # instead of guessing (a shorter token list used to raise IndexError).
    has_punc = punc_id_list is not None and len(punc_id_list) > 0
    if not has_punc or not (
        len(punc_id_list) == len(time_stamp_postprocessed) == len(texts)
    ):
        res.append({
            'text': texts,
            "start": first_start,
            "end": last_end
        })
        return res

    sentence_text = ''
    sentence_start = first_start
    for token, punc_id, stamp in zip(texts, punc_id_list, time_stamp_postprocessed):
        sentence_text += token
        # Plain `==` comparisons (rather than a dict lookup) so that ids
        # coming from array/tensor scalars still match.
        if punc_id == 2:
            mark = ','
        elif punc_id == 3:
            mark = '.'
        else:
            continue
        res.append({
            'text': sentence_text + mark,
            "start": sentence_start,
            "end": stamp[1]
        })
        sentence_text = ''
        # The next segment starts where the closing token ended.
        sentence_start = stamp[1]

    # Flush tokens that follow the last closing punctuation; previously they
    # were silently dropped from the result.
    if sentence_text:
        res.append({
            'text': sentence_text,
            "start": sentence_start,
            "end": last_end
        })
    return res
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user