Add support for turning off timestamps

2025-09-15 14:48:36 +08:00 · 2023-02-10 13:46:01 +08:00 · 2023-02-10 13:46:01 +08:00 · 7aa2e885f4
commit 7aa2e885f4
parent bd8f163ee6
8 changed files with 45 additions and 17 deletions
--- a/funasr/bin/asr_inference.py
+++ b/funasr/bin/asr_inference.py
@@ -453,7 +453,7 @@ def inference_modelscope(
                    ibest_writer["score"][key] = str(hyp.score)
                if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1
--- a/funasr/bin/asr_inference_paraformer.py
+++ b/funasr/bin/asr_inference_paraformer.py
@@ -428,7 +428,11 @@ def inference_modelscope(
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    )
-    hotword_list_or_file = param_dict['hotword']
+    if param_dict is not None:
        hotword_list_or_file = param_dict.get('hotword')
    else:
        hotword_list_or_file = None
    if ngpu >= 1 and torch.cuda.is_available():
        device = "cuda"
    else:
@@ -539,7 +543,7 @@ def inference_modelscope(
                        ibest_writer["rtf"][key] = rtf_cur
                    if text is not None:
-                        text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                        text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                        item = {'key': key, 'value': text_postprocessed}
                        asr_result_list.append(item)
                        finish_count += 1
--- a/funasr/bin/asr_inference_paraformer_timestamp.py
+++ b/funasr/bin/asr_inference_paraformer_timestamp.py
@@ -436,7 +436,7 @@ def inference(
                    ibest_writer["score"][key] = str(hyp.score)
                if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1
--- a/funasr/bin/asr_inference_paraformer_vad.py
+++ b/funasr/bin/asr_inference_paraformer_vad.py
@@ -242,6 +242,11 @@ def inference_modelscope(
            inference=True,
        )
        if param_dict is not None:
            use_timestamp = param_dict.get('use_timestamp', True)
        else:
            use_timestamp = True
        finish_count = 0
        file_count = 1
        lfr_factor = 6
@@ -284,8 +289,10 @@ def inference_modelscope(
                text, token, token_int = result[0], result[1], result[2]
                time_stamp = None if len(result) < 4 else result[3]
-                
+                if use_timestamp and time_stamp is not None:
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
                else:
                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                text_postprocessed = ""
                time_stamp_postprocessed = ""
                text_postprocessed_punc = postprocessed_result
@@ -293,9 +300,11 @@ def inference_modelscope(
                    text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                               postprocessed_result[1], \
                                                                               postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
+                else:
-                    if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+                text_postprocessed_punc = text_postprocessed
                if len(word_lists) > 0 and text2punc is not None:
                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
                item = {'key': key, 'value': text_postprocessed_punc}
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -571,6 +571,11 @@ def inference_modelscope(
            inference=True,
        )
        if param_dict is not None:
            use_timestamp = param_dict.get('use_timestamp', True)
        else:
            use_timestamp = True
        finish_count = 0
        file_count = 1
        lfr_factor = 6
@@ -613,7 +618,10 @@ def inference_modelscope(
                text, token, token_int = result[0], result[1], result[2]
                time_stamp = None if len(result) < 4 else result[3]
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                if use_timestamp and time_stamp is not None: 
                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
                else:
                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                text_postprocessed = ""
                time_stamp_postprocessed = ""
                text_postprocessed_punc = postprocessed_result
@@ -621,9 +629,12 @@ def inference_modelscope(
                    text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                               postprocessed_result[1], \
                                                                               postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
+                else:
-                    if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+
                text_postprocessed_punc = text_postprocessed
                if len(word_lists) > 0 and text2punc is not None:
                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
                item = {'key': key, 'value': text_postprocessed_punc}
                if text_postprocessed != "":
--- a/funasr/bin/asr_inference_uniasr.py
+++ b/funasr/bin/asr_inference_uniasr.py
@@ -492,7 +492,7 @@ def inference_modelscope(
                    ibest_writer["score"][key] = str(hyp.score)
                if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1
--- a/funasr/bin/asr_inference_uniasr_vad.py
+++ b/funasr/bin/asr_inference_uniasr_vad.py
@@ -492,7 +492,7 @@ def inference_modelscope(
                    ibest_writer["score"][key] = str(hyp.score)
                if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -232,5 +232,9 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
        return sentence, ts_lists, real_word_lists
    else:
        word_lists = abbr_dispose(word_lists)
        real_word_lists = []
        for ch in word_lists:
            if ch != ' ':
                real_word_lists.append(ch)
        sentence = ''.join(word_lists).strip()
-        return sentence
+        return sentence, real_word_lists