From 7aa2e885f41829e5148ed3be44d3ebb43e04ff40 Mon Sep 17 00:00:00 2001
From: lzr265946
Date: Fri, 10 Feb 2023 13:46:01 +0800
Subject: [PATCH] support for turning off timestamps

---
 funasr/bin/asr_inference.py                        |  2 +-
 funasr/bin/asr_inference_paraformer.py             |  8 +++++--
 .../bin/asr_inference_paraformer_timestamp.py      |  2 +-
 funasr/bin/asr_inference_paraformer_vad.py         | 19 ++++++++++++-----
 .../bin/asr_inference_paraformer_vad_punc.py       | 21 ++++++++++++++-----
 funasr/bin/asr_inference_uniasr.py                 |  2 +-
 funasr/bin/asr_inference_uniasr_vad.py             |  2 +-
 funasr/utils/postprocess_utils.py                  |  6 +++++-
 8 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/funasr/bin/asr_inference.py b/funasr/bin/asr_inference.py
index 16fa3e51c..ca8f2bced 100644
--- a/funasr/bin/asr_inference.py
+++ b/funasr/bin/asr_inference.py
@@ -453,7 +453,7 @@ def inference_modelscope(
                     ibest_writer["score"][key] = str(hyp.score)
 
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1
diff --git a/funasr/bin/asr_inference_paraformer.py b/funasr/bin/asr_inference_paraformer.py
index 709c5bfb4..6c5acfc00 100644
--- a/funasr/bin/asr_inference_paraformer.py
+++ b/funasr/bin/asr_inference_paraformer.py
@@ -428,7 +428,11 @@ def inference_modelscope(
         format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
     )
 
-    hotword_list_or_file = param_dict['hotword']
+    if param_dict is not None:
+        hotword_list_or_file = param_dict.get('hotword')
+    else:
+        hotword_list_or_file = None
+
     if ngpu >= 1 and torch.cuda.is_available():
         device = "cuda"
     else:
@@ -539,7 +543,7 @@ def inference_modelscope(
                     ibest_writer["rtf"][key] = rtf_cur
 
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1
diff --git a/funasr/bin/asr_inference_paraformer_timestamp.py b/funasr/bin/asr_inference_paraformer_timestamp.py
index 7e2e41456..7da48e2b8 100644
--- a/funasr/bin/asr_inference_paraformer_timestamp.py
+++ b/funasr/bin/asr_inference_paraformer_timestamp.py
@@ -436,7 +436,7 @@ def inference(
                     ibest_writer["score"][key] = str(hyp.score)
 
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1
diff --git a/funasr/bin/asr_inference_paraformer_vad.py b/funasr/bin/asr_inference_paraformer_vad.py
index 2832504c0..dbb271986 100644
--- a/funasr/bin/asr_inference_paraformer_vad.py
+++ b/funasr/bin/asr_inference_paraformer_vad.py
@@ -241,6 +241,11 @@ def inference_modelscope(
         allow_variable_data_keys=allow_variable_data_keys,
         inference=True,
     )
+
+    if param_dict is not None:
+        use_timestamp = param_dict.get('use_timestamp', True)
+    else:
+        use_timestamp = True
 
     finish_count = 0
     file_count = 1
@@ -284,8 +289,10 @@ def inference_modelscope(
 
                 text, token, token_int = result[0], result[1], result[2]
                 time_stamp = None if len(result) < 4 else result[3]
-
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                if use_timestamp and time_stamp is not None:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                else:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                 text_postprocessed = ""
                 time_stamp_postprocessed = ""
                 text_postprocessed_punc = postprocessed_result
@@ -293,9 +300,11 @@
                     text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                                postprocessed_result[1], \
                                                                                postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
-                    if len(word_lists) > 0 and text2punc is not None:
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+                else:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+                text_postprocessed_punc = text_postprocessed
+                if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
 
                 item = {'key': key, 'value': text_postprocessed_punc}
 
diff --git a/funasr/bin/asr_inference_paraformer_vad_punc.py b/funasr/bin/asr_inference_paraformer_vad_punc.py
index 7d18e0218..c4bb61bd1 100644
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -570,6 +570,11 @@ def inference_modelscope(
         allow_variable_data_keys=allow_variable_data_keys,
         inference=True,
     )
+
+    if param_dict is not None:
+        use_timestamp = param_dict.get('use_timestamp', True)
+    else:
+        use_timestamp = True
 
     finish_count = 0
     file_count = 1
@@ -612,8 +617,11 @@ def inference_modelscope(
                 result = result_segments[0]
                 text, token, token_int = result[0], result[1], result[2]
                 time_stamp = None if len(result) < 4 else result[3]
-
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+
+                if use_timestamp and time_stamp is not None:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                else:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                 text_postprocessed = ""
                 time_stamp_postprocessed = ""
                 text_postprocessed_punc = postprocessed_result
@@ -621,9 +629,12 @@
                     text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                                postprocessed_result[1], \
                                                                                postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
-                    if len(word_lists) > 0 and text2punc is not None:
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+                else:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+
+                text_postprocessed_punc = text_postprocessed
+                if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
 
                 item = {'key': key, 'value': text_postprocessed_punc}
                 if text_postprocessed != "":
diff --git a/funasr/bin/asr_inference_uniasr.py b/funasr/bin/asr_inference_uniasr.py
index cfec9a00c..0a5824c5c 100644
--- a/funasr/bin/asr_inference_uniasr.py
+++ b/funasr/bin/asr_inference_uniasr.py
@@ -492,7 +492,7 @@ def inference_modelscope(
                     ibest_writer["score"][key] = str(hyp.score)
 
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1
diff --git a/funasr/bin/asr_inference_uniasr_vad.py b/funasr/bin/asr_inference_uniasr_vad.py
index cfec9a00c..0a5824c5c 100644
--- a/funasr/bin/asr_inference_uniasr_vad.py
+++ b/funasr/bin/asr_inference_uniasr_vad.py
@@ -492,7 +492,7 @@ def inference_modelscope(
                     ibest_writer["score"][key] = str(hyp.score)
 
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1
diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index 4da0d5963..575fb90dd 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -232,5 +232,9 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
         return sentence, ts_lists, real_word_lists
     else:
         word_lists = abbr_dispose(word_lists)
+        real_word_lists = []
+        for ch in word_lists:
+            if ch != ' ':
+                real_word_lists.append(ch)
         sentence = ''.join(word_lists).strip()
-        return sentence
+        return sentence, real_word_lists
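
Note: what follows is a minimal usage sketch, not part of the patch, illustrating the
updated contract of funasr.utils.postprocess_utils.sentence_postprocess that the patched
callers rely on. Without time stamps the function now returns a (sentence, word_list)
tuple instead of a bare string, which is why the inference scripts unpack two values;
the timestamp path still returns (sentence, ts_lists, real_word_lists). The token list
below is a made-up placeholder.

    from funasr.utils import postprocess_utils

    # Placeholder token sequence standing in for a decode hypothesis.
    token = ["ni", "hao", "shi", "jie"]

    # No-timestamp path: now yields (sentence, real_word_lists), hence the
    # "text_postprocessed, _ = ..." unpacking in the patched scripts.
    sentence, word_lists = postprocess_utils.sentence_postprocess(token)

    # The patched VAD pipelines read the switch from param_dict, e.g.
    # param_dict = {"use_timestamp": False}, and then call
    # sentence_postprocess(token) without passing time stamps.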