From da830cb825edfe55a5a40eddfdd2a5fb8d8e8247 Mon Sep 17 00:00:00 2001 From: lzr265946 Date: Fri, 24 Mar 2023 10:52:59 +0800 Subject: [PATCH] fix uniasr postprocess --- funasr/bin/asr_inference_uniasr.py | 3 ++- funasr/bin/asr_inference_uniasr_vad.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/funasr/bin/asr_inference_uniasr.py b/funasr/bin/asr_inference_uniasr.py index ac71538a6..7961d5af3 100644 --- a/funasr/bin/asr_inference_uniasr.py +++ b/funasr/bin/asr_inference_uniasr.py @@ -261,6 +261,7 @@ class Speech2Text: # Change integer-ids to tokens token = self.converter.ids2tokens(token_int) + token = list(filter(lambda x: x != "", token)) if self.tokenizer is not None: text = self.tokenizer.tokens2text(token) @@ -512,7 +513,7 @@ def inference_modelscope( finish_count += 1 asr_utils.print_progress(finish_count / file_count) if writer is not None: - ibest_writer["text"][key] = text + ibest_writer["text"][key] = text_postprocessed return asr_result_list return _forward diff --git a/funasr/bin/asr_inference_uniasr_vad.py b/funasr/bin/asr_inference_uniasr_vad.py index 7cb889b7d..3164d0d24 100644 --- a/funasr/bin/asr_inference_uniasr_vad.py +++ b/funasr/bin/asr_inference_uniasr_vad.py @@ -261,6 +261,7 @@ class Speech2Text: # Change integer-ids to tokens token = self.converter.ids2tokens(token_int) + token = list(filter(lambda x: x != "", token)) if self.tokenizer is not None: text = self.tokenizer.tokens2text(token) @@ -512,7 +513,7 @@ def inference_modelscope( finish_count += 1 asr_utils.print_progress(finish_count / file_count) if writer is not None: - ibest_writer["text"][key] = text + ibest_writer["text"][key] = text_postprocessed return asr_result_list return _forward