update streaming paraformer text processing

haoneng.lhn 2023-05-05 11:36:26 +08:00
parent 297fafd674
commit 7987a7b6fa

@@ -301,10 +301,6 @@ class Speech2Text:
 token = self.converter.ids2tokens(token_int)
 token = " ".join(token)
-#if self.tokenizer is not None:
-# text = self.tokenizer.tokens2text(token)
-#else:
-# text = None
 results.append(token)
 # assert check_return_type(results)
@@ -556,8 +552,8 @@ def inference_modelscope(
 input_lens = torch.tensor([stride_size])
 asr_result = speech2text(cache, raw_inputs[:, sample_offset: sample_offset + stride_size], input_lens)
 if len(asr_result) != 0:
-final_result += asr_result[0]
-item = {'key': "utt", 'value': [final_result]}
+final_result += " ".join(asr_result) + " "
+item = {'key': "utt", 'value': [final_result.strip()]}
 else:
 input_lens = torch.tensor([raw_inputs.shape[1]])
 cache["encoder"]["is_final"] = is_final
@@ -751,12 +747,3 @@ def main(cmd=None):
 if __name__ == "__main__":
 main()
-# from modelscope.pipelines import pipeline
-# from modelscope.utils.constant import Tasks
-#
-# inference_16k_pipline = pipeline(
-# task=Tasks.auto_speech_recognition,
-# model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')
-#
-# rec_result = inference_16k_pipline(audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
-# print(rec_result)
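
Note: the behavioural change is in inference_modelscope. Per streaming chunk, every string in asr_result is now joined into final_result with spaces, and the accumulated text is stripped of the trailing space before being written to the output item, instead of appending only asr_result[0] with no separator. Below is a minimal illustrative sketch of the old versus new accumulation (not part of the commit); the chunks list is hypothetical stand-in data for the per-chunk speech2text outputs.

# Sketch of the text-accumulation change; `chunks` is hypothetical example data,
# where each element stands in for one per-chunk `asr_result` list.
def accumulate_old(chunks):
    final_result = ""
    for asr_result in chunks:
        if len(asr_result) != 0:
            final_result += asr_result[0]  # old: first entry only, no separator
    return final_result

def accumulate_new(chunks):
    final_result = ""
    for asr_result in chunks:
        if len(asr_result) != 0:
            final_result += " ".join(asr_result) + " "  # new: every entry, space-separated
    return final_result.strip()  # trailing space removed before it goes into the output item

chunks = [["ni hao"], [], ["shi jie"]]
print(accumulate_old(chunks))  # "ni haoshi jie"
print(accumulate_new(chunks))  # "ni hao shi jie"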