From 374998bd363eb789f592281442b0ecc601261a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Tue, 16 Jul 2024 14:30:16 +0800 Subject: [PATCH] sensevoice --- examples/README.md | 28 ++++++++++++++++++++++++++++ examples/README_zh.md | 28 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/examples/README.md b/examples/README.md index 0191a2d8a..fe9a0ed93 100644 --- a/examples/README.md +++ b/examples/README.md @@ -69,6 +69,34 @@ res = model.generate(input=[str], output_dir=[str]) #### Speech Recognition (Non-streaming) +##### SenseVoice +```python +from funasr import AutoModel +from funasr.utils.postprocess_utils import rich_transcription_postprocess + +model_dir = "iic/SenseVoiceSmall" + +model = AutoModel( + model=model_dir, + vad_model="fsmn-vad", + vad_kwargs={"max_single_segment_time": 30000}, + device="cuda:0", +) + +# en +res = model.generate( + input=f"{model.model_path}/example/en.mp3", + cache={}, + language="auto", # "zh", "en", "yue", "ja", "ko", "nospeech" + use_itn=True, + batch_size_s=60, + merge_vad=True, # merge short VAD-split segments, up to merge_length_s seconds + merge_length_s=15, +) +text = rich_transcription_postprocess(res[0]["text"]) +print(text) +``` +##### Paraformer ```python from funasr import AutoModel # paraformer-zh is a multi-functional asr model diff --git a/examples/README_zh.md b/examples/README_zh.md index b0a666512..f95ee64df 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -70,6 +70,34 @@ res = model.generate(input=[str], output_dir=[str]) #### 非实时语音识别 +##### SenseVoice +```python +from funasr import AutoModel +from funasr.utils.postprocess_utils import rich_transcription_postprocess + +model_dir = "iic/SenseVoiceSmall" + +model = AutoModel( + model=model_dir, + vad_model="fsmn-vad", + vad_kwargs={"max_single_segment_time": 30000}, + device="cuda:0", +) + +# en +res = model.generate( + input=f"{model.model_path}/example/en.mp3", + cache={}, + language="auto", # "zh", "en", "yue", "ja", "ko", "nospeech" + use_itn=True, + 
batch_size_s=60, + merge_vad=True, # merge short VAD-split segments, up to merge_length_s seconds + merge_length_s=15, +) +text = rich_transcription_postprocess(res[0]["text"]) +print(text) +``` +##### Paraformer ```python from funasr import AutoModel # paraformer-zh is a multi-functional asr model