sensevoice

This commit is contained in:
游雁 2024-07-16 14:30:16 +08:00
parent 774caaf752
commit 374998bd36
2 changed files with 56 additions and 0 deletions

View File

@ -69,6 +69,34 @@ res = model.generate(input=[str], output_dir=[str])
#### Speech Recognition (Non-streaming)
##### SenseVoice
```python
# Transcribe the example English clip found under the SenseVoiceSmall model
# path, with the fsmn-vad model configured as the VAD model.
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
model_dir = "iic/SenseVoiceSmall"
model = AutoModel(
model=model_dir,
vad_model="fsmn-vad",
vad_kwargs={"max_single_segment_time": 30000},
device="cuda:0",
)
# Input is the English sample (example/en.mp3) under the model's path
res = model.generate(
input=f"{model.model_path}/example/en.mp3",
cache={},
language="auto", # or one of: "zh", "en", "yue", "ja", "ko", "nospeech"
use_itn=True,
batch_size_s=60,
merge_vad=True, # NOTE(review): presumably merges short VAD segments, up to merge_length_s seconds - confirm
merge_length_s=15,
)
# Post-process the text of the first result before printing it
text = rich_transcription_postprocess(res[0]["text"])
print(text)
```
##### Paraformer
```python
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model

View File

@ -70,6 +70,34 @@ res = model.generate(input=[str], output_dir=[str])
#### 非实时语音识别
##### SenseVoice
```python
# Transcribe the example English clip found under the SenseVoiceSmall model
# path, with the fsmn-vad model configured as the VAD model.
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
model_dir = "iic/SenseVoiceSmall"
model = AutoModel(
model=model_dir,
vad_model="fsmn-vad",
vad_kwargs={"max_single_segment_time": 30000},
device="cuda:0",
)
# Input is the English sample (example/en.mp3) under the model's path
res = model.generate(
input=f"{model.model_path}/example/en.mp3",
cache={},
language="auto", # or one of: "zh", "en", "yue", "ja", "ko", "nospeech"
use_itn=True,
batch_size_s=60,
merge_vad=True, # NOTE(review): presumably merges short VAD segments, up to merge_length_s seconds - confirm
merge_length_s=15,
)
# Post-process the text of the first result before printing it
text = rich_transcription_postprocess(res[0]["text"])
print(text)
```
##### Paraformer
```python
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model