mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
25 lines
612 B
Python
25 lines
612 B
Python
|
|
|
|
from funasr.register import tables
|
|
|
|
@tables.register("tokenizer_classes", "WhisperTokenizer")
def WhisperTokenizer(**kwargs):
    """Build a Whisper tokenizer via ``whisper.tokenizer.get_tokenizer``.

    Registered in ``tables`` under ``"tokenizer_classes"`` with the key
    ``"WhisperTokenizer"``.

    Keyword Args:
        language: Forwarded as ``language``. Defaults to ``None``.
        task: Forwarded as ``task``. Defaults to ``"transcribe"``.
        is_multilingual: Forwarded as ``multilingual``. Defaults to ``True``.
        num_languages: Forwarded as ``num_languages``. Defaults to ``99``.

        Any other keyword arguments are ignored.

    Returns:
        The object returned by ``get_tokenizer``.

    Raises:
        ImportError: If ``whisper.tokenizer`` cannot be imported.
    """
    # Imported lazily so that whisper is only required when this tokenizer
    # is actually requested.
    try:
        from whisper.tokenizer import get_tokenizer
    except ImportError as err:
        # Fail here with an actionable message; continuing would only hit an
        # unbound ``get_tokenizer`` a few lines below.
        raise ImportError(
            "Notice: If you want to use whisper, please `pip install -U openai-whisper`"
        ) from err

    language = kwargs.get("language", None)
    task = kwargs.get("task", "transcribe")
    is_multilingual = kwargs.get("is_multilingual", True)
    num_languages = kwargs.get("num_languages", 99)

    tokenizer = get_tokenizer(
        multilingual=is_multilingual,
        num_languages=num_languages,
        language=language,
        task=task,
    )

    return tokenizer
|
|
|