mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
* sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * sensevoice finetune * bugfix * update with main (#1631) * update seaco finetune * v1.0.24 --------- Co-authored-by: 维石 <shixian.shi@alibaba-inc.com> * sensevoice * sensevoice * sensevoice * update with main (#1638) * update seaco finetune * v1.0.24 * update rwkv template --------- Co-authored-by: 维石 <shixian.shi@alibaba-inc.com> * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sensevoice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * sense voice * whisper * whisper * update style * update style --------- Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>
46 lines
1.4 KiB
Python
46 lines
1.4 KiB
Python
from funasr.register import tables
|
|
|
|
|
|
@tables.register("tokenizer_classes", "WhisperTokenizer")
def WhisperTokenizer(**kwargs):
    """Build a tokenizer with ``whisper.tokenizer.get_tokenizer``.

    Keyword Args:
        language: Forwarded as ``language``. Defaults to ``None``.
        task: Forwarded as ``task``. Defaults to ``"transcribe"``.
        is_multilingual: Forwarded as ``multilingual``. Defaults to ``True``.
        num_languages: Forwarded as ``num_languages``. Defaults to ``99``.

    Keyword arguments other than the four above are ignored.

    Returns:
        Whatever ``get_tokenizer`` returns for the given settings.

    Raises:
        ImportError: If ``whisper.tokenizer`` cannot be imported.
    """
    try:
        from whisper.tokenizer import get_tokenizer
    except ImportError as err:
        # Previously a bare ``except`` printed this notice and execution
        # continued, so the caller saw an unbound-name error on
        # ``get_tokenizer`` instead of the real cause. Fail here with the
        # install hint and keep the original error chained for debugging.
        raise ImportError(
            "Notice: If you want to use whisper, please `pip install -U openai-whisper`"
        ) from err

    language = kwargs.get("language", None)
    task = kwargs.get("task", "transcribe")
    is_multilingual = kwargs.get("is_multilingual", True)
    num_languages = kwargs.get("num_languages", 99)

    tokenizer = get_tokenizer(
        multilingual=is_multilingual,
        num_languages=num_languages,
        language=language,
        task=task,
    )

    return tokenizer
|
|
|
|
|
|
@tables.register("tokenizer_classes", "SenseVoiceTokenizer")
def SenseVoiceTokenizer(**kwargs):
    """Build a tokenizer with the SenseVoice ``whisper_lib`` ``get_tokenizer``.

    Keyword Args:
        language: Forwarded as ``language``. Defaults to ``None``.
        task: Forwarded as ``task``. Defaults to ``None``.
        is_multilingual: Forwarded as ``multilingual``. Defaults to ``True``.
        num_languages: Forwarded as ``num_languages``. Defaults to ``8749``.
        vocab_path: Forwarded as ``vocab_path``. Defaults to ``None``.

    Keyword arguments other than the five above are ignored.

    Returns:
        Whatever ``get_tokenizer`` returns for the given settings.

    Raises:
        ImportError: If ``funasr.models.sense_voice.whisper_lib.tokenizer``
            cannot be imported.
    """
    try:
        from funasr.models.sense_voice.whisper_lib.tokenizer import get_tokenizer
    except ImportError as err:
        # Previously a bare ``except`` printed this notice and execution
        # continued, so the caller saw an unbound-name error on
        # ``get_tokenizer`` instead of the real cause.
        # NOTE(review): the notice names openai-whisper although the import
        # is a funasr-internal module; the chained cause shows which module
        # is actually missing. Confirm whether the hint should be reworded.
        raise ImportError(
            "Notice: If you want to use whisper, please `pip install -U openai-whisper`"
        ) from err

    language = kwargs.get("language", None)
    task = kwargs.get("task", None)
    is_multilingual = kwargs.get("is_multilingual", True)
    num_languages = kwargs.get("num_languages", 8749)
    vocab_path = kwargs.get("vocab_path", None)

    tokenizer = get_tokenizer(
        multilingual=is_multilingual,
        num_languages=num_languages,
        language=language,
        task=task,
        vocab_path=vocab_path,
    )

    return tokenizer
|