From b207ac3dda1259a08d42940d59081332155926d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Fri, 11 Oct 2024 14:03:30 +0800 Subject: [PATCH] whisper-large-v3-turbo --- .../whisper/demo_from_openai.py | 6 +-- funasr/download/name_maps_from_hub.py | 3 +- funasr/download/upload_model.py | 51 ++++++++++++++++++- funasr/models/whisper/model.py | 1 + 4 files changed, 55 insertions(+), 6 deletions(-) diff --git a/examples/industrial_data_pretraining/whisper/demo_from_openai.py b/examples/industrial_data_pretraining/whisper/demo_from_openai.py index 097e942e8..f76491487 100644 --- a/examples/industrial_data_pretraining/whisper/demo_from_openai.py +++ b/examples/industrial_data_pretraining/whisper/demo_from_openai.py @@ -11,9 +11,9 @@ from funasr import AutoModel # model = AutoModel(model="Whisper-medium", hub="openai") # model = AutoModel(model="Whisper-large-v2", hub="openai") model = AutoModel( - model="Whisper-large-v3", - vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch", - vad_kwargs={"max_single_segment_time": 30000}, + model="Whisper-large-v3-turbo", + # vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch", + # vad_kwargs={"max_single_segment_time": 30000}, hub="openai", ) diff --git a/funasr/download/name_maps_from_hub.py b/funasr/download/name_maps_from_hub.py index 54ec61fa3..e4d0f8cc5 100644 --- a/funasr/download/name_maps_from_hub.py +++ b/funasr/download/name_maps_from_hub.py @@ -47,8 +47,9 @@ name_maps_openai = { "Whisper-small": "small", "Whisper-medium.en": "medium.en", "Whisper-medium": "medium", + "Whisper-large": "large", "Whisper-large-v1": "large-v1", "Whisper-large-v2": "large-v2", "Whisper-large-v3": "large-v3", - "Whisper-large": "large", + "Whisper-large-v3-turbo": "turbo", } diff --git a/funasr/download/upload_model.py b/funasr/download/upload_model.py index 9e6e1f841..f23a0dfb9 100644 --- a/funasr/download/upload_model.py +++ b/funasr/download/upload_model.py @@ -1,8 +1,25 @@ import os import hydra from omegaconf 
import DictConfig, OmegaConf, ListConfig - +import traceback from modelscope.hub.api import HubApi +from modelscope.hub.constants import ( + API_HTTP_CLIENT_TIMEOUT, + API_RESPONSE_FIELD_DATA, + API_RESPONSE_FIELD_EMAIL, + API_RESPONSE_FIELD_GIT_ACCESS_TOKEN, + API_RESPONSE_FIELD_MESSAGE, + API_RESPONSE_FIELD_USERNAME, + DEFAULT_CREDENTIALS_PATH, + MODELSCOPE_CLOUD_ENVIRONMENT, + MODELSCOPE_CLOUD_USERNAME, + MODELSCOPE_REQUEST_ID, + ONE_YEAR_SECONDS, + REQUESTS_API_HTTP_METHOD, + DatasetVisibility, + Licenses, + ModelVisibility, +) @hydra.main(config_name=None, version_base=None) @@ -22,11 +39,41 @@ def main_hydra(cfg: DictConfig): assert TOKEN is not None, f"{TOKEN} is None" + visibility = cfg.get("visibility", "PRIVATE") # PRIVATE #ModelVisibility.PUBLIC + visibility = getattr(ModelVisibility, visibility) + model_name = cfg.get("model_name", "测试") + api = HubApi() api.login(TOKEN) - api.push_model(model_id=model_id, model_dir=model_dir) + try: + api.create_model( + model_id=model_id, + visibility=visibility, + license=Licenses.APACHE_V2, + chinese_name=model_name, + ) + except Exception as e: + print(f"Create_model failed! {str(e)}, {traceback.format_exc()}") + print(f"model url: https://modelscope.cn/models/{model_id}") + + api.push_model(model_id=model_id, model_dir=model_dir) + print( + f"Upload model finished." 
+ f"model_dir: {model_dir}" + f"model_id: {model_id}" + f"url: https://modelscope.cn/models/{model_id}" + ) + + +""" +TOKEN="YOUR_MODELSCOPE_SDK_TOKEN" +model_id="iic/Whisper-large-v3-turbo" +model_dir="/Users/zhifu/Downloads/Whisper-large-v3-turbo" + +python -m funasr.download.upload_model ++TOKEN=${TOKEN} ++model_id=${model_id} ++model_dir=${model_dir} +""" if __name__ == "__main__": main_hydra() diff --git a/funasr/models/whisper/model.py b/funasr/models/whisper/model.py index 791fddd47..398eea3fd 100644 --- a/funasr/models/whisper/model.py +++ b/funasr/models/whisper/model.py @@ -28,6 +28,7 @@ from funasr.register import tables @tables.register("model_classes", "Whisper-large-v1") @tables.register("model_classes", "Whisper-large-v2") @tables.register("model_classes", "Whisper-large-v3") +@tables.register("model_classes", "Whisper-large-v3-turbo") @tables.register("model_classes", "WhisperWarp") class WhisperWarp(nn.Module): def __init__(self, *args, **kwargs):