diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py index 80732133f..ef3ed6e3d 100644 --- a/funasr/bin/asr_infer.py +++ b/funasr/bin/asr_infer.py @@ -38,9 +38,7 @@ from funasr.text.build_tokenizer import build_tokenizer from funasr.text.token_id_converter import TokenIDConverter from funasr.torch_utils.device_funcs import to_device from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard -from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode -from funasr.utils.whisper_utils.transcribe import transcribe -from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram + class Speech2Text: """Speech2Text class @@ -1923,9 +1921,13 @@ class Speech2TextWhisper: **kwargs, ): + from funasr.tasks.whisper import ASRTask + from funasr.utils.whisper_utils.transcribe import transcribe + from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram + from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode + # 1. Build ASR model scorers = {} - from funasr.tasks.whisper import ASRTask asr_model, asr_train_args = ASRTask.build_model_from_file( asr_train_config, asr_model_file, cmvn_file, device ) diff --git a/funasr/utils/whisper_utils/tokenizer.py b/funasr/utils/whisper_utils/tokenizer.py index 2a936478d..bf7cb4781 100644 --- a/funasr/utils/whisper_utils/tokenizer.py +++ b/funasr/utils/whisper_utils/tokenizer.py @@ -5,7 +5,12 @@ from typing import List, Optional, Tuple, Union import numpy as np import torch -from transformers import GPT2TokenizerFast +try: + from transformers import GPT2TokenizerFast +except ImportError: + raise ImportError( + "transformers was not installed. Please install transformers first." + ) LANGUAGES = { "en": "english",