diff --git a/funasr/utils/load_utils.py b/funasr/utils/load_utils.py index 1d80fcf36..072b578ef 100644 --- a/funasr/utils/load_utils.py +++ b/funasr/utils/load_utils.py @@ -86,8 +86,10 @@ def load_audio_text_image_video( ): # download url to local file data_or_path_or_list = download_from_url(data_or_path_or_list) - if isinstance(data_or_path_or_list, str) and os.path.exists(data_or_path_or_list): # local file + if (isinstance(data_or_path_or_list, str) and os.path.exists(data_or_path_or_list)) or hasattr(data_or_path_or_list, 'read'): # local file or bytes io if data_type is None or data_type == "sound": + if hasattr(data_or_path_or_list, "read") and hasattr(data_or_path_or_list, "seek"): + data_or_path_or_list.seek(0) # if use_ffmpeg: # data_or_path_or_list = _load_audio_ffmpeg(data_or_path_or_list, sr=fs) # data_or_path_or_list = torch.from_numpy(data_or_path_or_list).squeeze() # [n_samples,]