diff --git a/funasr/utils/prepare_data.py b/funasr/utils/prepare_data.py index c9a99e5cb..a9beaf4e8 100644 --- a/funasr/utils/prepare_data.py +++ b/funasr/utils/prepare_data.py @@ -157,6 +157,10 @@ def generate_data_list(data_dir, dataset, nj=100): def prepare_data(args, distributed_option): + if args.dataset_type == "small" and args.train_data_path_and_name_and_type is not None: + return + if args.dataset_type == "large" and args.train_data_file is not None: + return distributed = distributed_option.distributed if not distributed or distributed_option.dist_rank == 0: filter_wav_text(args.data_dir, args.train_set)