FunASR/funasr/build_utils/build_dataloader.py
2023-07-06 16:24:35 +08:00

29 lines
1.4 KiB
Python

from funasr.datasets.large_datasets.build_dataloader import LargeDataLoader
from funasr.datasets.small_datasets.sequence_iter_factory import SequenceIterFactory
def build_dataloader(args):
    """Build the (train, valid) iterator-factory pair for a training run.

    Dispatches on ``args.dataset_type``:
      - "small": per-sequence iteration via ``SequenceIterFactory``, except
        for the EEND-OLA diarization model, which ships its own loader.
      - "large": streaming ``LargeDataLoader``.

    Returns:
        tuple: (train_iter_factory, valid_iter_factory)

    Raises:
        ValueError: if ``args.dataset_type`` is neither "small" nor "large".
    """
    dataset_type = args.dataset_type
    if dataset_type == "small":
        use_eend_ola = args.task_name == "diar" and args.model == "eend_ola"
        if use_eend_ola:
            # EEND-OLA cannot use the generic small-dataset pipeline;
            # import its dedicated loader lazily so other tasks don't pay for it.
            from funasr.modules.eend_ola.eend_ola_dataloader import EENDOLADataLoader

            batch_size = args.dataset_conf["batch_conf"]["batch_size"]
            train_iter_factory = EENDOLADataLoader(
                data_file=args.train_data_path_and_name_and_type[0][0],
                batch_size=batch_size,
                num_workers=args.dataset_conf["num_workers"],
                shuffle=True,
            )
            valid_iter_factory = EENDOLADataLoader(
                data_file=args.valid_data_path_and_name_and_type[0][0],
                batch_size=batch_size,
                num_workers=0,  # validation runs single-worker, unshuffled
                shuffle=False,
            )
        else:
            train_iter_factory = SequenceIterFactory(args, mode="train")
            valid_iter_factory = SequenceIterFactory(args, mode="valid")
    elif dataset_type == "large":
        train_iter_factory = LargeDataLoader(args, mode="train")
        valid_iter_factory = LargeDataLoader(args, mode="valid")
    else:
        raise ValueError(f"Not supported dataset_type={args.dataset_type}")
    return train_iter_factory, valid_iter_factory