From d45230c6ba95a71f7df53cf8d1955feaa8696c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Sat, 21 Sep 2024 11:46:52 +0800 Subject: [PATCH] batch --- funasr/datasets/audio_datasets/index_ds.py | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/funasr/datasets/audio_datasets/index_ds.py b/funasr/datasets/audio_datasets/index_ds.py index 39ef409af..1541de33f 100644 --- a/funasr/datasets/audio_datasets/index_ds.py +++ b/funasr/datasets/audio_datasets/index_ds.py @@ -92,21 +92,21 @@ class IndexDSJsonlRankFull(torch.utils.data.Dataset): target = data["target"] source_len = data.get("source_len", 1) target_len = data.get("target_len", 0) - if "aishell" in source: - target = target.replace(" ", "") - if ( - source_len < self.min_source_length - or source_len > self.max_source_length - ): - continue - if ( - target_len < self.min_target_length - or target_len > self.max_target_length - ): - continue - - if (source_len + target_len) > self.max_token_length: - continue + # if "aishell" in source: + # target = target.replace(" ", "") + # if ( + # source_len < self.min_source_length + # or source_len > self.max_source_length + # ): + # continue + # if ( + # target_len < self.min_target_length + # or target_len > self.max_target_length + # ): + # continue + # + # if (source_len + target_len) > self.max_token_length: + # continue contents_i = { "source": source,