From 4e57ba7b92bfd5208ef81f8b666b5f5af10c531a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Thu, 26 Sep 2024 13:47:00 +0800 Subject: [PATCH] v3 --- funasr/datasets/openai_datasets/index_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/funasr/datasets/openai_datasets/index_ds.py b/funasr/datasets/openai_datasets/index_ds.py index ea9fc5f5a..c7bcff453 100644 --- a/funasr/datasets/openai_datasets/index_ds.py +++ b/funasr/datasets/openai_datasets/index_ds.py @@ -53,7 +53,7 @@ class OpenAIIndexDSJsonl(torch.utils.data.Dataset): # torch.utils.data.Dataset data = data_dict["messages"] speech_length = data_dict.get("speech_length", -1) // 8 text_length = data_dict.get("text_length", 0) - if speech_length * 2 > self.max_source_length: + if speech_length * 1.3 > self.max_source_length: logging.info( f"speech_length: {speech_length*8} > {self.max_source_length}, drop it: {data_dict}" )