From 1a39b6f981d2cf5c8066d7fa4d38c58bccbbfe13 Mon Sep 17 00:00:00 2001 From: zhuzizyf <42790740+zhuzizyf@users.noreply.github.com> Date: Fri, 3 Mar 2023 10:33:51 +0800 Subject: [PATCH] Update wav_utils.py Because the dictionary contains no uppercase letters, any uppercase letters in the annotated text cause the finetune result to be "unk", so the annotated text is now uniformly converted to lowercase when it is read. --- funasr/utils/wav_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/funasr/utils/wav_utils.py b/funasr/utils/wav_utils.py index d7afe4acc..4a764a9d1 100644 --- a/funasr/utils/wav_utils.py +++ b/funasr/utils/wav_utils.py @@ -309,7 +309,7 @@ def filter_wav_text(data_dir, dataset): if len(parts) < 2: continue sample_name = parts[0] - text_dict[sample_name] = " ".join(parts[1:]) + text_dict[sample_name] = " ".join(parts[1:]).lower() filter_count = 0 with open(wav_file, "w") as f_wav, open(text_file, "w") as f_text: for sample_name, wav_path in wav_dict.items():