mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
fix sense_voice_datasets
This commit is contained in:
parent
beef97a2fc
commit
a98550fdf5
@@ -328,7 +328,7 @@ class SenseVoiceCTCDataset(torch.utils.data.Dataset):
|
||||
emo_target = item["emo_target"]
|
||||
event_target = item["event_target"]
|
||||
text_language = item.get("text_language", "<|zh|>")
|
||||
- punc_itn_bottom = item.get("with_or_wo_itn", "<|SPECIAL_TOKEN_13|>")
|
||||
+ punc_itn_bottom = item.get("with_or_wo_itn", "<|woitn|>")
|
||||
|
||||
target_ids = self.tokenizer.encode(asr_target, allowed_special="all")
|
||||
target_ids_len = len(target_ids) # [text]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user