diff --git a/funasr/models/sense_voice/encoder.py b/funasr/models/sense_voice/encoder.py index 6c6d15600..64156e9dd 100644 --- a/funasr/models/sense_voice/encoder.py +++ b/funasr/models/sense_voice/encoder.py @@ -42,7 +42,7 @@ def sense_voice_encode_forward( olens = None if use_padmask and olens is not None: - padding_mask = (~make_pad_mask(olens)[:, None, :]).to(torch.bool).to(x.device) + padding_mask = (~make_pad_mask(olens)[:, :, None]).to(torch.bool).to(x.device) else: padding_mask = None