diff --git a/funasr/bin/tp_inference.py b/funasr/bin/tp_inference.py index 766f94f1c..7f9890f4a 100644 --- a/funasr/bin/tp_inference.py +++ b/funasr/bin/tp_inference.py @@ -148,11 +148,11 @@ class SpeechText2Timestamp: # Input as audio signal if isinstance(speech, np.ndarray): speech = torch.tensor(speech) - if self.frontend is not None: feats, feats_len = self.frontend.forward(speech, speech_lengths) feats = to_device(feats, device=self.device) feats_len = feats_len.int() + self.tp_model.frontend = None else: feats = speech feats_len = speech_lengths