From 3da40ad9fe5d1c9003014fdce75fffca23fd6900 Mon Sep 17 00:00:00 2001 From: lingyunfly <121302812+lingyunfly@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:13:03 +0800 Subject: [PATCH] vad bugfix (#624) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 凌匀 --- funasr/models/e2e_vad.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/funasr/models/e2e_vad.py b/funasr/models/e2e_vad.py index 846341d1e..14d56a85d 100644 --- a/funasr/models/e2e_vad.py +++ b/funasr/models/e2e_vad.py @@ -296,13 +296,14 @@ class E2EVadModel(nn.Module): self.sil_frame = 0 self.frame_probs = [] - assert self.output_data_buf[-1].contain_seg_end_point == True - drop_frames = int(self.output_data_buf[-1].end_ms / self.vad_opts.frame_in_ms) - real_drop_frames = drop_frames - self.last_drop_frames - self.last_drop_frames = drop_frames - self.data_buf_all = self.data_buf_all[real_drop_frames * int(self.vad_opts.frame_in_ms * self.vad_opts.sample_rate / 1000):] - self.decibel = self.decibel[real_drop_frames:] - self.scores = self.scores[:, real_drop_frames:, :] + if self.output_data_buf: + assert self.output_data_buf[-1].contain_seg_end_point == True + drop_frames = int(self.output_data_buf[-1].end_ms / self.vad_opts.frame_in_ms) + real_drop_frames = drop_frames - self.last_drop_frames + self.last_drop_frames = drop_frames + self.data_buf_all = self.data_buf_all[real_drop_frames * int(self.vad_opts.frame_in_ms * self.vad_opts.sample_rate / 1000):] + self.decibel = self.decibel[real_drop_frames:] + self.scores = self.scores[:, real_drop_frames:, :] def ComputeDecibel(self) -> None: frame_sample_length = int(self.vad_opts.frame_length_ms * self.vad_opts.sample_rate / 1000)