update eend_ola

This commit is contained in:
嘉渊 2023-07-06 17:54:29 +08:00
parent c46a271415
commit 91425c670b
5 changed files with 8 additions and 14 deletions

View File

@@ -12,7 +12,7 @@ encoder_decoder_attractor_conf:
n_units: 256 n_units: 256
# model related # model related
model: eend_ola_similar_eend model: eend_ola
model_conf: model_conf:
attractor_loss_weight: 0.01 attractor_loss_weight: 0.01
max_n_speaker: 8 max_n_speaker: 8

View File

@@ -12,7 +12,7 @@ encoder_decoder_attractor_conf:
n_units: 256 n_units: 256
# model related # model related
model: eend_ola_similar_eend model: eend_ola
model_conf: model_conf:
max_n_speaker: 8 max_n_speaker: 8

View File

@@ -12,7 +12,7 @@ encoder_decoder_attractor_conf:
n_units: 256 n_units: 256
# model related # model related
model: eend_ola_similar_eend model: eend_ola
model_conf: model_conf:
max_n_speaker: 8 max_n_speaker: 8

View File

@@ -12,7 +12,7 @@ encoder_decoder_attractor_conf:
n_units: 256 n_units: 256
# model related # model related
model: eend_ola_similar_eend model: eend_ola
model_conf: model_conf:
max_n_speaker: 8 max_n_speaker: 8

View File

@@ -12,7 +12,7 @@ from funasr.models.base_model import FunASRModel
from funasr.models.frontend.wav_frontend import WavFrontendMel23 from funasr.models.frontend.wav_frontend import WavFrontendMel23
from funasr.modules.eend_ola.encoder import EENDOLATransformerEncoder from funasr.modules.eend_ola.encoder import EENDOLATransformerEncoder
from funasr.modules.eend_ola.encoder_decoder_attractor import EncoderDecoderAttractor from funasr.modules.eend_ola.encoder_decoder_attractor import EncoderDecoderAttractor
from funasr.modules.eend_ola.utils.losses import fast_batch_pit_n_speaker_loss, standard_loss, cal_power_loss from funasr.modules.eend_ola.utils.losses import standard_loss, cal_power_loss, fast_batch_pit_n_speaker_loss
from funasr.modules.eend_ola.utils.power import create_powerlabel from funasr.modules.eend_ola.utils.power import create_powerlabel
from funasr.modules.eend_ola.utils.power import generate_mapping_dict from funasr.modules.eend_ola.utils.power import generate_mapping_dict
from funasr.torch_utils.device_funcs import force_gatherable from funasr.torch_utils.device_funcs import force_gatherable
@@ -109,23 +109,17 @@ class DiarEENDOLAModel(FunASRModel):
def forward( def forward(
self, self,
speech: List[torch.Tensor], speech: List[torch.Tensor],
speech_lengths: torch.Tensor, # num_frames of each sample
speaker_labels: List[torch.Tensor], speaker_labels: List[torch.Tensor],
speaker_labels_lengths: torch.Tensor, # num_speakers of each sample
orders: torch.Tensor, orders: torch.Tensor,
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]: ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
# Check that batch_size is unified # Check that batch_size is unified
assert ( assert (len(speech) == len(speaker_labels)), (len(speech), len(speaker_labels))
len(speech) speech_lengths = torch.tensor([len(sph) for sph in speech]).to(torch.int64)
== len(speech_lengths) speaker_labels_lengths = torch.tensor([spk.shape[-1] for spk in speaker_labels]).to(torch.int64)
== len(speaker_labels)
== len(speaker_labels_lengths)
), (len(speech), len(speech_lengths), len(speaker_labels), len(speaker_labels_lengths))
batch_size = len(speech) batch_size = len(speech)
# Encoder # Encoder
speech = [s[:s_len] for s, s_len in zip(speech, speech_lengths)]
encoder_out = self.forward_encoder(speech, speech_lengths) encoder_out = self.forward_encoder(speech, speech_lengths)
# Encoder-decoder attractor # Encoder-decoder attractor