mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Merge branch 'dev_gzf_funasr2' of github.com:alibaba-damo-academy/FunASR into dev_gzf_funasr2
add
This commit is contained in:
commit
3cee2214b7
@ -1,3 +1,39 @@
|
||||
FunASR Model Open Source License
|
||||
Version 1.0
|
||||
|
||||
Copyright (C) [2023-2028] Alibaba Group. All rights reserved.
|
||||
|
||||
Thank you for choosing the FunASR open source models. The FunASR open source models contain a series of open-source models that allow everyone to use, modify, share, and learn from them.
|
||||
|
||||
To ensure better community collaboration, we have developed the following agreement and hope that you carefully read and abide by it.
|
||||
|
||||
1 Definitions
|
||||
In this agreement, [FunASR software] refers to the FunASR open source model, and its derivatives, including fine-tuned models. [You] refers to individuals or organizations who use, modify, share, and learn from [FunASR software].
|
||||
|
||||
2 License and Restrictions
|
||||
|
||||
2.1 License
|
||||
You are free to use, copy, modify, and share [FunASR software] under the conditions of this agreement.
|
||||
|
||||
2.2 Restrictions
|
||||
You should indicate the code and model source and author information when using, copying, modifying and sharing [FunASR software]. You should keep the relevant names of models in [FunASR software].
|
||||
|
||||
3 Responsibility and Risk
|
||||
[FunASR software] is provided for reference and learning purposes only, and its providers are not responsible for any direct or indirect losses caused by your use or modification of [FunASR software]. You should take responsibility for, and bear the risks of, your use and modification of [FunASR software].
|
||||
|
||||
4 Termination
|
||||
If you violate any terms of this agreement, your license will be automatically terminated, and you must stop using, copying, modifying, and sharing [FunASR software].
|
||||
|
||||
5 Revision
|
||||
This agreement may be updated and revised from time to time. The revised agreement will be published in the FunASR official repository and automatically take effect. If you continue to use, copy, modify, and share [FunASR software], it means you agree to the revised agreement.
|
||||
|
||||
6 Other Provisions
|
||||
This agreement is subject to the laws of [Country/Region]. If any provisions are found to be illegal, invalid, or unenforceable, they shall be deemed deleted from this agreement, and the remaining provisions shall remain valid and binding.
|
||||
|
||||
If you have any questions or comments about this agreement, please contact us.
|
||||
|
||||
Copyright (c) [2023-2028] Alibaba Group. All rights reserved.
|
||||
|
||||
FunASR 模型开源协议
|
||||
|
||||
版本号:1.0
|
||||
@ -36,38 +72,3 @@ FunASR 模型开源协议
|
||||
|
||||
版权所有© [2023-2028] [阿里巴巴集团]。保留所有权利。
|
||||
|
||||
FunASR Model Open Source License
|
||||
Version 1.0
|
||||
|
||||
Copyright (C) [2023-2028] Alibaba Group. All rights reserved.
|
||||
|
||||
Thank you for choosing the FunASR open source models. The FunASR open source models contain a series of open-source models that allow everyone to use, modify, share, and learn from them.
|
||||
|
||||
To ensure better community collaboration, we have developed the following agreement and hope that you carefully read and abide by it.
|
||||
|
||||
1 Definitions
|
||||
In this agreement, [FunASR software] refers to the FunASR open source model, and its derivatives, including fine-tuned models. [You] refers to individuals or organizations who use, modify, share, and learn from [FunASR software].
|
||||
|
||||
2 License and Restrictions
|
||||
|
||||
2.1 License
|
||||
You are free to use, copy, modify, and share [FunASR software] under the conditions of this agreement.
|
||||
|
||||
2.2 Restrictions
|
||||
You should indicate the code and model source and author information when using, copying, modifying and sharing [FunASR software]. You should keep the relevant names of models in [FunASR software].
|
||||
|
||||
3 Responsibility and Risk
|
||||
[FunASR software] is provided for reference and learning purposes only, and its providers are not responsible for any direct or indirect losses caused by your use or modification of [FunASR software]. You should take responsibility for, and bear the risks of, your use and modification of [FunASR software].
|
||||
|
||||
4 Termination
|
||||
If you violate any terms of this agreement, your license will be automatically terminated, and you must stop using, copying, modifying, and sharing [FunASR software].
|
||||
|
||||
5 Revision
|
||||
This agreement may be updated and revised from time to time. The revised agreement will be published in the FunASR official repository and automatically take effect. If you continue to use, copy, modify, and share [FunASR software], it means you agree to the revised agreement.
|
||||
|
||||
6 Other Provisions
|
||||
This agreement is subject to the laws of [Country/Region]. If any provisions are found to be illegal, invalid, or unenforceable, they shall be deemed deleted from this agreement, and the remaining provisions shall remain valid and binding.
|
||||
|
||||
If you have any questions or comments about this agreement, please contact us.
|
||||
|
||||
Copyright (c) [2023-2028] Alibaba Group. All rights reserved.
|
||||
|
||||
@ -956,24 +956,29 @@ def inference_paraformer_vad_speaker(
|
||||
ed = int(vadsegment[1]) / 1000
|
||||
vad_segments.append(
|
||||
[st, ed, audio[int(st * 16000):int(ed * 16000)]])
|
||||
check_audio_list(vad_segments)
|
||||
# sv pipeline
|
||||
segments = sv_chunk(vad_segments)
|
||||
embeddings = []
|
||||
for s in segments:
|
||||
#_, embs = self.sv_pipeline([s[2]], output_emb=True)
|
||||
# embeddings.append(embs)
|
||||
wavs = sv_preprocess([s[2]])
|
||||
# embs = self.forward(wavs)
|
||||
embs = []
|
||||
for x in wavs:
|
||||
x = extract_feature([x])
|
||||
embs.append(sv_model(x))
|
||||
embs = torch.cat(embs)
|
||||
embeddings.append(embs.detach().numpy())
|
||||
embeddings = np.concatenate(embeddings)
|
||||
labels = cb_model(embeddings)
|
||||
sv_output = postprocess(segments, vad_segments, labels, embeddings)
|
||||
audio_dur = check_audio_list(vad_segments)
|
||||
if audio_dur > 5:
|
||||
# sv pipeline
|
||||
segments = sv_chunk(vad_segments)
|
||||
embeddings = []
|
||||
for s in segments:
|
||||
#_, embs = self.sv_pipeline([s[2]], output_emb=True)
|
||||
# embeddings.append(embs)
|
||||
wavs = sv_preprocess([s[2]])
|
||||
# embs = self.forward(wavs)
|
||||
embs = []
|
||||
for x in wavs:
|
||||
x = extract_feature([x])
|
||||
embs.append(sv_model(x))
|
||||
embs = torch.cat(embs)
|
||||
embeddings.append(embs.detach().numpy())
|
||||
embeddings = np.concatenate(embeddings)
|
||||
labels = cb_model(embeddings)
|
||||
sv_output = postprocess(segments, vad_segments, labels, embeddings)
|
||||
else:
|
||||
# fake speaker result for too short utterance
|
||||
sv_output = [[0.0, vadsegments[-1][-1]/1000.0, 0]]
|
||||
logging.warning("Too short utterence found: {}, return default speaker results.".format(keys))
|
||||
|
||||
speech, speech_lengths = batch["speech"], batch["speech_lengths"]
|
||||
|
||||
|
||||
@ -134,7 +134,7 @@ class NeatContextualParaformer(Paraformer):
|
||||
text_lengths: torch.Tensor,
|
||||
hotword_pad: torch.Tensor,
|
||||
hotword_lengths: torch.Tensor,
|
||||
ideal_attn: torch.Tensor,
|
||||
dha_pad: torch.Tensor,
|
||||
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
|
||||
"""Frontend + Encoder + Decoder + Calc loss
|
||||
|
||||
@ -207,7 +207,7 @@ class NeatContextualParaformer(Paraformer):
|
||||
# 2b. Attention decoder branch
|
||||
if self.ctc_weight != 1.0:
|
||||
loss_att, acc_att, cer_att, wer_att, loss_pre, loss_ideal = self._calc_att_clas_loss(
|
||||
encoder_out, encoder_out_lens, text, text_lengths, hotword_pad, hotword_lengths, ideal_attn
|
||||
encoder_out, encoder_out_lens, text, text_lengths, hotword_pad, hotword_lengths
|
||||
)
|
||||
|
||||
# 3. CTC-Att loss definition
|
||||
@ -242,7 +242,6 @@ class NeatContextualParaformer(Paraformer):
|
||||
ys_pad_lens: torch.Tensor,
|
||||
hotword_pad: torch.Tensor,
|
||||
hotword_lengths: torch.Tensor,
|
||||
ideal_attn: torch.Tensor,
|
||||
):
|
||||
encoder_out_mask = (~make_pad_mask(encoder_out_lens, maxlen=encoder_out.size(1))[:, None, :]).to(
|
||||
encoder_out.device)
|
||||
|
||||
@ -35,7 +35,8 @@ def check_audio_list(audio: list):
|
||||
assert seg[0] >= audio[
|
||||
i - 1][1], 'modelscope error: Wrong time stamps.'
|
||||
audio_dur += seg[1] - seg[0]
|
||||
assert audio_dur > 5, 'modelscope error: The effective audio duration is too short.'
|
||||
return audio_dur
|
||||
# assert audio_dur > 5, 'modelscope error: The effective audio duration is too short.'
|
||||
|
||||
|
||||
def sv_preprocess(inputs: Union[np.ndarray, list]):
|
||||
|
||||
@ -110,6 +110,7 @@ int FsmnVadOnline::OnlineLfrCmvn(vector<vector<float>> &vad_feats, bool input_fi
|
||||
p.insert(p.end(), vad_feats[vad_feats.size() - 1].begin(), vad_feats[vad_feats.size() - 1].end());
|
||||
}
|
||||
out_feats.emplace_back(p);
|
||||
p.clear();
|
||||
} else {
|
||||
lfr_splice_frame_idxs = i;
|
||||
break;
|
||||
|
||||
@ -264,6 +264,7 @@ void FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) {
|
||||
p.insert(p.end(), vad_feats[vad_feats.size() - 1].begin(), vad_feats[vad_feats.size() - 1].end());
|
||||
}
|
||||
out_feats.emplace_back(p);
|
||||
p.clear();
|
||||
}
|
||||
}
|
||||
// Apply cmvn
|
||||
|
||||
@ -164,6 +164,7 @@ int ParaformerOnline::OnlineLfrCmvn(vector<vector<float>> &wav_feats, bool input
|
||||
p.insert(p.end(), wav_feats[wav_feats.size() - 1].begin(), wav_feats[wav_feats.size() - 1].end());
|
||||
}
|
||||
out_feats.emplace_back(p);
|
||||
p.clear();
|
||||
} else {
|
||||
lfr_splice_frame_idxs = i;
|
||||
break;
|
||||
|
||||
@ -436,6 +436,7 @@ void Paraformer::LfrCmvn(std::vector<std::vector<float>> &asr_feats) {
|
||||
p.insert(p.end(), asr_feats[asr_feats.size() - 1].begin(), asr_feats[asr_feats.size() - 1].end());
|
||||
}
|
||||
out_feats.emplace_back(p);
|
||||
p.clear();
|
||||
}
|
||||
}
|
||||
// Apply cmvn
|
||||
|
||||
Loading…
Reference in New Issue
Block a user