diff --git a/MODEL_LICENSE b/MODEL_LICENSE index 3d9e410f9..09ee9ebe4 100644 --- a/MODEL_LICENSE +++ b/MODEL_LICENSE @@ -1,3 +1,39 @@ +FunASR Model Open Source License +Version 1.0 + +Copyright (C) [2023-2028] Alibaba Group. All rights reserved. + +Thank you for choosing the FunASR open source models. The FunASR open source models contain a series of open-source models that allow everyone to use, modify, share, and learn from it. + +To ensure better community collaboration, we have developed the following agreement and hope that you carefully read and abide by it. + +1 Definitions +In this agreement, [FunASR software] refers to the FunASR open source model, and its derivatives, including fine-tuned models. [You] refer to individuals or organizations who use, modify, share, and learn from [FunASR software]. + +2 License and Restrictions + +2.1 License +You are free to use, copy, modify, and share [FunASR software] under the conditions of this agreement. + +2.2 Restrictions +You should indicate the code and model source and author information when using, copying, modifying and sharing [FunASR software]. You should keep the relevant names of models in [FunASR software]. + +3 Responsibility and Risk +[FunASR software] is for reference and learning purposes only and is not responsible for any direct or indirect losses caused by your use or modification of [FunASR software]. You should take responsibility and risks for your use and modification of [FunASR software]. + +4 Termination +If you violate any terms of this agreement, your license will be automatically terminated, and you must stop using, copying, modifying, and sharing [FunASR software]. + +5 Revision +This agreement may be updated and revised from time to time. The revised agreement will be published in the FunASR official repository and automatically take effect. If you continue to use, copy, modify, and share [FunASR software], it means you agree to the revised agreement. 
+ +6 Other Provisions +This agreement is subject to the laws of [Country/Region]. If any provisions are found to be illegal, invalid, or unenforceable, they shall be deemed deleted from this agreement, and the remaining provisions shall remain valid and binding. + +If you have any questions or comments about this agreement, please contact us. + +Copyright (c) [2023-2028] Alibaba Group. All rights reserved. + FunASR 模型开源协议 版本号:1.0 @@ -36,38 +72,3 @@ FunASR 模型开源协议 版权所有© [2023-2028] [阿里巴巴集团]。保留所有权利。 -FunASR Model Open Source License -Version 1.0 - -Copyright (C) [2023-2028] Alibaba Group. All rights reserved. - -Thank you for choosing the FunASR open source models. The FunASR open source models contain a series of open-source models that allow everyone to use, modify, share, and learn from it. - -To ensure better community collaboration, we have developed the following agreement and hope that you carefully read and abide by it. - -1 Definitions -In this agreement, [FunASR software] refers to the FunASR open source model, and its derivatives, including fine-tuned models. [You] refer to individuals or organizations who use, modify, share, and learn from [FunASR software]. - -2 License and Restrictions - -2.1 License -You are free to use, copy, modify, and share [FunASR software] under the conditions of this agreement. - -2.2 Restrictions -You should indicate the code and model source and author information when using, copying, modifying and sharing [FunASR software]. You should keep the relevant names of models in [FunASR software]. - -3 Responsibility and Risk -[FunASR software] is for reference and learning purposes only and is not responsible for any direct or indirect losses caused by your use or modification of [FunASR software]. You should take responsibility and risks for your use and modification of [FunASR software]. 
- -4 Termination -If you violate any terms of this agreement, your license will be automatically terminated, and you must stop using, copying, modifying, and sharing [FunASR software]. - -5 Revision -This agreement may be updated and revised from time to time. The revised agreement will be published in the FunASR official repository and automatically take effect. If you continue to use, copy, modify, and share [FunASR software], it means you agree to the revised agreement. - -6 Other Provisions -This agreement is subject to the laws of [Country/Region]. If any provisions are found to be illegal, invalid, or unenforceable, they shall be deemed deleted from this agreement, and the remaining provisions shall remain valid and binding. - -If you have any questions or comments about this agreement, please contact us. - -Copyright (c) [2023-2028] Alibaba Group. All rights reserved. diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py index 7dd27fc71..f61c0859d 100644 --- a/funasr/bin/asr_inference_launch.py +++ b/funasr/bin/asr_inference_launch.py @@ -956,24 +956,29 @@ def inference_paraformer_vad_speaker( ed = int(vadsegment[1]) / 1000 vad_segments.append( [st, ed, audio[int(st * 16000):int(ed * 16000)]]) - check_audio_list(vad_segments) - # sv pipeline - segments = sv_chunk(vad_segments) - embeddings = [] - for s in segments: - #_, embs = self.sv_pipeline([s[2]], output_emb=True) - # embeddings.append(embs) - wavs = sv_preprocess([s[2]]) - # embs = self.forward(wavs) - embs = [] - for x in wavs: - x = extract_feature([x]) - embs.append(sv_model(x)) - embs = torch.cat(embs) - embeddings.append(embs.detach().numpy()) - embeddings = np.concatenate(embeddings) - labels = cb_model(embeddings) - sv_output = postprocess(segments, vad_segments, labels, embeddings) + audio_dur = check_audio_list(vad_segments) + if audio_dur > 5: + # sv pipeline + segments = sv_chunk(vad_segments) + embeddings = [] + for s in segments: + #_, embs = 
self.sv_pipeline([s[2]], output_emb=True) + # embeddings.append(embs) + wavs = sv_preprocess([s[2]]) + # embs = self.forward(wavs) + embs = [] + for x in wavs: + x = extract_feature([x]) + embs.append(sv_model(x)) + embs = torch.cat(embs) + embeddings.append(embs.detach().numpy()) + embeddings = np.concatenate(embeddings) + labels = cb_model(embeddings) + sv_output = postprocess(segments, vad_segments, labels, embeddings) + else: + # fake speaker res for too short utterance + sv_output = [[0.0, vadsegments[-1][-1]/1000.0, 0]] + logging.warning("Too short utterance found: {}, return default speaker results.".format(keys)) speech, speech_lengths = batch["speech"], batch["speech_lengths"] diff --git a/funasr/models/e2e_asr_contextual_paraformer.py b/funasr/models/e2e_asr_contextual_paraformer.py index 64e0f8d03..a2f7078ae 100644 --- a/funasr/models/e2e_asr_contextual_paraformer.py +++ b/funasr/models/e2e_asr_contextual_paraformer.py @@ -134,7 +134,7 @@ class NeatContextualParaformer(Paraformer): text_lengths: torch.Tensor, hotword_pad: torch.Tensor, hotword_lengths: torch.Tensor, - ideal_attn: torch.Tensor, + dha_pad: torch.Tensor, ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]: """Frontend + Encoder + Decoder + Calc loss @@ -207,7 +207,7 @@ class NeatContextualParaformer(Paraformer): # 2b. Attention decoder branch if self.ctc_weight != 1.0: loss_att, acc_att, cer_att, wer_att, loss_pre, loss_ideal = self._calc_att_clas_loss( - encoder_out, encoder_out_lens, text, text_lengths, hotword_pad, hotword_lengths, ideal_attn + encoder_out, encoder_out_lens, text, text_lengths, hotword_pad, hotword_lengths ) # 3.
CTC-Att loss definition @@ -242,7 +242,6 @@ class NeatContextualParaformer(Paraformer): ys_pad_lens: torch.Tensor, hotword_pad: torch.Tensor, hotword_lengths: torch.Tensor, - ideal_attn: torch.Tensor, ): encoder_out_mask = (~make_pad_mask(encoder_out_lens, maxlen=encoder_out.size(1))[:, None, :]).to( encoder_out.device) diff --git a/funasr/utils/speaker_utils.py b/funasr/utils/speaker_utils.py index 38ef11cd1..edaf58b75 100644 --- a/funasr/utils/speaker_utils.py +++ b/funasr/utils/speaker_utils.py @@ -35,7 +35,8 @@ def check_audio_list(audio: list): assert seg[0] >= audio[ i - 1][1], 'modelscope error: Wrong time stamps.' audio_dur += seg[1] - seg[0] - assert audio_dur > 5, 'modelscope error: The effective audio duration is too short.' + return audio_dur + # assert audio_dur > 5, 'modelscope error: The effective audio duration is too short.' def sv_preprocess(inputs: Union[np.ndarray, list]): diff --git a/runtime/onnxruntime/src/fsmn-vad-online.cpp b/runtime/onnxruntime/src/fsmn-vad-online.cpp index b9cda4bd3..a8cc5d8b8 100644 --- a/runtime/onnxruntime/src/fsmn-vad-online.cpp +++ b/runtime/onnxruntime/src/fsmn-vad-online.cpp @@ -110,6 +110,7 @@ int FsmnVadOnline::OnlineLfrCmvn(vector> &vad_feats, bool input_fi p.insert(p.end(), vad_feats[vad_feats.size() - 1].begin(), vad_feats[vad_feats.size() - 1].end()); } out_feats.emplace_back(p); + p.clear(); } else { lfr_splice_frame_idxs = i; break; diff --git a/runtime/onnxruntime/src/fsmn-vad.cpp b/runtime/onnxruntime/src/fsmn-vad.cpp index db633b0b2..c83227405 100644 --- a/runtime/onnxruntime/src/fsmn-vad.cpp +++ b/runtime/onnxruntime/src/fsmn-vad.cpp @@ -264,6 +264,7 @@ void FsmnVad::LfrCmvn(std::vector> &vad_feats) { p.insert(p.end(), vad_feats[vad_feats.size() - 1].begin(), vad_feats[vad_feats.size() - 1].end()); } out_feats.emplace_back(p); + p.clear(); } } // Apply cmvn diff --git a/runtime/onnxruntime/src/paraformer-online.cpp b/runtime/onnxruntime/src/paraformer-online.cpp index 3b629c59a..d08b57e28 100644 --- 
a/runtime/onnxruntime/src/paraformer-online.cpp +++ b/runtime/onnxruntime/src/paraformer-online.cpp @@ -164,6 +164,7 @@ int ParaformerOnline::OnlineLfrCmvn(vector> &wav_feats, bool input p.insert(p.end(), wav_feats[wav_feats.size() - 1].begin(), wav_feats[wav_feats.size() - 1].end()); } out_feats.emplace_back(p); + p.clear(); } else { lfr_splice_frame_idxs = i; break; diff --git a/runtime/onnxruntime/src/paraformer.cpp b/runtime/onnxruntime/src/paraformer.cpp index 9e8e3361d..4e89ea28c 100644 --- a/runtime/onnxruntime/src/paraformer.cpp +++ b/runtime/onnxruntime/src/paraformer.cpp @@ -436,6 +436,7 @@ void Paraformer::LfrCmvn(std::vector> &asr_feats) { p.insert(p.end(), asr_feats[asr_feats.size() - 1].begin(), asr_feats[asr_feats.size() - 1].end()); } out_feats.emplace_back(p); + p.clear(); } } // Apply cmvn