From 15c4709beb4b588db2135fc1133cd6955b5ef819 Mon Sep 17 00:00:00 2001 From: zhifu gao Date: Mon, 11 Mar 2024 22:04:03 +0800 Subject: [PATCH] onnx (#1473) * qwenaudio qwenaudiochat * qwenaudio qwenaudiochat * whisper * whisper * llm * llm * llm * llm * llm * llm * llm * llm * export onnx * export onnx * export onnx * dingding * dingding * llm * doc * onnx * onnx * onnx * onnx --- funasr/auto/auto_model.py | 8 +++++++ funasr/models/bicif_paraformer/model.py | 7 ++---- .../ct_transformer_streaming/encoder.py | 9 +++---- funasr/models/paraformer/decoder.py | 24 +++++++------------ funasr/models/paraformer/model.py | 10 +++----- funasr/models/paraformer_streaming/model.py | 10 +++----- funasr/models/sanm/encoder.py | 10 ++++---- .../python/onnxruntime/funasr_onnx/vad_bin.py | 16 ++++++------- 8 files changed, 40 insertions(+), 54 deletions(-) diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py index edcede5bf..a18224f46 100644 --- a/funasr/auto/auto_model.py +++ b/funasr/auto/auto_model.py @@ -494,11 +494,19 @@ class AutoModel: export_dir = export_utils.export_onnx( model=model, data_in=data_list, + quantize=quantize, + fallback_num=fallback_num, + calib_num=calib_num, + opset_version=opset_version, **kwargs) else: export_dir = export_utils.export_torchscripts( model=model, data_in=data_list, + quantize=quantize, + fallback_num=fallback_num, + calib_num=calib_num, + opset_version=opset_version, **kwargs) return export_dir \ No newline at end of file diff --git a/funasr/models/bicif_paraformer/model.py b/funasr/models/bicif_paraformer/model.py index b93f93a88..9849c8c22 100644 --- a/funasr/models/bicif_paraformer/model.py +++ b/funasr/models/bicif_paraformer/model.py @@ -359,13 +359,10 @@ class BiCifParaformer(Paraformer): decoder_class = tables.decoder_classes.get(kwargs["decoder"] + "Export") self.decoder = decoder_class(self.decoder, onnx=is_onnx) - from funasr.utils.torch_function import MakePadMask from funasr.utils.torch_function import sequence_mask + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) - if is_onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) self.forward = self.export_forward diff --git a/funasr/models/ct_transformer_streaming/encoder.py b/funasr/models/ct_transformer_streaming/encoder.py index badf5f6a5..bf0b8b277 100644 --- a/funasr/models/ct_transformer_streaming/encoder.py +++ b/funasr/models/ct_transformer_streaming/encoder.py @@ -416,13 +416,10 @@ class SANMVadEncoderExport(torch.nn.Module): self.model = model self._output_size = model._output_size - from funasr.utils.torch_function import MakePadMask from funasr.utils.torch_function import sequence_mask - - if onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) from funasr.models.sanm.attention import MultiHeadedAttentionSANMExport diff --git a/funasr/models/paraformer/decoder.py b/funasr/models/paraformer/decoder.py index 59c6e1d0e..7c370ba0c 100644 --- a/funasr/models/paraformer/decoder.py +++ b/funasr/models/paraformer/decoder.py @@ -628,14 +628,12 @@ class ParaformerSANMDecoderExport(torch.nn.Module): ): super().__init__() # self.embed = model.embed #Embedding(model.embed, max_seq_len) - from funasr.utils.torch_function import MakePadMask + from funasr.utils.torch_function import sequence_mask self.model = model - if onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) from funasr.models.sanm.attention import MultiHeadedAttentionSANMDecoderExport from funasr.models.sanm.attention import MultiHeadedAttentionCrossAttExport @@ -763,14 +761,12 @@ class ParaformerSANMDecoderOnlineExport(torch.nn.Module): super().__init__() # self.embed = model.embed #Embedding(model.embed, max_seq_len) self.model = model - from funasr.utils.torch_function import MakePadMask + from funasr.utils.torch_function import sequence_mask self.model = model - if onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) from funasr.models.sanm.attention import MultiHeadedAttentionSANMDecoderExport from funasr.models.sanm.attention import MultiHeadedAttentionCrossAttExport @@ -1036,14 +1032,12 @@ class ParaformerDecoderSANExport(torch.nn.Module): # self.embed = model.embed #Embedding(model.embed, max_seq_len) self.model = model - from funasr.utils.torch_function import MakePadMask + from funasr.utils.torch_function import sequence_mask self.model = model - if onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) from funasr.models.transformer.decoder import DecoderLayerExport diff --git a/funasr/models/paraformer/model.py b/funasr/models/paraformer/model.py index 2e2a36e06..41a1bf73e 100644 --- a/funasr/models/paraformer/model.py +++ b/funasr/models/paraformer/model.py @@ -566,15 +566,11 @@ class Paraformer(torch.nn.Module): decoder_class = tables.decoder_classes.get(kwargs["decoder"]+"Export") self.decoder = decoder_class(self.decoder, onnx=is_onnx) - from funasr.utils.torch_function import MakePadMask from funasr.utils.torch_function import sequence_mask - - - if is_onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + self.forward = self.export_forward return self diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py index 518fe9369..33ec9762c 100644 --- a/funasr/models/paraformer_streaming/model.py +++ b/funasr/models/paraformer_streaming/model.py @@ -579,14 +579,10 @@ class ParaformerStreaming(Paraformer): decoder_class = tables.decoder_classes.get(kwargs["decoder"] + "Export") self.decoder = decoder_class(self.decoder, onnx=is_onnx) - from funasr.utils.torch_function import MakePadMask from funasr.utils.torch_function import sequence_mask - - if is_onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) - + + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) import copy import types diff --git a/funasr/models/sanm/encoder.py b/funasr/models/sanm/encoder.py index f0a37227d..f5748187a 100644 --- a/funasr/models/sanm/encoder.py +++ b/funasr/models/sanm/encoder.py @@ -503,13 +503,11 @@ class SANMEncoderExport(nn.Module): self.feats_dim = feats_dim self._output_size = model._output_size - from funasr.utils.torch_function import MakePadMask + from funasr.utils.torch_function import sequence_mask - - if onnx: - self.make_pad_mask = MakePadMask(max_seq_len, flip=False) - else: - self.make_pad_mask = sequence_mask(max_seq_len, flip=False) + + + self.make_pad_mask = sequence_mask(max_seq_len, flip=False) from funasr.models.sanm.attention import MultiHeadedAttentionSANMExport if hasattr(model, 'encoders0'): diff --git a/runtime/python/onnxruntime/funasr_onnx/vad_bin.py b/runtime/python/onnxruntime/funasr_onnx/vad_bin.py index 384f3779e..6b3a1bcfe 100644 --- a/runtime/python/onnxruntime/funasr_onnx/vad_bin.py +++ b/runtime/python/onnxruntime/funasr_onnx/vad_bin.py @@ -63,8 +63,8 @@ class Fsmn_vad(): model = AutoModel(model=model_dir) model_dir = model.export(type="onnx", quantize=quantize) - config_file = os.path.join(model_dir, 'vad.yaml') - cmvn_file = os.path.join(model_dir, 'vad.mvn') + config_file = os.path.join(model_dir, 'config.yaml') + cmvn_file = os.path.join(model_dir, 'am.mvn') config = read_yaml(config_file) self.frontend = WavFrontend( @@ -73,8 +73,8 @@ class Fsmn_vad(): ) self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads) self.batch_size = batch_size - self.vad_scorer = E2EVadModel(config["vad_post_conf"]) - self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"] + self.vad_scorer = E2EVadModel(config["model_conf"]) + self.max_end_sil = max_end_sil if max_end_sil is not None else config["model_conf"]["max_end_silence_time"] self.encoder_conf = config["encoder_conf"] def prepare_cache(self, in_cache: list = []): @@ -228,8 +228,8 @@ class Fsmn_vad_online(): model = AutoModel(model=model_dir) model_dir = model.export(type="onnx", quantize=quantize) - config_file = os.path.join(model_dir, 'vad.yaml') - cmvn_file = os.path.join(model_dir, 'vad.mvn') + config_file = os.path.join(model_dir, 'config.yaml') + cmvn_file = os.path.join(model_dir, 'am.mvn') config = read_yaml(config_file) self.frontend = WavFrontendOnline( @@ -238,8 +238,8 @@ class Fsmn_vad_online(): ) self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads) self.batch_size = batch_size - self.vad_scorer = E2EVadModel(config["vad_post_conf"]) - self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"] + self.vad_scorer = E2EVadModel(config["model_conf"]) + self.max_end_sil = max_end_sil if max_end_sil is not None else config["model_conf"]["max_end_silence_time"] self.encoder_conf = config["encoder_conf"] def prepare_cache(self, in_cache: list = []):