From 88c4f4a25df3c171dc0d07efc400f73e6a09e165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Tue, 7 Feb 2023 21:43:30 +0800 Subject: [PATCH] export model --- funasr/export/export_model.py | 10 +++++----- funasr/export/models/e2e_asr_paraformer.py | 11 ++++++++++- funasr/export/test_onnx.py | 4 ++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/funasr/export/export_model.py b/funasr/export/export_model.py index e5a2320ec..9f5cb0e9b 100644 --- a/funasr/export/export_model.py +++ b/funasr/export/export_model.py @@ -42,10 +42,10 @@ class ASRModelExportParaformer: self.export_config, ) self._export_onnx(model, verbose, export_dir) - # if self.onnx: - # self._export_onnx(model, verbose, export_dir) - # else: - # self._export_torchscripts(model, verbose, export_dir) + if self.onnx: + self._export_onnx(model, verbose, export_dir) + else: + self._export_torchscripts(model, verbose, export_dir) logging.info("output dir: {}".format(export_dir)) @@ -54,7 +54,7 @@ class ASRModelExportParaformer: if enc_size: dummy_input = model.get_dummy_inputs(enc_size) else: - dummy_input = model.get_dummy_inputs() + dummy_input = model.get_dummy_inputs_txt() # model_script = torch.jit.script(model) model_script = torch.jit.trace(model, dummy_input) diff --git a/funasr/export/models/e2e_asr_paraformer.py b/funasr/export/models/e2e_asr_paraformer.py index dd87213ac..8388f4f67 100644 --- a/funasr/export/models/e2e_asr_paraformer.py +++ b/funasr/export/models/e2e_asr_paraformer.py @@ -63,8 +63,9 @@ class Paraformer(nn.Module): decoder_out, _ = self.decoder(enc, enc_len, pre_acoustic_embeds, pre_token_length) decoder_out = torch.log_softmax(decoder_out, dim=-1) + sample_ids = decoder_out.argmax(dim=-1) - return decoder_out, pre_token_length + return decoder_out, sample_ids # def get_output_size(self): # return self.model.encoders[0].size @@ -74,6 +75,14 @@ class Paraformer(nn.Module): speech_lengths = torch.tensor([6, 30], dtype=torch.int32) return (speech, speech_lengths) + def get_dummy_inputs_txt(self, txt_file: str = "/mnt/workspace/data_fbank/0207/12345.wav.fea.txt"): + import numpy as np + fbank = np.loadtxt(txt_file) + fbank_lengths = np.array([fbank.shape[0], ], dtype=np.int32) + speech = torch.from_numpy(fbank[None, :, :].astype(np.float32)) + speech_lengths = torch.from_numpy(fbank_lengths.astype(np.int32)) + return (speech, speech_lengths) + def get_input_names(self): return ['speech', 'speech_lengths'] diff --git a/funasr/export/test_onnx.py b/funasr/export/test_onnx.py index 91b128e8e..c62137ea7 100644 --- a/funasr/export/test_onnx.py +++ b/funasr/export/test_onnx.py @@ -3,13 +3,13 @@ import numpy as np if __name__ == '__main__': - onnx_path = "/root/cache/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/torchscripts/model.onnx" + onnx_path = "/Users/zhifu/Downloads/model.onnx" sess = onnxruntime.InferenceSession(onnx_path) input_name = [nd.name for nd in sess.get_inputs()] output_name = [nd.name for nd in sess.get_outputs()] def _get_feed_dict(feats_length): - return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length,], dtype=np.int32)} + return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length,], dtype=np.int64)} def _run(feed_dict): output = sess.run(output_name, input_feed=feed_dict)