From be98c46a7bcacb74dfabe40f9da63d75863b4d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Fri, 17 Mar 2023 22:30:54 +0800 Subject: [PATCH] rtf benchmark --- funasr/export/models/modules/multihead_att.py | 6 ++-- funasr/runtime/python/benchmark_libtorch.md | 28 ++++--------------- funasr/runtime/python/benchmark_onnx.md | 17 +++++++++++ 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/funasr/export/models/modules/multihead_att.py b/funasr/export/models/modules/multihead_att.py index 0a5667689..1983db8b6 100644 --- a/funasr/export/models/modules/multihead_att.py +++ b/funasr/export/models/modules/multihead_att.py @@ -75,8 +75,10 @@ def preprocess_for_attn(x, mask, cache, pad_fn): return x, cache -import torch.fx -torch.fx.wrap('preprocess_for_attn') +torch_version = tuple(int(part) for part in torch.__version__.split(".")[:2])  # (major, minor) as ints so 1.10+ compares above 1.8 +if torch_version >= (1, 8): + import torch.fx + torch.fx.wrap('preprocess_for_attn') class MultiHeadedAttentionSANMDecoder(nn.Module): diff --git a/funasr/runtime/python/benchmark_libtorch.md b/funasr/runtime/python/benchmark_libtorch.md index 43f3b0e93..6c068fef3 100644 --- a/funasr/runtime/python/benchmark_libtorch.md +++ b/funasr/runtime/python/benchmark_libtorch.md @@ -26,35 +26,17 @@ Aishell1 [test set](https://www.openslr.org/33/) , the total audio duration is 3 ## [Paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) -### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz 16core-32processor with avx512_vnni - -[//]: # (| concurrent-tasks | processing time(s) | RTF | Speedup Rate |) - -[//]: # (|:----------------:|:------------------:|:------:|:------------:|) - -[//]: # (| 1 (torch fp32) | 3522 | 0.0976 | 10.3 |) - -[//]: # (| 1 (torch int8) | 1746 | 0.0484 | 20.7 |) - -[//]: # (| 32 (torch fp32) | 236 | 0.0066 | 152.7 |) - -[//]: # (| 32 (torch int8) | 114 | 0.0032 | 317.4 |) - -[//]: # (| 64 (torch fp32) | 235 | 0.0065 | 153.7 |) - -[//]: 
# (| 64 (torch int8) | 113 | 0.0031 | 319.2 |) - ### Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz 16core-32processor with avx512_vnni | concurrent-tasks | processing time(s) | RTF | Speedup Rate | |:----------------:|:------------------:|:------:|:------------:| -| 1 (torch fp32) | 3522 | 0.0976 | 10.3 | +| 1 (torch fp32) | 3522 | 0.0976 | 10.3 | | 1 (torch int8) | 1746 | 0.0484 | 20.7 | -| 32 (torch fp32) | 236 | 0.0066 | 152.7 | -| 32 (torch int8) | 114 | 0.0032 | 317.4 | -| 64 (torch fp32) | 235 | 0.0065 | 153.7 | -| 64 (torch int8) | 113 | 0.0031 | 319.2 | +| 32 (torch fp32) | 236 | 0.0066 | 152.7 | +| 32 (torch int8) | 114 | 0.0032 | 317.4 | +| 64 (torch fp32) | 235 | 0.0065 | 153.7 | +| 64 (torch int8) | 113 | 0.0031 | 319.2 | [//]: # (### Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz 32core-64processor without avx512_vnni) diff --git a/funasr/runtime/python/benchmark_onnx.md b/funasr/runtime/python/benchmark_onnx.md index 35c7068b5..fe938ee74 100644 --- a/funasr/runtime/python/benchmark_onnx.md +++ b/funasr/runtime/python/benchmark_onnx.md @@ -72,3 +72,20 @@ Aishell1 [test set](https://www.openslr.org/33/) , the total audio duration is 3 | 96 (onnx int8) | 108 | 0.0030 | 334.1 | ## [Paraformer](https://modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1/summary) + + ### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz 16core-32processor with avx512_vnni + +| concurrent-tasks | processing time(s) | RTF | Speedup Rate | +|:----------------:|:------------------:|:------:|:------------:| +| 16 (onnx fp32) | 91 | 0.0025 | 395.2 | +| 16 (onnx int8) | 78 | 0.0022 | 463.0 | +| 32 (onnx fp32) | 60 | 0.0017 | 598.8 | +| 32 (onnx int8) | 40 | 0.0011 | 892.9 | +| 64 (onnx fp32) | 55 | 0.0015 | 653.6 | +| 64 (onnx int8) | 31 | 0.0009 | 1162.8 | +| 96 (onnx fp32) | 57 | 0.0016 | 632.9 | +| 96 (onnx int8) | 33 | 0.0009 | 1098.9 | + +[//]: # (| 1 (onnx fp32) | 2806 | 0.0777 | 12.9 |) + +[//]: # (| 1 (onnx int8) | 1611 | 0.0446 | 22.4 |) 
\ No newline at end of file