From 42b0da851d7e0ebdd71485eb1bfb07971325d42a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?=
Date: Fri, 7 Apr 2023 22:21:43 +0800
Subject: [PATCH] vad

---
 funasr/runtime/python/utils/test_rtf_gpu.py | 58 ++++++++++++++++++++++
 funasr/tasks/vad.py                         |  2 +-
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 funasr/runtime/python/utils/test_rtf_gpu.py

diff --git a/funasr/runtime/python/utils/test_rtf_gpu.py b/funasr/runtime/python/utils/test_rtf_gpu.py
new file mode 100644
index 000000000..84cd2c788
--- /dev/null
+++ b/funasr/runtime/python/utils/test_rtf_gpu.py
@@ -0,0 +1,58 @@
+
+# Measure the RTF (real-time factor) of a FunASR Paraformer model over a list of wavs.
+import time
+import librosa
+from funasr.utils.types import str2bool
+
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument('--model_dir', type=str, required=True)
+parser.add_argument('--backend', type=str, default='onnx', help='["onnx", "torch"]')
+parser.add_argument('--wav_file', type=str, default=None, help='wav list file, each line: "id<tab>path" or "id path"')
+parser.add_argument('--quantize', type=str2bool, default=False, help='quantized model')
+parser.add_argument('--intra_op_num_threads', type=int, default=1, help='intra_op_num_threads for onnx')
+parser.add_argument('--batch_size', type=int, default=1, help='batch_size for onnx')
+args = parser.parse_args()
+
+if args.backend == "onnx":
+    from funasr.runtime.python.onnxruntime.funasr_onnx import Paraformer
+else:
+    from funasr.runtime.python.libtorch.funasr_torch import Paraformer
+
+model = Paraformer(args.model_dir, batch_size=args.batch_size, quantize=args.quantize, intra_op_num_threads=args.intra_op_num_threads)
+
+with open(args.wav_file, 'r') as wav_file_f:
+    wav_files = wav_file_f.readlines()
+
+# warm-up: decode the first wav repeatedly before timing the full list
+total = 0.0
+num = 30
+wav_path = wav_files[0].split("\t")[1].strip() if "\t" in wav_files[0] else wav_files[0].split(" ")[1].strip()
+for i in range(num):
+    beg_time = time.time()
+    result = model(wav_path)
+    end_time = time.time()
+    duration = end_time - beg_time
+    total += duration
+    print(result)
+    print("num: {}, time: {}, avg: {}, rtf: {}".format(i + 1, duration, total / (i + 1), (total / (i + 1)) / 5.53))  # 5.53: assumed duration (s) of the warm-up wav
+
+# inference time: decode the whole wav list and measure wall-clock time
+wav_paths = []
+beg_time = time.time()
+for wav_path_i in wav_files:
+    wav_path_i = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
+    wav_paths.append(wav_path_i)
+result = model(wav_paths)
+end_time = time.time()
+duration = (end_time - beg_time) * 1000  # ms
+print("total_time_comput_ms: {}".format(int(duration)))
+
+duration_time = 0.0
+for wav_path_i in wav_files:
+    wav_path = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
+    waveform, _ = librosa.load(wav_path, sr=16000)
+    duration_time += len(waveform) / 16.0  # samples at 16 kHz -> ms
+print("total_time_wav_ms: {}".format(int(duration_time)))
+
+print("total_rtf: {:.5}".format(duration / duration_time))
\ No newline at end of file
diff --git a/funasr/tasks/vad.py b/funasr/tasks/vad.py
index 1ac77d39a..1c1d4e0ed 100644
--- a/funasr/tasks/vad.py
+++ b/funasr/tasks/vad.py
@@ -319,8 +319,8 @@ class VADTask(AbsTask):
         cls,
         config_file: Union[Path, str] = None,
         model_file: Union[Path, str] = None,
-        cmvn_file: Union[Path, str] = None,
         device: str = "cpu",
+        cmvn_file: Union[Path, str] = None,
     ):
         """Build model from the files.
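
For reference, a minimal usage sketch of the new RTF benchmark script. The flags come from the script's argparse definitions; the model directory and wav list paths below are placeholders, and the list format ("id<tab>path" or "id path" per line) is inferred from the script's parsing code:

    python funasr/runtime/python/utils/test_rtf_gpu.py \
        --model_dir /path/to/exported_paraformer \
        --backend onnx \
        --wav_file /path/to/wav_list.txt \
        --batch_size 1 \
        --intra_op_num_threads 1

The script warms up on the first wav, then decodes the whole list in one call and reports total_rtf as total compute time divided by total audio duration (both in milliseconds).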