bugfix python runtime

2025-09-15 14:48:36 +08:00 · 2024-07-25 09:49:22 +08:00 · 2024-07-25 09:49:22 +08:00 · 060f57d03a
commit 060f57d03a
parent 5851fc53cd
6 changed files with 243 additions and 1 deletions
--- a/funasr/datasets/audio_datasets/samplers.py
+++ b/funasr/datasets/audio_datasets/samplers.py
@ -368,6 +368,7 @@ class CustomDistributedBufferDynamicBatchSampler(DistributedSampler):
        self.batch_size_sample_max = kwargs.get("batch_size_sample_max", 200)
        self.start_step = start_step
        self.batch_num = 1
+        self.batch_size_scale_threshold = kwargs.get("batch_size_scale_threshold", 40.0)
        if self.start_step > 0:
            logging.info(f"Warning, start_step > 0, dataloader start from step: {self.start_step}")
        # super().__init__(
@ -399,7 +400,13 @@ class CustomDistributedBufferDynamicBatchSampler(DistributedSampler):
                    continue
                sample_length = 1 if self.batch_type == "example" else original_sample_length
                potential_batch_length = max(max_len_in_batch, sample_length) * (len(batch) + 1)
-                if potential_batch_length <= self.batch_size and count < self.batch_size_sample_max:
+                potential_max_len_in_batch = max(max_len_in_batch, sample_length)
+                batch_size = (
+                    self.batch_size * self.batch_size_scale_threshold / potential_max_len_in_batch
+                    if potential_max_len_in_batch > self.batch_size_scale_threshold
+                    else self.batch_size
+                )
+                if potential_batch_length <= batch_size and count < self.batch_size_sample_max:
                    batch.append(idx)
                    max_len_in_batch = max(max_len_in_batch, sample_length)
                    count += 1
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
@ -0,0 +1,205 @@
+Metadata-Version: 2.1
+Name: funasr-onnx
+Version: 0.4.1
+Summary: FunASR: A Fundamental End-to-End Speech Recognition Toolkit
+Home-page: https://github.com/alibaba-damo-academy/FunASR.git
+Author: Speech Lab of DAMO Academy, Alibaba Group
+Author-email: funasr@list.alibaba-inc.com
+License: MIT
+Keywords: funasr,asr
+Platform: Any
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Description-Content-Type: text/markdown
+
+# ONNXRuntime-python
+
+## Install `funasr-onnx`
+
+install from pip
+
+```shell
+pip install -U funasr-onnx
+# For the users in China, you could install with the command:
+# pip install -U funasr-onnx -i https://mirror.sjtu.edu.cn/pypi/web/simple
+# If you want to export .onnx file, you should install modelscope and funasr
+pip install -U modelscope funasr
+# For the users in China, you could install with the command:
+# pip install -U modelscope funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
+```
+
+or install from source code
+
+```shell
+git clone https://github.com/alibaba/FunASR.git && cd FunASR
+cd runtime/python/onnxruntime
+pip install -e ./
+# For the users in China, you could install with the command:
+# pip install -e ./ -i https://mirror.sjtu.edu.cn/pypi/web/simple
+```
+
+## Inference with runtime
+
+### Speech Recognition
+
+#### Paraformer
+
+ ```python
+from funasr_onnx import Paraformer
+from pathlib import Path
+
+model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
+model = Paraformer(model_dir, batch_size=1, quantize=True)
+
+wav_path = ['{}/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav'.format(Path.home())]
+
+result = model(wav_path)
+print(result)
+ ```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported formats: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
+#### Paraformer-online
+
+### Voice Activity Detection
+
+#### FSMN-VAD
+
+```python
+from funasr_onnx import Fsmn_vad
+from pathlib import Path
+
+model_dir = "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
+wav_path = '{}/.cache/modelscope/hub/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav'.format(Path.home())
+
+model = Fsmn_vad(model_dir)
+
+result = model(wav_path)
+print(result)
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported formats: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
+#### FSMN-VAD-online
+
+```python
+from funasr_onnx import Fsmn_vad_online
+import soundfile
+from pathlib import Path
+
+model_dir = "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
+wav_path = '{}/.cache/modelscope/hub/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav'.format(Path.home())
+
+model = Fsmn_vad_online(model_dir)
+
+
+##online vad
+speech, sample_rate = soundfile.read(wav_path)
+speech_length = speech.shape[0]
+#
+sample_offset = 0
+step = 1600
+param_dict = {'in_cache': []}
+for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
+    if sample_offset + step >= speech_length - 1:
+        step = speech_length - sample_offset
+        is_final = True
+    else:
+        is_final = False
+    param_dict['is_final'] = is_final
+    segments_result = model(audio_in=speech[sample_offset: sample_offset + step],
+                            param_dict=param_dict)
+    if segments_result:
+        print(segments_result)
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported formats: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
+### Punctuation Restoration
+
+#### CT-Transformer
+
+```python
+from funasr_onnx import CT_Transformer
+
+model_dir = "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
+model = CT_Transformer(model_dir)
+
+text_in="跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益"
+result = model(text_in)
+print(result[0])
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: `str`, raw text of asr result
+
+Output: `List[str]`: recognition result
+
+#### CT-Transformer-online
+
+```python
+from funasr_onnx import CT_Transformer_VadRealtime
+
+model_dir = "damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727"
+model = CT_Transformer_VadRealtime(model_dir)
+
+text_in  = "跨境河流是养育沿岸|人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员|在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险|向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流>问题上的关切|愿意进一步完善双方联合工作机制|凡是|中方能做的我们|都会去做而且会做得更好我请印度朋友们放心中国在上游的|任何开发利用都会经过科学|规划和论证兼顾上下游的利益"
+
+vads = text_in.split("|")
+rec_result_all=""
+param_dict = {"cache": []}
+for vad in vads:
+    result = model(vad, param_dict=param_dict)
+    rec_result_all += result[0]
+
+print(rec_result_all)
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: `str`, raw text of asr result
+
+Output: `List[str]`: recognition result
+
+## Performance benchmark
+
+Please refer to the [benchmark](https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/docs/benchmark_onnx.md)
+
+## Acknowledge
+
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We partially referred to [SWHL](https://github.com/RapidAI/RapidASR) for the onnxruntime implementation (Paraformer model only).
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
@ -0,0 +1,20 @@
+README.md
+setup.py
+funasr_onnx/__init__.py
+funasr_onnx/paraformer_bin.py
+funasr_onnx/paraformer_online_bin.py
+funasr_onnx/punc_bin.py
+funasr_onnx/sensevoice_bin.py
+funasr_onnx/vad_bin.py
+funasr_onnx.egg-info/PKG-INFO
+funasr_onnx.egg-info/SOURCES.txt
+funasr_onnx.egg-info/dependency_links.txt
+funasr_onnx.egg-info/requires.txt
+funasr_onnx.egg-info/top_level.txt
+funasr_onnx/utils/__init__.py
+funasr_onnx/utils/e2e_vad.py
+funasr_onnx/utils/frontend.py
+funasr_onnx/utils/postprocess_utils.py
+funasr_onnx/utils/sentencepiece_tokenizer.py
+funasr_onnx/utils/timestamp_utils.py
+funasr_onnx/utils/utils.py
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
@ -0,0 +1 @@
+
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
@ -0,0 +1,8 @@
+librosa
+onnxruntime>=1.7.0
+scipy
+numpy<=1.26.4
+kaldi-native-fbank
+PyYAML>=5.1.2
+onnx
+sentencepiece
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt
@ -0,0 +1 @@
+funasr_onnx