From 060f57d03a55df657baa37e8a7cc100f98d3ce82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= <zhifu.gzf@alibaba-inc.com>
Date: Thu, 25 Jul 2024 09:49:22 +0800
Subject: [PATCH] bugfix python runtime

---
 funasr/datasets/audio_datasets/samplers.py    |   9 +-
 .../onnxruntime/funasr_onnx.egg-info/PKG-INFO | 205 ++++++++++++++++++
 .../funasr_onnx.egg-info/SOURCES.txt          |  20 ++
 .../funasr_onnx.egg-info/dependency_links.txt |   1 +
 .../funasr_onnx.egg-info/requires.txt         |   8 +
 .../funasr_onnx.egg-info/top_level.txt        |   1 +
 6 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100644 runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
 create mode 100644 runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
 create mode 100644 runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
 create mode 100644 runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
 create mode 100644 runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt

diff --git a/funasr/datasets/audio_datasets/samplers.py b/funasr/datasets/audio_datasets/samplers.py
index f7057de11..4a1794e67 100644
--- a/funasr/datasets/audio_datasets/samplers.py
+++ b/funasr/datasets/audio_datasets/samplers.py
@@ -368,6 +368,7 @@ class CustomDistributedBufferDynamicBatchSampler(DistributedSampler):
         self.batch_size_sample_max = kwargs.get("batch_size_sample_max", 200)
         self.start_step = start_step
         self.batch_num = 1
+        self.batch_size_scale_threshold = kwargs.get("batch_size_scale_threshold", 40.0)
         if self.start_step > 0:
             logging.info(f"Warning, start_step > 0, dataloader start from step: {self.start_step}")
         # super().__init__(
@@ -399,7 +400,13 @@ class CustomDistributedBufferDynamicBatchSampler(DistributedSampler):
                     continue
                 sample_length = 1 if self.batch_type == "example" else original_sample_length
                 potential_batch_length = max(max_len_in_batch, sample_length) * (len(batch) + 1)
-                if potential_batch_length <= self.batch_size and count < self.batch_size_sample_max:
+                potential_max_len_in_batch = max(max_len_in_batch, sample_length)
+                batch_size = (
+                    self.batch_size * self.batch_size_scale_threshold / potential_max_len_in_batch
+                    if potential_max_len_in_batch > self.batch_size_scale_threshold
+                    else self.batch_size
+                )
+                if potential_batch_length <= batch_size and count < self.batch_size_sample_max:
                     batch.append(idx)
                     max_len_in_batch = max(max_len_in_batch, sample_length)
                     count += 1
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO b/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
new file mode 100644
index 000000000..88f475087
--- /dev/null
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
@@ -0,0 +1,205 @@
+Metadata-Version: 2.1
+Name: funasr-onnx
+Version: 0.4.1
+Summary: FunASR: A Fundamental End-to-End Speech Recognition Toolkit
+Home-page: https://github.com/alibaba-damo-academy/FunASR.git
+Author: Speech Lab of DAMO Academy, Alibaba Group
+Author-email: funasr@list.alibaba-inc.com
+License: MIT
+Keywords: funasr,asr
+Platform: Any
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Description-Content-Type: text/markdown
+
+# ONNXRuntime-python
+
+## Install `funasr-onnx`
+
+install from pip
+
+```shell
+pip install -U funasr-onnx
+# For the users in China, you could install with the command:
+# pip install -U funasr-onnx -i https://mirror.sjtu.edu.cn/pypi/web/simple
+# If you want to export .onnx file, you should install modelscope and funasr
+pip install -U modelscope funasr
+# For the users in China, you could install with the command:
+# pip install -U modelscope funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
+```
+
+or install from source code
+
+```shell
+git clone https://github.com/alibaba/FunASR.git && cd FunASR
+cd runtime/python/onnxruntime
+pip install -e ./
+# For the users in China, you could install with the command:
+# pip install -e ./ -i https://mirror.sjtu.edu.cn/pypi/web/simple
+```
+
+## Inference with runtime
+
+### Speech Recognition
+
+#### Paraformer
+
+ ```python
+from funasr_onnx import Paraformer
+from pathlib import Path
+
+model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
+model = Paraformer(model_dir, batch_size=1, quantize=True)
+
+wav_path = ['{}/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav'.format(Path.home())]
+
+result = model(wav_path)
+print(result)
+ ```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported input types: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
+#### Paraformer-online
+
+### Voice Activity Detection
+
+#### FSMN-VAD
+
+```python
+from funasr_onnx import Fsmn_vad
+from pathlib import Path
+
+model_dir = "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
+wav_path = '{}/.cache/modelscope/hub/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav'.format(Path.home())
+
+model = Fsmn_vad(model_dir)
+
+result = model(wav_path)
+print(result)
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported input types: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
+#### FSMN-VAD-online
+
+```python
+from funasr_onnx import Fsmn_vad_online
+import soundfile
+from pathlib import Path
+
+model_dir = "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
+wav_path = '{}/.cache/modelscope/hub/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav'.format(Path.home())
+
+model = Fsmn_vad_online(model_dir)
+
+
+##online vad
+speech, sample_rate = soundfile.read(wav_path)
+speech_length = speech.shape[0]
+#
+sample_offset = 0
+step = 1600
+param_dict = {'in_cache': []}
+for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
+    if sample_offset + step >= speech_length - 1:
+        step = speech_length - sample_offset
+        is_final = True
+    else:
+        is_final = False
+    param_dict['is_final'] = is_final
+    segments_result = model(audio_in=speech[sample_offset: sample_offset + step],
+                            param_dict=param_dict)
+    if segments_result:
+        print(segments_result)
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported input types: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
+### Punctuation Restoration
+
+#### CT-Transformer
+
+```python
+from funasr_onnx import CT_Transformer
+
+model_dir = "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
+model = CT_Transformer(model_dir)
+
+text_in="跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益"
+result = model(text_in)
+print(result[0])
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: `str`, raw text of asr result
+
+Output: `List[str]`: recognition result
+
+#### CT-Transformer-online
+
+```python
+from funasr_onnx import CT_Transformer_VadRealtime
+
+model_dir = "damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727"
+model = CT_Transformer_VadRealtime(model_dir)
+
+text_in  = "跨境河流是养育沿岸|人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员|在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险|向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切|愿意进一步完善双方联合工作机制|凡是|中方能做的我们|都会去做而且会做得更好我请印度朋友们放心中国在上游的|任何开发利用都会经过科学|规划和论证兼顾上下游的利益"
+
+vads = text_in.split("|")
+rec_result_all=""
+param_dict = {"cache": []}
+for vad in vads:
+    result = model(vad, param_dict=param_dict)
+    rec_result_all += result[0]
+
+print(rec_result_all)
+```
+
+- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: `str`, raw text of asr result
+
+Output: `List[str]`: recognition result
+
+## Performance benchmark
+
+Please refer to the [benchmark](https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/docs/benchmark_onnx.md)
+
+## Acknowledge
+
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We partially referred to [SWHL](https://github.com/RapidAI/RapidASR) for the onnxruntime implementation (only for the paraformer model).
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
new file mode 100644
index 000000000..248471276
--- /dev/null
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
@@ -0,0 +1,20 @@
+README.md
+setup.py
+funasr_onnx/__init__.py
+funasr_onnx/paraformer_bin.py
+funasr_onnx/paraformer_online_bin.py
+funasr_onnx/punc_bin.py
+funasr_onnx/sensevoice_bin.py
+funasr_onnx/vad_bin.py
+funasr_onnx.egg-info/PKG-INFO
+funasr_onnx.egg-info/SOURCES.txt
+funasr_onnx.egg-info/dependency_links.txt
+funasr_onnx.egg-info/requires.txt
+funasr_onnx.egg-info/top_level.txt
+funasr_onnx/utils/__init__.py
+funasr_onnx/utils/e2e_vad.py
+funasr_onnx/utils/frontend.py
+funasr_onnx/utils/postprocess_utils.py
+funasr_onnx/utils/sentencepiece_tokenizer.py
+funasr_onnx/utils/timestamp_utils.py
+funasr_onnx/utils/utils.py
\ No newline at end of file
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
new file mode 100644
index 000000000..6af99dea0
--- /dev/null
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
@@ -0,0 +1,8 @@
+librosa
+onnxruntime>=1.7.0
+scipy
+numpy<=1.26.4
+kaldi-native-fbank
+PyYAML>=5.1.2
+onnx
+sentencepiece
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt
new file mode 100644
index 000000000..de41eb90e
--- /dev/null
+++ b/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt
@@ -0,0 +1 @@
+funasr_onnx