diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO b/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
deleted file mode 100644
index 88f475087..000000000
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/PKG-INFO
+++ /dev/null
@@ -1,205 +0,0 @@
-Metadata-Version: 2.1
-Name: funasr-onnx
-Version: 0.4.1
-Summary: FunASR: A Fundamental End-to-End Speech Recognition Toolkit
-Home-page: https://github.com/alibaba-damo-academy/FunASR.git
-Author: Speech Lab of DAMO Academy, Alibaba Group
-Author-email: funasr@list.alibaba-inc.com
-License: MIT
-Keywords: funasr,asr
-Platform: Any
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Description-Content-Type: text/markdown
-
-# ONNXRuntime-python
-
-## Install `funasr-onnx`
-
-install from pip
-
-```shell
-pip install -U funasr-onnx
-# For the users in China, you could install with the command:
-# pip install -U funasr-onnx -i https://mirror.sjtu.edu.cn/pypi/web/simple
-# If you want to export .onnx file, you should install modelscope and funasr
-pip install -U modelscope funasr
-# For the users in China, you could install with the command:
-# pip install -U modelscope funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
-```
-
-or install from source code
-
-```shell
-git clone https://github.com/alibaba/FunASR.git && cd FunASR
-cd funasr/runtime/python/onnxruntime
-pip install -e ./
-# For the users in China, you could install with the command:
-# pip install -e ./ -i https://mirror.sjtu.edu.cn/pypi/web/simple
-```
-
-## Inference with runtime
-
-### Speech Recognition
-
-#### Paraformer
-
- ```python
-from funasr_onnx import Paraformer
-from pathlib import Path
-
-model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
-model = Paraformer(model_dir, batch_size=1, quantize=True)
-
-wav_path = ['{}/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav'.format(Path.home())]
-
-result = model(wav_path)
-print(result)
- ```
-
-- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
-- `batch_size`: `1` (Default), the batch size during inference
-- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
-- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
-- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
-
-Input: wav format file, supported formats: `str, np.ndarray, List[str]`
-
-Output: `List[str]`: recognition result
-
-#### Paraformer-online
-
-### Voice Activity Detection
-
-#### FSMN-VAD
-
-```python
-from funasr_onnx import Fsmn_vad
-from pathlib import Path
-
-model_dir = "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
-wav_path = '{}/.cache/modelscope/hub/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav'.format(Path.home())
-
-model = Fsmn_vad(model_dir)
-
-result = model(wav_path)
-print(result)
-```
-
-- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
-- `batch_size`: `1` (Default), the batch size during inference
-- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
-- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
-- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
-
-Input: wav format file, supported formats: `str, np.ndarray, List[str]`
-
-Output: `List[str]`: recognition result
-
-#### FSMN-VAD-online
-
-```python
-from funasr_onnx import Fsmn_vad_online
-import soundfile
-from pathlib import Path
-
-model_dir = "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
-wav_path = '{}/.cache/modelscope/hub/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav'.format(Path.home())
-
-model = Fsmn_vad_online(model_dir)
-
-
-##online vad
-speech, sample_rate = soundfile.read(wav_path)
-speech_length = speech.shape[0]
-#
-sample_offset = 0
-step = 1600
-param_dict = {'in_cache': []}
-for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
-    if sample_offset + step >= speech_length - 1:
-        step = speech_length - sample_offset
-        is_final = True
-    else:
-        is_final = False
-    param_dict['is_final'] = is_final
-    segments_result = model(audio_in=speech[sample_offset: sample_offset + step],
-                            param_dict=param_dict)
-    if segments_result:
-        print(segments_result)
-```
-
-- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
-- `batch_size`: `1` (Default), the batch size during inference
-- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
-- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
-- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
-
-Input: wav format file, supported formats: `str, np.ndarray, List[str]`
-
-Output: `List[str]`: recognition result
-
-### Punctuation Restoration
-
-#### CT-Transformer
-
-```python
-from funasr_onnx import CT_Transformer
-
-model_dir = "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
-model = CT_Transformer(model_dir)
-
-text_in="跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益"
-result = model(text_in)
-print(result[0])
-```
-
-- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
-- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
-- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
-- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
-
-Input: `str`, raw text of asr result
-
-Output: `List[str]`: recognition result
-
-#### CT-Transformer-online
-
-```python
-from funasr_onnx import CT_Transformer_VadRealtime
-
-model_dir = "damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727"
-model = CT_Transformer_VadRealtime(model_dir)
-
-text_in  = "跨境河流是养育沿岸|人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员|在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险|向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流>问题上的关切|愿意进一步完善双方联合工作机制|凡是|中方能做的我们|都会去做而且会做得更好我请印度朋友们放心中国在上游的|任何开发利用都会经过科学|规划和论证兼顾上下游的利益"
-
-vads = text_in.split("|")
-rec_result_all=""
-param_dict = {"cache": []}
-for vad in vads:
-    result = model(vad, param_dict=param_dict)
-    rec_result_all += result[0]
-
-print(rec_result_all)
-```
-
-- `model_dir`: model_name in modelscope or local path downloaded from modelscope. If the local path is set, it should contain `model.onnx`, `config.yaml`, `am.mvn`
-- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
-- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
-- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
-
-Input: `str`, raw text of asr result
-
-Output: `List[str]`: recognition result
-
-## Performance benchmark
-
-Please refer to the [benchmark](https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/docs/benchmark_onnx.md)
-
-## Acknowledge
-
-1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
-2. We partially refer to [SWHL](https://github.com/RapidAI/RapidASR) for onnxruntime (only for paraformer model).
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
deleted file mode 100644
index 248471276..000000000
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-README.md
-setup.py
-funasr_onnx/__init__.py
-funasr_onnx/paraformer_bin.py
-funasr_onnx/paraformer_online_bin.py
-funasr_onnx/punc_bin.py
-funasr_onnx/sensevoice_bin.py
-funasr_onnx/vad_bin.py
-funasr_onnx.egg-info/PKG-INFO
-funasr_onnx.egg-info/SOURCES.txt
-funasr_onnx.egg-info/dependency_links.txt
-funasr_onnx.egg-info/requires.txt
-funasr_onnx.egg-info/top_level.txt
-funasr_onnx/utils/__init__.py
-funasr_onnx/utils/e2e_vad.py
-funasr_onnx/utils/frontend.py
-funasr_onnx/utils/postprocess_utils.py
-funasr_onnx/utils/sentencepiece_tokenizer.py
-funasr_onnx/utils/timestamp_utils.py
-funasr_onnx/utils/utils.py
\ No newline at end of file
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
deleted file mode 100644
index 8b1378917..000000000
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
deleted file mode 100644
index 6af99dea0..000000000
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/requires.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-librosa
-onnxruntime>=1.7.0
-scipy
-numpy<=1.26.4
-kaldi-native-fbank
-PyYAML>=5.1.2
-onnx
-sentencepiece
diff --git a/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt b/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt
deleted file mode 100644
index de41eb90e..000000000
--- a/runtime/python/onnxruntime/funasr_onnx.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-funasr_onnx