* qwenaudio qwenaudiochat

* qwenaudio qwenaudiochat

* whisper

* whisper

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* export onnx

* export onnx

* export onnx

* dingding

* dingding

* llm

* doc

* onnx

* onnx

* onnx

* onnx

* onnx

* onnx

* v1.0.15

* qwenaudio
This commit is contained in:
zhifu gao 2024-03-11 22:32:26 +08:00 committed by GitHub
parent 15c4709beb
commit 0d9384c8c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 42 additions and 12 deletions

View File

@ -227,6 +227,21 @@ model = AutoModel(model="paraformer", device="cpu")
res = model.export(quantize=False)
```
### Text ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model = Paraformer(model_dir, batch_size=1, quantize=True)
wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
result = model(wav_path)
print(result)
```
More examples refer to [demo](runtime/python/onnxruntime)
## Deployment Service
FunASR supports deploying pre-trained or further fine-tuned models for service. Currently, it supports the following types of service deployment:
- File transcription service, Mandarin, CPU version, done

View File

@ -216,7 +216,7 @@ print(res)
funasr-export ++model=paraformer ++quantize=false
```
### 从python指令导出
### 从Python导出
```python
from funasr import AutoModel
@ -225,6 +225,20 @@ model = AutoModel(model="paraformer")
res = model.export(quantize=False)
```
### 测试ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model = Paraformer(model_dir, batch_size=1, quantize=True)
wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
result = model(wav_path)
print(result)
```
更多例子请参考 [样例](runtime/python/onnxruntime)
<a name="服务部署"></a>
## 服务部署

View File

@ -48,9 +48,10 @@ def download_from_ms(**kwargs):
if "file_path_metas" in conf_json:
add_file_root_path(model_or_path, conf_json["file_path_metas"], cfg)
cfg.update(kwargs)
config = OmegaConf.load(cfg["config"])
kwargs = OmegaConf.merge(config, cfg)
kwargs["model"] = config["model"]
if "config" in cfg:
config = OmegaConf.load(cfg["config"])
kwargs = OmegaConf.merge(config, cfg)
kwargs["model"] = config["model"]
elif os.path.exists(os.path.join(model_or_path, "config.yaml")) and os.path.exists(os.path.join(model_or_path, "model.pt")):
config = OmegaConf.load(os.path.join(model_or_path, "config.yaml"))
kwargs = OmegaConf.merge(config, kwargs)

View File

@ -1 +1 @@
1.0.14
1.0.15

View File

@ -56,9 +56,9 @@ class CT_Transformer():
"\npip3 install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple"
model = AutoModel(model=model_dir)
model_dir = model.export(type="onnx", quantize=quantize)
model_dir = model.export(quantize=quantize)
config_file = os.path.join(model_dir, 'punc.yaml')
config_file = os.path.join(model_dir, 'confi.yaml')
config = read_yaml(config_file)
token_list = os.path.join(model_dir, 'tokens.json')
with open(token_list, 'r', encoding='utf-8') as f:
@ -67,7 +67,7 @@ class CT_Transformer():
self.converter = TokenIDConverter(token_list)
self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
self.batch_size = 1
self.punc_list = config['punc_list']
self.punc_list = config["model_conf"]['punc_list']
self.period = 0
for i in range(len(self.punc_list)):
if self.punc_list[i] == ",":
@ -76,9 +76,9 @@ class CT_Transformer():
self.punc_list[i] = ""
elif self.punc_list[i] == "":
self.period = i
if "seg_jieba" in config:
self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
if os.path.exists(self.jieba_usr_dict_path):
self.seg_jieba = True
self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path)
else:
self.seg_jieba = False
@ -175,7 +175,7 @@ class CT_Transformer_VadRealtime(CT_Transformer):
intra_op_num_threads: int = 4,
cache_dir: str = None
):
super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
super().__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
def __call__(self, text: str, param_dict: map, split_size=20):
cache_key = "cache"

View File

@ -13,7 +13,7 @@ def get_readme():
MODULE_NAME = 'funasr_onnx'
VERSION_NUM = '0.2.5'
VERSION_NUM = '0.3.0'
setuptools.setup(
name=MODULE_NAME,