Merge branch 'dev_wjm' of https://github.com/alibaba/FunASR into dev_wjm

2025-09-15 14:48:36 +08:00 · 2023-03-29 15:49:46 +08:00 · 2023-03-29 15:49:46 +08:00 · 8f26a9acc2
commit 8f26a9acc2
parent 63cc47929c d3c6967ca8
637 changed files with 186715 additions and 17610 deletions
--- a/egs/aishell/conformer/run.sh
+++ b/egs/aishell/conformer/run.sh
@ -217,7 +217,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
        if [ -n "${inference_config}" ]; then
            _opts+="--config ${inference_config} "
        fi
-        ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1: "${_nj}" "${_logdir}"/asr_inference.JOB.log \
+        ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
            python -m funasr.bin.asr_inference_launch \
                --batch_size 1 \
                --ngpu "${_ngpu}" \
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/README.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/README.md
@ -1,30 +0,0 @@
-# ModelScope Model
-
-## How to finetune and infer using a pretrained Paraformer-large Model
-
-### Finetune
-
- Modify finetune training related parameters in `finetune.py`
-    - <strong>output_dir:</strong> # result dir
-    - <strong>data_dir:</strong> # the dataset dir needs to include files: train/wav.scp, train/text; validation/wav.scp, validation/text.
-    - <strong>batch_bins:</strong> # batch size
-    - <strong>max_epoch:</strong> # number of training epoch
-    - <strong>lr:</strong> # learning rate
-
- Then you can run the pipeline to finetune with:
-```python
-    python finetune.py
-```
-
-### Inference
-
-Or you can use the finetuned model for inference directly.
-
- Setting parameters in `infer.py`
-    - <strong>data_dir:</strong> # the dataset dir
-    - <strong>output_dir:</strong> # result dir
-
- Then you can run the pipeline to infer with:
-```python
-    python infer.py
-```
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md
@ -1,23 +0,0 @@
-# Paraformer-Large
- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/summary>
- Model size: 220M
-
-# Environments
- date: `Fri Feb 10 13:34:24 CST 2023`
- python version: `3.7.12`
- FunASR version: `0.1.6`
- pytorch version: `pytorch 1.7.0`
- Git hash: ``
- Commit date: ``
-
-# Beachmark Results
-
-## AISHELL-1
- Decode config:
-  - Decode without CTC
-  - Decode without LM
-
-| testset CER(%) | base model|finetune model |
-|:--------------:|:---------:|:-------------:|
-| dev            | 1.75      |1.62           |
-| test           | 1.95      |1.78           |
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/finetune.py
@ -1,36 +0,0 @@
-import os
-
-from modelscope.metainfo import Trainers
-from modelscope.trainers import build_trainer
-
-from funasr.datasets.ms_dataset import MsDataset
-from funasr.utils.modelscope_param import modelscope_args
-
-
-def modelscope_finetune(params):
-    if not os.path.exists(params.output_dir):
-        os.makedirs(params.output_dir, exist_ok=True)
-    # dataset split ["train", "validation"]
-    ds_dict = MsDataset.load(params.data_path)
-    kwargs = dict(
-        model=params.model,
-        data_dir=ds_dict,
-        dataset_type=params.dataset_type,
-        work_dir=params.output_dir,
-        batch_bins=params.batch_bins,
-        max_epoch=params.max_epoch,
-        lr=params.lr)
-    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
-    trainer.train()
-
-
-if __name__ == '__main__':
-    params = modelscope_args(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch", data_path="./data")
-    params.output_dir = "./checkpoint"              # m模型保存路径
-    params.data_path = "./example_data/"            # 数据路径
-    params.dataset_type = "small"                   # 小数据量设置small，若数据量大于1000小时，请使用large
-    params.batch_bins = 2000                       # batch size，如果dataset_type="small"，batch_bins单位为fbank特征帧数，如果dataset_type="large"，batch_bins单位为毫秒，
-    params.max_epoch = 50                           # 最大训练轮数
-    params.lr = 0.00005                             # 设置学习率
-    
-    modelscope_finetune(params)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/infer.py
@ -1,101 +0,0 @@
-import os
-import shutil
-from multiprocessing import Pool
-
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-
-from funasr.utils.compute_wer import compute_wer
-
-
-def modelscope_infer_core(output_dir, split_dir, njob, idx, batch_size, ngpu, model):
-    output_dir_job = os.path.join(output_dir, "output.{}".format(idx))
-    if ngpu > 0:
-        use_gpu = 1
-        gpu_id = int(idx) - 1
-    else:
-        use_gpu = 0
-        gpu_id = -1
-    if "CUDA_VISIBLE_DEVICES" in os.environ.keys():
-        gpu_list = os.environ['CUDA_VISIBLE_DEVICES'].split(",")
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_list[gpu_id])
-    else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
-    inference_pipline = pipeline(
-        task=Tasks.auto_speech_recognition,
-        model=model,
-        output_dir=output_dir_job,
-        batch_size=batch_size,
-        ngpu=use_gpu,
-    )
-    audio_in = os.path.join(split_dir, "wav.{}.scp".format(idx))
-    inference_pipline(audio_in=audio_in)
-
-
-def modelscope_infer(params):
-    # prepare for multi-GPU decoding
-    ngpu = params["ngpu"]
-    njob = params["njob"]
-    batch_size = params["batch_size"]
-    output_dir = params["output_dir"]
-    model = params["model"]
-    if os.path.exists(output_dir):
-        shutil.rmtree(output_dir)
-    os.mkdir(output_dir)
-    split_dir = os.path.join(output_dir, "split")
-    os.mkdir(split_dir)
-    if ngpu > 0:
-        nj = ngpu
-    elif ngpu == 0:
-        nj = njob
-    wav_scp_file = os.path.join(params["data_dir"], "wav.scp")
-    with open(wav_scp_file) as f:
-        lines = f.readlines()
-        num_lines = len(lines)
-        num_job_lines = num_lines // nj
-    start = 0
-    for i in range(nj):
-        end = start + num_job_lines
-        file = os.path.join(split_dir, "wav.{}.scp".format(str(i + 1)))
-        with open(file, "w") as f:
-            if i == nj - 1:
-                f.writelines(lines[start:])
-            else:
-                f.writelines(lines[start:end])
-        start = end
-
-    p = Pool(nj)
-    for i in range(nj):
-        p.apply_async(modelscope_infer_core,
-                      args=(output_dir, split_dir, njob, str(i + 1), batch_size, ngpu, model))
-    p.close()
-    p.join()
-
-    # combine decoding results
-    best_recog_path = os.path.join(output_dir, "1best_recog")
-    os.mkdir(best_recog_path)
-    files = ["text", "token", "score"]
-    for file in files:
-        with open(os.path.join(best_recog_path, file), "w") as f:
-            for i in range(nj):
-                job_file = os.path.join(output_dir, "output.{}/1best_recog".format(str(i + 1)), file)
-                with open(job_file) as f_job:
-                    lines = f_job.readlines()
-                f.writelines(lines)
-
-    # If text exists, compute CER
-    text_in = os.path.join(params["data_dir"], "text")
-    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
-        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))
-
-
-if __name__ == "__main__":
-    params = {}
-    params["model"] = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch"
-    params["data_dir"] = "./data/test"
-    params["output_dir"] = "./results"
-    params["ngpu"] = 1 # if ngpu > 0, will use gpu decoding
-    params["njob"] = 1 # if ngpu = 0, will use cpu decoding
-    params["batch_size"] = 64
-    modelscope_infer(params)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/infer_after_finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/infer_after_finetune.py
@ -1,48 +0,0 @@
-import json
-import os
-import shutil
-
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-from modelscope.hub.snapshot_download import snapshot_download
-
-from funasr.utils.compute_wer import compute_wer
-
-def modelscope_infer_after_finetune(params):
-    # prepare for decoding
-
-    try:
-        pretrained_model_path = snapshot_download(params["modelscope_model_name"], cache_dir=params["output_dir"])
-    except BaseException:
-        raise BaseException(f"Please download pretrain model from ModelScope firstly.")
-    shutil.copy(os.path.join(params["output_dir"], params["decoding_model_name"]), os.path.join(pretrained_model_path, "model.pb"))
-    decoding_path = os.path.join(params["output_dir"], "decode_results")
-    if os.path.exists(decoding_path):
-        shutil.rmtree(decoding_path)
-    os.mkdir(decoding_path)
-
-    # decoding
-    inference_pipeline = pipeline(
-        task=Tasks.auto_speech_recognition,
-        model=pretrained_model_path,
-        output_dir=decoding_path,
-        batch_size=params["batch_size"]
-    )
-    audio_in = os.path.join(params["data_dir"], "wav.scp")
-    inference_pipeline(audio_in=audio_in)
-
-    # computer CER if GT text is set
-    text_in = os.path.join(params["data_dir"], "text")
-    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
-        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))
-
-
-if __name__ == '__main__':
-    params = {}
-    params["modelscope_model_name"] = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch"
-    params["output_dir"] = "./checkpoint"
-    params["data_dir"] = "./data/test"
-    params["decoding_model_name"] = "valid.acc.ave_10best.pb"
-    params["batch_size"] = 64
-    modelscope_infer_after_finetune(params)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/README.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/README.md
@ -1,30 +0,0 @@
-# ModelScope Model
-
-## How to finetune and infer using a pretrained Paraformer-large Model
-
-### Finetune
-
- Modify finetune training related parameters in `finetune.py`
-    - <strong>output_dir:</strong> # result dir
-    - <strong>data_dir:</strong> # the dataset dir needs to include files: train/wav.scp, train/text; validation/wav.scp, validation/text.
-    - <strong>batch_bins:</strong> # batch size
-    - <strong>max_epoch:</strong> # number of training epoch
-    - <strong>lr:</strong> # learning rate
-
- Then you can run the pipeline to finetune with:
-```python
-    python finetune.py
-```
-
-### Inference
-
-Or you can use the finetuned model for inference directly.
-
- Setting parameters in `infer.py`
-    - <strong>data_dir:</strong> # the dataset dir
-    - <strong>output_dir:</strong> # result dir
-
- Then you can run the pipeline to infer with:
-```python
-    python infer.py
-```
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md
@ -1,25 +0,0 @@
-# Paraformer-Large
- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/summary>
- Model size: 220M
-
-# Environments
- date: `Fri Feb 10 13:34:24 CST 2023`
- python version: `3.7.12`
- FunASR version: `0.1.6`
- pytorch version: `pytorch 1.7.0`
- Git hash: ``
- Commit date: ``
-
-# Beachmark Results
-
-## AISHELL-2
- Decode config: 
-  - Decode without CTC
-  - Decode without LM
-
-| testset      | base model|finetune model|
-|:------------:|:---------:|:------------:|
-| dev_ios      | 2.80      |2.60          |
-| test_android | 3.13      |2.84          |
-| test_ios     | 2.85      |2.82          |
-| test_mic     | 3.06      |2.88          |
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/finetune.py
@ -1,36 +0,0 @@
-import os
-
-from modelscope.metainfo import Trainers
-from modelscope.trainers import build_trainer
-
-from funasr.datasets.ms_dataset import MsDataset
-from funasr.utils.modelscope_param import modelscope_args
-
-
-def modelscope_finetune(params):
-    if not os.path.exists(params.output_dir):
-        os.makedirs(params.output_dir, exist_ok=True)
-    # dataset split ["train", "validation"]
-    ds_dict = MsDataset.load(params.data_path)
-    kwargs = dict(
-        model=params.model,
-        data_dir=ds_dict,
-        dataset_type=params.dataset_type,
-        work_dir=params.output_dir,
-        batch_bins=params.batch_bins,
-        max_epoch=params.max_epoch,
-        lr=params.lr)
-    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
-    trainer.train()
-
-
-if __name__ == '__main__':
-    params = modelscope_args(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch", data_path="./data")
-    params.output_dir = "./checkpoint"              # m模型保存路径
-    params.data_path = "./example_data/"            # 数据路径
-    params.dataset_type = "small"                   # 小数据量设置small，若数据量大于1000小时，请使用large
-    params.batch_bins = 2000                       # batch size，如果dataset_type="small"，batch_bins单位为fbank特征帧数，如果dataset_type="large"，batch_bins单位为毫秒，
-    params.max_epoch = 50                           # 最大训练轮数
-    params.lr = 0.00005                             # 设置学习率
-    
-    modelscope_finetune(params)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/infer.py
@ -1,101 +0,0 @@
-import os
-import shutil
-from multiprocessing import Pool
-
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-
-from funasr.utils.compute_wer import compute_wer
-
-
-def modelscope_infer_core(output_dir, split_dir, njob, idx, batch_size, ngpu, model):
-    output_dir_job = os.path.join(output_dir, "output.{}".format(idx))
-    if ngpu > 0:
-        use_gpu = 1
-        gpu_id = int(idx) - 1
-    else:
-        use_gpu = 0
-        gpu_id = -1
-    if "CUDA_VISIBLE_DEVICES" in os.environ.keys():
-        gpu_list = os.environ['CUDA_VISIBLE_DEVICES'].split(",")
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_list[gpu_id])
-    else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
-    inference_pipline = pipeline(
-        task=Tasks.auto_speech_recognition,
-        model=model,
-        output_dir=output_dir_job,
-        batch_size=batch_size,
-        ngpu=use_gpu,
-    )
-    audio_in = os.path.join(split_dir, "wav.{}.scp".format(idx))
-    inference_pipline(audio_in=audio_in)
-
-
-def modelscope_infer(params):
-    # prepare for multi-GPU decoding
-    ngpu = params["ngpu"]
-    njob = params["njob"]
-    batch_size = params["batch_size"]
-    output_dir = params["output_dir"]
-    model = params["model"]
-    if os.path.exists(output_dir):
-        shutil.rmtree(output_dir)
-    os.mkdir(output_dir)
-    split_dir = os.path.join(output_dir, "split")
-    os.mkdir(split_dir)
-    if ngpu > 0:
-        nj = ngpu
-    elif ngpu == 0:
-        nj = njob
-    wav_scp_file = os.path.join(params["data_dir"], "wav.scp")
-    with open(wav_scp_file) as f:
-        lines = f.readlines()
-        num_lines = len(lines)
-        num_job_lines = num_lines // nj
-    start = 0
-    for i in range(nj):
-        end = start + num_job_lines
-        file = os.path.join(split_dir, "wav.{}.scp".format(str(i + 1)))
-        with open(file, "w") as f:
-            if i == nj - 1:
-                f.writelines(lines[start:])
-            else:
-                f.writelines(lines[start:end])
-        start = end
-
-    p = Pool(nj)
-    for i in range(nj):
-        p.apply_async(modelscope_infer_core,
-                      args=(output_dir, split_dir, njob, str(i + 1), batch_size, ngpu, model))
-    p.close()
-    p.join()
-
-    # combine decoding results
-    best_recog_path = os.path.join(output_dir, "1best_recog")
-    os.mkdir(best_recog_path)
-    files = ["text", "token", "score"]
-    for file in files:
-        with open(os.path.join(best_recog_path, file), "w") as f:
-            for i in range(nj):
-                job_file = os.path.join(output_dir, "output.{}/1best_recog".format(str(i + 1)), file)
-                with open(job_file) as f_job:
-                    lines = f_job.readlines()
-                f.writelines(lines)
-
-    # If text exists, compute CER
-    text_in = os.path.join(params["data_dir"], "text")
-    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
-        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))
-
-
-if __name__ == "__main__":
-    params = {}
-    params["model"] = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch"
-    params["data_dir"] = "./data/test"
-    params["output_dir"] = "./results"
-    params["ngpu"] = 1 # if ngpu > 0, will use gpu decoding
-    params["njob"] = 1 # if ngpu = 0, will use cpu decoding
-    params["batch_size"] = 64
-    modelscope_infer(params)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/infer_after_finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/infer_after_finetune.py
@ -1,48 +0,0 @@
-import json
-import os
-import shutil
-
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-from modelscope.hub.snapshot_download import snapshot_download
-
-from funasr.utils.compute_wer import compute_wer
-
-def modelscope_infer_after_finetune(params):
-    # prepare for decoding
-
-    try:
-        pretrained_model_path = snapshot_download(params["modelscope_model_name"], cache_dir=params["output_dir"])
-    except BaseException:
-        raise BaseException(f"Please download pretrain model from ModelScope firstly.")
-    shutil.copy(os.path.join(params["output_dir"], params["decoding_model_name"]), os.path.join(pretrained_model_path, "model.pb"))
-    decoding_path = os.path.join(params["output_dir"], "decode_results")
-    if os.path.exists(decoding_path):
-        shutil.rmtree(decoding_path)
-    os.mkdir(decoding_path)
-
-    # decoding
-    inference_pipeline = pipeline(
-        task=Tasks.auto_speech_recognition,
-        model=pretrained_model_path,
-        output_dir=decoding_path,
-        batch_size=params["batch_size"]
-    )
-    audio_in = os.path.join(params["data_dir"], "wav.scp")
-    inference_pipeline(audio_in=audio_in)
-
-    # computer CER if GT text is set
-    text_in = os.path.join(params["data_dir"], "text")
-    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
-        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))
-
-
-if __name__ == '__main__':
-    params = {}
-    params["modelscope_model_name"] = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch"
-    params["output_dir"] = "./checkpoint"
-    params["data_dir"] = "./data/test"
-    params["decoding_model_name"] = "valid.acc.ave_10best.pb"
-    params["batch_size"] = 64
-    modelscope_infer_after_finetune(params)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/README.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/README.md
@ -21,23 +21,26 @@

 Or you can use the finetuned model for inference directly.

- Setting parameters in `infer.py`
+- Setting parameters in `infer.sh`
    - <strong>model:</strong> # model name on ModelScope
    - <strong>data_dir:</strong> # the dataset dir needs to include `test/wav.scp`. If `test/text` also exists, CER will be computed
    - <strong>output_dir:</strong> # result dir
-    - <strong>ngpu:</strong> # the number of GPUs for decoding, if `ngpu` > 0, use GPU decoding
-    - <strong>njob:</strong> # the number of jobs for CPU decoding, if `ngpu` = 0, use CPU decoding, please set `njob`
    - <strong>batch_size:</strong> # batchsize of inference
+    - <strong>gpu_inference:</strong> # whether to perform gpu decoding, set false for cpu decoding
+    - <strong>gpuid_list:</strong> # set gpus, e.g., gpuid_list="0,1"
+    - <strong>njob:</strong> # the number of jobs for CPU decoding, if `gpu_inference`=false, use CPU decoding, please set `njob`

 - Then you can run the pipeline to infer with:
 ```python
-    python infer.py
+    sh infer.sh
 ```

 - Results

 The decoding results can be found in `$output_dir/1best_recog/text.cer`, which includes recognition results of each sample and the CER metric of the whole test set.

+If you decode the SpeechIO test sets, you can use textnorm with `stage`=3; `DETAILS.txt` and `RESULTS.txt` record the results and CER after text normalization.
+
 ### Inference using local finetuned model

 - Modify inference related parameters in `infer_after_finetune.py`
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md
@ -17,22 +17,22 @@
  - Decode without CTC
  - Decode without LM

-| testset   | CER(%)|
-|:---------:|:-----:|
-| dev       | 1.75  |
-| test      | 1.95  |
+| CER(%)    | Pretrain model|[Finetune model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/summary) |
+|:---------:|:-------------:|:-------------:|
+| dev       | 1.75          |1.62           |
+| test      | 1.95          |1.78           |

 ## AISHELL-2
 - Decode config: 
  - Decode without CTC
  - Decode without LM

-| testset      | CER(%)|
-|:------------:|:-----:|
-| dev_ios      | 2.80  |
-| test_android | 3.13  |
-| test_ios     | 2.85  |
-| test_mic     | 3.06  |
+| CER(%)       | Pretrain model|[Finetune model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/summary)|
+|:------------:|:-------------:|:------------:|
+| dev_ios      | 2.80          |2.60          |
+| test_android | 3.13          |2.84          |
+| test_ios     | 2.85          |2.82          |
+| test_mic     | 3.06          |2.88          |

 ## Wenetspeech
 - Decode config: 
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.py
@ -1,101 +1,25 @@
 import os
 import shutil
-from multiprocessing import Pool
-
+import argparse
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks

-from funasr.utils.compute_wer import compute_wer
-
-
-def modelscope_infer_core(output_dir, split_dir, njob, idx, batch_size, ngpu, model):
-    output_dir_job = os.path.join(output_dir, "output.{}".format(idx))
-    if ngpu > 0:
-        use_gpu = 1
-        gpu_id = int(idx) - 1
-    else:
-        use_gpu = 0
-        gpu_id = -1
-    if "CUDA_VISIBLE_DEVICES" in os.environ.keys():
-        gpu_list = os.environ['CUDA_VISIBLE_DEVICES'].split(",")
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_list[gpu_id])
-    else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
-    inference_pipline = pipeline(
+def modelscope_infer(args):
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)
+    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
-        model=model,
-        output_dir=output_dir_job,
-        batch_size=batch_size,
-        ngpu=use_gpu,
+        model=args.model,
+        output_dir=args.output_dir,
+        batch_size=args.batch_size,
    )
-    audio_in = os.path.join(split_dir, "wav.{}.scp".format(idx))
-    inference_pipline(audio_in=audio_in)
-
-
-def modelscope_infer(params):
-    # prepare for multi-GPU decoding
-    ngpu = params["ngpu"]
-    njob = params["njob"]
-    batch_size = params["batch_size"]
-    output_dir = params["output_dir"]
-    model = params["model"]
-    if os.path.exists(output_dir):
-        shutil.rmtree(output_dir)
-    os.mkdir(output_dir)
-    split_dir = os.path.join(output_dir, "split")
-    os.mkdir(split_dir)
-    if ngpu > 0:
-        nj = ngpu
-    elif ngpu == 0:
-        nj = njob
-    wav_scp_file = os.path.join(params["data_dir"], "wav.scp")
-    with open(wav_scp_file) as f:
-        lines = f.readlines()
-        num_lines = len(lines)
-        num_job_lines = num_lines // nj
-    start = 0
-    for i in range(nj):
-        end = start + num_job_lines
-        file = os.path.join(split_dir, "wav.{}.scp".format(str(i + 1)))
-        with open(file, "w") as f:
-            if i == nj - 1:
-                f.writelines(lines[start:])
-            else:
-                f.writelines(lines[start:end])
-        start = end
-
-    p = Pool(nj)
-    for i in range(nj):
-        p.apply_async(modelscope_infer_core,
-                      args=(output_dir, split_dir, njob, str(i + 1), batch_size, ngpu, model))
-    p.close()
-    p.join()
-
-    # combine decoding results
-    best_recog_path = os.path.join(output_dir, "1best_recog")
-    os.mkdir(best_recog_path)
-    files = ["text", "token", "score"]
-    for file in files:
-        with open(os.path.join(best_recog_path, file), "w") as f:
-            for i in range(nj):
-                job_file = os.path.join(output_dir, "output.{}/1best_recog".format(str(i + 1)), file)
-                with open(job_file) as f_job:
-                    lines = f_job.readlines()
-                f.writelines(lines)
-
-    # If text exists, compute CER
-    text_in = os.path.join(params["data_dir"], "text")
-    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
-        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))
-
+    inference_pipeline(audio_in=args.audio_in)

 if __name__ == "__main__":
-    params = {}
-    params["model"] = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
-    params["data_dir"] = "./data/test"
-    params["output_dir"] = "./results"
-    params["ngpu"] = 1 # if ngpu > 0, will use gpu decoding
-    params["njob"] = 1 # if ngpu = 0, will use cpu decoding
-    params["batch_size"] = 64
-    modelscope_infer(params)
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', type=str, default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch")
+    parser.add_argument('--audio_in', type=str, default="./data/test")
+    parser.add_argument('--output_dir', type=str, default="./results/")
+    parser.add_argument('--batch_size', type=int, default=64)
+    parser.add_argument('--gpuid', type=str, default="0")
+    args = parser.parse_args()
+    modelscope_infer(args)
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh
@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+set -e
+set -u
+set -o pipefail
+
+stage=1
+stop_stage=2
+model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
+data_dir="./data/test"
+output_dir="./results"
+batch_size=64
+gpu_inference=true    # whether to perform gpu decoding
+gpuid_list="0,1"    # set gpus, e.g., gpuid_list="0,1"
+njob=4    # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob
+
+
+if ${gpu_inference}; then
+    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
+else
+    nj=$njob
+    batch_size=1
+    gpuid_list=""
+    for JOB in $(seq ${nj}); do
+        gpuid_list=$gpuid_list"-1,"
+    done
+fi
+
+mkdir -p $output_dir/split
+split_scps=""
+for JOB in $(seq ${nj}); do
+    split_scps="$split_scps $output_dir/split/wav.$JOB.scp"
+done
+perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps}
+
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then
+    echo "Decoding ..."
+    gpuid_list_array=(${gpuid_list//,/ })
+    for JOB in $(seq ${nj}); do
+        {
+        id=$((JOB-1))
+        gpuid=${gpuid_list_array[$id]}
+        mkdir -p ${output_dir}/output.$JOB
+        python infer.py \
+            --model ${model} \
+            --audio_in ${output_dir}/split/wav.$JOB.scp \
+            --output_dir ${output_dir}/output.$JOB \
+            --batch_size ${batch_size} \
+            --gpuid ${gpuid}
+        }&
+    done
+    wait
+
+    mkdir -p ${output_dir}/1best_recog
+    for f in token score text; do
+        if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then
+          for i in $(seq "${nj}"); do
+              cat "${output_dir}/output.${i}/1best_recog/${f}"
+          done | sort -k1 >"${output_dir}/1best_recog/${f}"
+        fi
+    done
+fi
+
+if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then
+    echo "Computing WER ..."
+    python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
+    python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref
+    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
+    tail -n 3 ${output_dir}/1best_recog/text.cer
+fi
+
+if [ $stage -le 3 ] && [ $stop_stage -ge 3 ];then
+    echo "SpeechIO TIOBE textnorm"
+    echo "$0 --> Normalizing REF text ..."
+    ./utils/textnorm_zh.py \
+        --has_key --to_upper \
+        ${data_dir}/text \
+        ${output_dir}/1best_recog/ref.txt
+
+    echo "$0 --> Normalizing HYP text ..."
+    ./utils/textnorm_zh.py \
+        --has_key --to_upper \
+        ${output_dir}/1best_recog/text.proc \
+        ${output_dir}/1best_recog/rec.txt
+    grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt
+
+    echo "$0 --> computing WER/CER and alignment ..."
+    ./utils/error_rate_zh \
+        --tokenizer char \
+        --ref ${output_dir}/1best_recog/ref.txt \
+        --hyp ${output_dir}/1best_recog/rec_non_empty.txt \
+        ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt
+    rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt
+fi
+
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/utils
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/utils
@ -0,0 +1 @@
+../../../../egs/aishell/transformer/utils
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
@ -1,57 +1,37 @@
+import os
+import logging
 import torch
-import torchaudio
+import soundfile
+
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks
-
 from modelscope.utils.logger import get_logger
-import logging
+
 logger = get_logger(log_level=logging.CRITICAL)
 logger.setLevel(logging.CRITICAL)

+os.environ["MODELSCOPE_CACHE"] = "./"
 inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.2')

-waveform, sample_rate = torchaudio.load("waihu.wav")
-speech_length = waveform.shape[1]
-speech = waveform[0]
+model_dir = os.path.join(os.environ["MODELSCOPE_CACHE"], "damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online")
+speech, sample_rate = soundfile.read(os.path.join(model_dir, "example/asr_example.wav"))
+speech_length = speech.shape[0]

-cache_en = {"start_idx": 0, "pad_left": 0, "stride": 10, "pad_right": 5, "cif_hidden": None, "cif_alphas": None}
-cache_de = {"decode_fsmn": None}
-cache = {"encoder": cache_en, "decoder": cache_de}
-param_dict = {}
-param_dict["cache"] = cache
-
-first_chunk = True
-speech_buffer = speech
-speech_cache = []
+sample_offset = 0
+step = 4800  #300ms
+param_dict = {"cache": dict(), "is_final": False}
 final_result = ""

-while len(speech_buffer) >= 960:
-    if first_chunk:
-        if len(speech_buffer) >= 14400:
-            rec_result = inference_pipeline(audio_in=speech_buffer[0:14400], param_dict=param_dict)
-            speech_buffer = speech_buffer[4800:]
-        else:
-            cache_en["stride"] = len(speech_buffer) // 960
-            cache_en["pad_right"] = 0
-            rec_result = inference_pipeline(audio_in=speech_buffer, param_dict=param_dict)
-            speech_buffer = []
-        cache_en["start_idx"] = -5
-        first_chunk = False
-    else:
-        cache_en["start_idx"] += 10
-        if len(speech_buffer) >= 4800:
-            cache_en["pad_left"] = 5
-            rec_result = inference_pipeline(audio_in=speech_buffer[:19200], param_dict=param_dict)
-            speech_buffer = speech_buffer[9600:]
-        else:
-            cache_en["stride"] = len(speech_buffer) // 960 
-            cache_en["pad_right"] = 0
-            rec_result = inference_pipeline(audio_in=speech_buffer, param_dict=param_dict)
-            speech_buffer = []
-    if len(rec_result) !=0 and rec_result['text'] != "sil":
+for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
+    if sample_offset + step >= speech_length - 1:
+        step = speech_length - sample_offset
+        param_dict["is_final"] = True
+    rec_result = inference_pipeline(audio_in=speech[sample_offset: sample_offset + step],
+                                    param_dict=param_dict)
+    if len(rec_result) != 0 and rec_result['text'] != "sil" and rec_result['text'] != "waiting_for_more_voice":
        final_result += rec_result['text']
    print(rec_result)
 print(final_result)
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/README.md
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/README.md
@ -6,8 +6,9 @@

 - Modify finetune training related parameters in `finetune.py`
    - <strong>output_dir:</strong> # result dir
-    - <strong>data_dir:</strong> # the dataset dir needs to include files: train/wav.scp, train/text; validation/wav.scp, validation/text.
-    - <strong>batch_bins:</strong> # batch size
+    - <strong>data_dir:</strong> # the dataset dir needs to include files: `train/wav.scp`, `train/text`; `validation/wav.scp`, `validation/text`
+    - <strong>dataset_type:</strong> # for datasets larger than 1000 hours, set to `large`; otherwise set to `small`
+    - <strong>batch_bins:</strong> # batch size. When dataset_type is `small`, `batch_bins` is the number of feature frames; when dataset_type is `large`, `batch_bins` is the duration in ms
    - <strong>max_epoch:</strong> # number of training epoch
    - <strong>lr:</strong> # learning rate

@ -20,11 +21,38 @@

 Or you can use the finetuned model for inference directly.

- Setting parameters in `infer.py`
-    - <strong>data_dir:</strong> # the dataset dir
+- Setting parameters in `infer.sh`
+    - <strong>model:</strong> # model name on ModelScope
+    - <strong>data_dir:</strong> # the dataset dir needs to include `test/wav.scp`. If `test/text` also exists, CER will be computed
    - <strong>output_dir:</strong> # result dir
+    - <strong>batch_size:</strong> # batch size for inference
+    - <strong>gpu_inference:</strong> # whether to use GPU decoding; set to false for CPU decoding
+    - <strong>gpuid_list:</strong> # set gpus, e.g., gpuid_list="0,1"
+    - <strong>njob:</strong> # the number of jobs for CPU decoding; CPU decoding is used when `gpu_inference`=false, in which case please set `njob`

 - Then you can run the pipeline to infer with:
 ```python
-    python infer.py
+    sh infer.sh
 ```
+
+- Results
+
+The decoding results can be found in `$output_dir/1best_recog/text.cer`, which includes recognition results of each sample and the CER metric of the whole test set.
+
+### Inference using local finetuned model
+
+- Modify inference related parameters in `infer_after_finetune.py`
+    - <strong>modelscope_model_name:</strong> # model name on ModelScope
+    - <strong>output_dir:</strong> # result dir
+    - <strong>data_dir:</strong> # the dataset dir needs to include `test/wav.scp`. If `test/text` also exists, CER will be computed
+    - <strong>decoding_model_name:</strong> # set the checkpoint name for decoding, e.g., `valid.cer_ctc.ave.pb`
+    - <strong>batch_size:</strong> # batch size for inference
+
+- Then you can run the pipeline to infer with:
+```python
+    python infer_after_finetune.py
+```
+
+- Results
+
+The decoding results can be found in `$output_dir/decoding_results/text.cer`, which includes recognition results of each sample and the CER metric of the whole test set.
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.py
@ -1,101 +1,25 @@
 import os
 import shutil
-from multiprocessing import Pool
-
+import argparse
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks

-from funasr.utils.compute_wer import compute_wer
-
-
-def modelscope_infer_core(output_dir, split_dir, njob, idx, batch_size, ngpu, model):
-    output_dir_job = os.path.join(output_dir, "output.{}".format(idx))
-    if ngpu > 0:
-        use_gpu = 1
-        gpu_id = int(idx) - 1
-    else:
-        use_gpu = 0
-        gpu_id = -1
-    if "CUDA_VISIBLE_DEVICES" in os.environ.keys():
-        gpu_list = os.environ['CUDA_VISIBLE_DEVICES'].split(",")
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_list[gpu_id])
-    else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
-    inference_pipline = pipeline(
+def modelscope_infer(args):
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)
+    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
-        model=model,
-        output_dir=output_dir_job,
-        batch_size=batch_size,
-        ngpu=use_gpu,
+        model=args.model,
+        output_dir=args.output_dir,
+        batch_size=args.batch_size,
    )
-    audio_in = os.path.join(split_dir, "wav.{}.scp".format(idx))
-    inference_pipline(audio_in=audio_in)
-
-
-def modelscope_infer(params):
-    # prepare for multi-GPU decoding
-    ngpu = params["ngpu"]
-    njob = params["njob"]
-    batch_size = params["batch_size"]
-    output_dir = params["output_dir"]
-    model = params["model"]
-    if os.path.exists(output_dir):
-        shutil.rmtree(output_dir)
-    os.mkdir(output_dir)
-    split_dir = os.path.join(output_dir, "split")
-    os.mkdir(split_dir)
-    if ngpu > 0:
-        nj = ngpu
-    elif ngpu == 0:
-        nj = njob
-    wav_scp_file = os.path.join(params["data_dir"], "wav.scp")
-    with open(wav_scp_file) as f:
-        lines = f.readlines()
-        num_lines = len(lines)
-        num_job_lines = num_lines // nj
-    start = 0
-    for i in range(nj):
-        end = start + num_job_lines
-        file = os.path.join(split_dir, "wav.{}.scp".format(str(i + 1)))
-        with open(file, "w") as f:
-            if i == nj - 1:
-                f.writelines(lines[start:])
-            else:
-                f.writelines(lines[start:end])
-        start = end
-
-    p = Pool(nj)
-    for i in range(nj):
-        p.apply_async(modelscope_infer_core,
-                      args=(output_dir, split_dir, njob, str(i + 1), batch_size, ngpu, model))
-    p.close()
-    p.join()
-
-    # combine decoding results
-    best_recog_path = os.path.join(output_dir, "1best_recog")
-    os.mkdir(best_recog_path)
-    files = ["text", "token", "score"]
-    for file in files:
-        with open(os.path.join(best_recog_path, file), "w") as f:
-            for i in range(nj):
-                job_file = os.path.join(output_dir, "output.{}/1best_recog".format(str(i + 1)), file)
-                with open(job_file) as f_job:
-                    lines = f_job.readlines()
-                f.writelines(lines)
-
-    # If text exists, compute CER
-    text_in = os.path.join(params["data_dir"], "text")
-    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
-        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))
-
+    inference_pipeline(audio_in=args.audio_in)

 if __name__ == "__main__":
-    params = {}
-    params["model"] = "damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1"
-    params["data_dir"] = "./data/test"
-    params["output_dir"] = "./results"
-    params["ngpu"] = 1 # if ngpu > 0, will use gpu decoding
-    params["njob"] = 1 # if ngpu = 0, will use cpu decoding
-    params["batch_size"] = 64
-    modelscope_infer(params)
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model', type=str, default="damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1")
+    parser.add_argument('--audio_in', type=str, default="./data/test")
+    parser.add_argument('--output_dir', type=str, default="./results/")
+    parser.add_argument('--batch_size', type=int, default=64)
+    parser.add_argument('--gpuid', type=str, default="0")
+    args = parser.parse_args()
+    modelscope_infer(args)
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh
@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+set -e
+set -u
+set -o pipefail
+
+stage=1
+stop_stage=2
+model="damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1"
+data_dir="./data/test"
+output_dir="./results"
+batch_size=64
+gpu_inference=true    # whether to perform gpu decoding
+gpuid_list="0,1"    # set gpus, e.g., gpuid_list="0,1"
+njob=4    # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob
+
+
+if ${gpu_inference}; then
+    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
+else
+    nj=$njob
+    batch_size=1
+    gpuid_list=""
+    for JOB in $(seq ${nj}); do
+        gpuid_list=$gpuid_list"-1,"
+    done
+fi
+
+mkdir -p $output_dir/split
+split_scps=""
+for JOB in $(seq ${nj}); do
+    split_scps="$split_scps $output_dir/split/wav.$JOB.scp"
+done
+perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps}
+
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then
+    echo "Decoding ..."
+    gpuid_list_array=(${gpuid_list//,/ })
+    for JOB in $(seq ${nj}); do
+        {
+        id=$((JOB-1))
+        gpuid=${gpuid_list_array[$id]}
+        mkdir -p ${output_dir}/output.$JOB
+        python infer.py \
+            --model ${model} \
+            --audio_in ${output_dir}/split/wav.$JOB.scp \
+            --output_dir ${output_dir}/output.$JOB \
+            --batch_size ${batch_size} \
+            --gpuid ${gpuid}
+        }&
+    done
+    wait
+
+    mkdir -p ${output_dir}/1best_recog
+    for f in token score text; do
+        if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then
+          for i in $(seq "${nj}"); do
+              cat "${output_dir}/output.${i}/1best_recog/${f}"
+          done | sort -k1 >"${output_dir}/1best_recog/${f}"
+        fi
+    done
+fi
+
+if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then
+    echo "Computing WER ..."
+    python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
+    python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref
+    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
+    tail -n 3 ${output_dir}/1best_recog/text.cer
+fi
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/utils
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/utils
@ -0,0 +1 @@
+../../../../egs/aishell/transformer/utils
--- a/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/infer.py
+++ b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/infer.py
@ -13,18 +13,14 @@ logger.setLevel(logging.CRITICAL)
 inference_pipeline = pipeline(
    task=Tasks.punctuation,
    model='damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727',
-    model_revision="v1.0.0",
    output_dir="./tmp/"
 )

 vads = inputs.split("|")
-
-cache_out = []
 rec_result_all="outputs:"
+param_dict = {"cache": []}
 for vad in vads:
-    rec_result = inference_pipeline(text_in=vad, cache=cache_out)
-    #print(rec_result)
-    cache_out = rec_result['cache']
+    rec_result = inference_pipeline(text_in=vad, param_dict=param_dict)
    rec_result_all += rec_result['text']

 print(rec_result_all)
--- a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common/infer_online.py
+++ b/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common/infer_online.py
@ -22,7 +22,7 @@ if __name__ == '__main__':
    sample_offset = 0
    
    step = 160 * 10
-    param_dict = {'in_cache': dict()}
+    param_dict = {'in_cache': dict(), 'max_end_sil': 800}
    for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
        if sample_offset + step >= speech_length - 1:
            step = speech_length - sample_offset
--- a/egs_modelscope/vad/speech_fsmn_vad_zh-cn-8k-common/infer_online.py
+++ b/egs_modelscope/vad/speech_fsmn_vad_zh-cn-8k-common/infer_online.py
@ -22,7 +22,7 @@ if __name__ == '__main__':
    sample_offset = 0
    
    step = 80 * 10
-    param_dict = {'in_cache': dict()}
+    param_dict = {'in_cache': dict(), 'max_end_sil': 800}
    for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
        if sample_offset + step >= speech_length - 1:
            step = speech_length - sample_offset
--- a/funasr/bin/asr_inference_paraformer.py
+++ b/funasr/bin/asr_inference_paraformer.py
@ -43,6 +43,7 @@ from funasr.models.frontend.wav_frontend import WavFrontend
 from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer
 from funasr.export.models.e2e_asr_paraformer import Paraformer as Paraformer_export
 from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
+from funasr.bin.tp_inference import SpeechText2Timestamp


 class Speech2Text:
@ -540,7 +541,8 @@ def inference(
        ngram_weight: float = 0.9,
        nbest: int = 1,
        num_workers: int = 1,
-
+        timestamp_infer_config: Union[Path, str] = None,
+        timestamp_model_file: Union[Path, str] = None,
        **kwargs,
 ):
    inference_pipeline = inference_modelscope(
@ -604,6 +606,8 @@ def inference_modelscope(
        nbest: int = 1,
        num_workers: int = 1,
        output_dir: Optional[str] = None,
+        timestamp_infer_config: Union[Path, str] = None,
+        timestamp_model_file: Union[Path, str] = None,
        param_dict: dict = None,
        **kwargs,
 ):
@ -661,6 +665,15 @@ def inference_modelscope(
    else:
        speech2text = Speech2Text(**speech2text_kwargs)

+    if timestamp_model_file is not None:
+        speechtext2timestamp = SpeechText2Timestamp(
+            timestamp_cmvn_file=cmvn_file,
+            timestamp_model_file=timestamp_model_file,
+            timestamp_infer_config=timestamp_infer_config,
+        )
+    else:
+        speechtext2timestamp = None
+
    def _forward(
            data_path_and_name_and_type,
            raw_inputs: Union[np.ndarray, torch.Tensor] = None,
@ -744,7 +757,17 @@ def inference_modelscope(
                key = keys[batch_id]
                for n, result in zip(range(1, nbest + 1), result):
                    text, token, token_int, hyp = result[0], result[1], result[2], result[3]
-                    time_stamp = None if len(result) < 5 else result[4]
+                    timestamp = None if len(result) < 5 else result[4]
+                    # conduct timestamp prediction here
+                    # timestamp inference requires token length
+                    # thus following inference cannot be conducted in batch
+                    if timestamp is None and speechtext2timestamp:
+                        ts_batch = {}
+                        ts_batch['speech'] = batch['speech'][batch_id].unsqueeze(0)
+                        ts_batch['speech_lengths'] = torch.tensor([batch['speech_lengths'][batch_id]])
+                        ts_batch['text_lengths'] = torch.tensor([len(token)])
+                        us_alphas, us_peaks = speechtext2timestamp(**ts_batch)
+                        ts_str, timestamp = ts_prediction_lfr6_standard(us_alphas[0], us_peaks[0], token, force_time_shift=-3.0)
                    # Create a directory: outdir/{n}best_recog
                    if writer is not None:
                        ibest_writer = writer[f"{n}best_recog"]
@ -756,20 +779,20 @@ def inference_modelscope(
                        ibest_writer["rtf"][key] = rtf_cur

                    if text is not None:
-                        if use_timestamp and time_stamp is not None:
-                            postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                        if use_timestamp and timestamp is not None:
+                            postprocessed_result = postprocess_utils.sentence_postprocess(token, timestamp)
                        else:
                            postprocessed_result = postprocess_utils.sentence_postprocess(token)
-                        time_stamp_postprocessed = ""
+                        timestamp_postprocessed = ""
                        if len(postprocessed_result) == 3:
-                            text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
+                            text_postprocessed, timestamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                                       postprocessed_result[1], \
                                                                                       postprocessed_result[2]
                        else:
                            text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
                        item = {'key': key, 'value': text_postprocessed}
-                        if time_stamp_postprocessed != "":
-                            item['time_stamp'] = time_stamp_postprocessed
+                        if timestamp_postprocessed != "":
+                            item['timestamp'] = timestamp_postprocessed
                        asr_result_list.append(item)
                        finish_count += 1
                        # asr_utils.print_progress(finish_count / file_count)
--- a/funasr/bin/asr_inference_paraformer_streaming.py
+++ b/funasr/bin/asr_inference_paraformer_streaming.py
@ -42,6 +42,7 @@ from funasr.utils import asr_utils, wav_utils, postprocess_utils
 from funasr.models.frontend.wav_frontend import WavFrontend
 from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer
 from funasr.export.models.e2e_asr_paraformer import Paraformer as Paraformer_export
+np.set_printoptions(threshold=np.inf)

 class Speech2Text:
    """Speech2Text class
@ -203,7 +204,6 @@ class Speech2Text:
        # Input as audio signal
        if isinstance(speech, np.ndarray):
            speech = torch.tensor(speech)
-
        if self.frontend is not None:
            feats, feats_len = self.frontend.forward(speech, speech_lengths)
            feats = to_device(feats, device=self.device)
@ -213,13 +213,16 @@ class Speech2Text:
            feats = speech
            feats_len = speech_lengths
        lfr_factor = max(1, (feats.size()[-1] // 80) - 1)
+        feats_len = cache["encoder"]["stride"] + cache["encoder"]["pad_left"] + cache["encoder"]["pad_right"]
+        feats = feats[:,cache["encoder"]["start_idx"]:cache["encoder"]["start_idx"]+feats_len,:]
+        feats_len = torch.tensor([feats_len])
        batch = {"speech": feats, "speech_lengths": feats_len, "cache": cache}

        # a. To device
        batch = to_device(batch, device=self.device)

        # b. Forward Encoder
-        enc, enc_len = self.asr_model.encode_chunk(**batch)
+        enc, enc_len = self.asr_model.encode_chunk(feats, feats_len, cache)
        if isinstance(enc, tuple):
            enc = enc[0]
        # assert len(enc) == 1, len(enc)
@ -544,11 +547,6 @@ def inference_modelscope(
    )

    export_mode = False
-    if param_dict is not None:
-        hotword_list_or_file = param_dict.get('hotword')
-        export_mode = param_dict.get("export_mode", False)
-    else:
-        hotword_list_or_file = None

    if ngpu >= 1 and torch.cuda.is_available():
        device = "cuda"
@ -578,7 +576,6 @@ def inference_modelscope(
        ngram_weight=ngram_weight,
        penalty=penalty,
        nbest=nbest,
-        hotword_list_or_file=hotword_list_or_file,
    )
    if export_mode:
        speech2text = Speech2TextExport(**speech2text_kwargs)
@ -594,123 +591,116 @@ def inference_modelscope(
            **kwargs,
    ):

-        hotword_list_or_file = None
-        if param_dict is not None:
-            hotword_list_or_file = param_dict.get('hotword')
-        if 'hotword' in kwargs:
-            hotword_list_or_file = kwargs['hotword']
-        if hotword_list_or_file is not None or 'hotword' in kwargs:
-            speech2text.hotword_list = speech2text.generate_hotwords_list(hotword_list_or_file)
-
        # 3. Build data-iterator
        if data_path_and_name_and_type is None and raw_inputs is not None:
-            if isinstance(raw_inputs, torch.Tensor):
-                raw_inputs = raw_inputs.numpy()
-            data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
-        loader = ASRTask.build_streaming_iterator(
-            data_path_and_name_and_type,
-            dtype=dtype,
-            fs=fs,
-            batch_size=batch_size,
-            key_file=key_file,
-            num_workers=num_workers,
-            preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False),
-            collate_fn=ASRTask.build_collate_fn(speech2text.asr_train_args, False),
-            allow_variable_data_keys=allow_variable_data_keys,
-            inference=True,
-        )
-
-        if param_dict is not None:
-            use_timestamp = param_dict.get('use_timestamp', True)
-        else:
-            use_timestamp = True
-
-        forward_time_total = 0.0
-        length_total = 0.0
-        finish_count = 0
-        file_count = 1
-        cache = None
+            if isinstance(raw_inputs, np.ndarray):
+                raw_inputs = torch.tensor(raw_inputs)
+        is_final = False
+        if param_dict is not None and "cache" in param_dict:
+            cache = param_dict["cache"]
+        if param_dict is not None and "is_final" in param_dict:
+            is_final = param_dict["is_final"]
        # 7 .Start for-loop
        # FIXME(kamo): The output format should be discussed about
        asr_result_list = []
-        output_path = output_dir_v2 if output_dir_v2 is not None else output_dir
-        if output_path is not None:
-            writer = DatadirWriter(output_path)
+        results = []
+        asr_result = ""
+        wait = True
+        if len(cache) == 0:
+            cache["encoder"] = {"start_idx": 0, "pad_left": 0, "stride": 10, "pad_right": 5, "cif_hidden": None, "cif_alphas": None, "is_final": is_final, "left": 0, "right": 0}
+            cache_de = {"decode_fsmn": None}
+            cache["decoder"] = cache_de
+            cache["first_chunk"] = True
+            cache["speech"] = []
+            cache["accum_speech"] = 0
+
+        if raw_inputs is not None:
+            if len(cache["speech"]) == 0:
+                cache["speech"] = raw_inputs
+            else:
+                cache["speech"] = torch.cat([cache["speech"], raw_inputs], dim=0)
+            cache["accum_speech"] += len(raw_inputs)
+            while cache["accum_speech"] >= 960:
+                if cache["first_chunk"]:
+                    if cache["accum_speech"] >= 14400:
+                        speech = torch.unsqueeze(cache["speech"], axis=0)
+                        speech_length = torch.tensor([len(cache["speech"])])
+                        cache["encoder"]["pad_left"] = 5 
+                        cache["encoder"]["pad_right"] = 5 
+                        cache["encoder"]["stride"] = 10
+                        cache["encoder"]["left"] = 5
+                        cache["encoder"]["right"] = 0
+                        results = speech2text(cache, speech, speech_length)
+                        cache["accum_speech"] -= 4800
+                        cache["first_chunk"] = False
+                        cache["encoder"]["start_idx"] = -5
+                        cache["encoder"]["is_final"] = False
+                        wait = False
+                    else:
+                        if is_final:
+                            cache["encoder"]["stride"] = len(cache["speech"]) // 960
+                            cache["encoder"]["pad_left"] = 0
+                            cache["encoder"]["pad_right"] = 0
+                            speech = torch.unsqueeze(cache["speech"], axis=0)
+                            speech_length = torch.tensor([len(cache["speech"])])
+                            results = speech2text(cache, speech, speech_length)
+                            cache["accum_speech"] = 0
+                            wait = False
+                        else:
+                            break
+                else:
+                    if cache["accum_speech"] >= 19200:
+                        cache["encoder"]["start_idx"] += 10
+                        cache["encoder"]["stride"] = 10
+                        cache["encoder"]["pad_left"] = 5
+                        cache["encoder"]["pad_right"] = 5
+                        cache["encoder"]["left"] = 0
+                        cache["encoder"]["right"] = 0
+                        speech = torch.unsqueeze(cache["speech"], axis=0)
+                        speech_length = torch.tensor([len(cache["speech"])])
+                        results = speech2text(cache, speech, speech_length)
+                        cache["accum_speech"] -= 9600
+                        wait = False
+                    else:
+                        if is_final:
+                            cache["encoder"]["is_final"] = True
+                            if cache["accum_speech"] >= 14400:
+                                cache["encoder"]["start_idx"] += 10
+                                cache["encoder"]["stride"] = 10
+                                cache["encoder"]["pad_left"] = 5
+                                cache["encoder"]["pad_right"] = 5
+                                cache["encoder"]["left"] = 0
+                                cache["encoder"]["right"] = cache["accum_speech"] // 960 - 15
+                                speech = torch.unsqueeze(cache["speech"], axis=0)
+                                speech_length = torch.tensor([len(cache["speech"])])
+                                results = speech2text(cache, speech, speech_length)
+                                cache["accum_speech"] -= 9600
+                                wait = False
+                            else:
+                                cache["encoder"]["start_idx"] += 10
+                                cache["encoder"]["stride"] = cache["accum_speech"] // 960 - 5
+                                cache["encoder"]["pad_left"] = 5
+                                cache["encoder"]["pad_right"] = 0
+                                cache["encoder"]["left"] = 0
+                                cache["encoder"]["right"] = 0
+                                speech = torch.unsqueeze(cache["speech"], axis=0)
+                                speech_length = torch.tensor([len(cache["speech"])])
+                                results = speech2text(cache, speech, speech_length)
+                                cache["accum_speech"] = 0
+                                wait = False
+                        else:
+                            break
+                
+                if len(results) >= 1:
+                    asr_result += results[0][0]
+            if asr_result == "":
+                asr_result = "sil"
+            if wait:
+                asr_result = "waiting_for_more_voice"
+            item = {'key': "utt", 'value': asr_result}
+            asr_result_list.append(item)
        else:
-            writer = None
-        if param_dict is not None and "cache" in param_dict:
-            cache = param_dict["cache"]
-        for keys, batch in loader:
-            assert isinstance(batch, dict), type(batch)
-            assert all(isinstance(s, str) for s in keys), keys
-            _bs = len(next(iter(batch.values())))
-            assert len(keys) == _bs, f"{len(keys)} != {_bs}"
-            # batch = {k: v for k, v in batch.items() if not k.endswith("_lengths")}
-            logging.info("decoding, utt_id: {}".format(keys))
-            # N-best list of (text, token, token_int, hyp_object)
-
-            time_beg = time.time()
-            results = speech2text(cache=cache, **batch)
-            if len(results) < 1:
-                hyp = Hypothesis(score=0.0, scores={}, states={}, yseq=[])
-                results = [[" ", ["sil"], [2], hyp, 10, 6]] * nbest
-            time_end = time.time()
-            forward_time = time_end - time_beg
-            lfr_factor = results[0][-1]
-            length = results[0][-2]
-            forward_time_total += forward_time
-            length_total += length
-            rtf_cur = "decoding, feature length: {}, forward_time: {:.4f}, rtf: {:.4f}".format(length, forward_time,
-                                                                                               100 * forward_time / (
-                                                                                                           length * lfr_factor))
-            logging.info(rtf_cur)
-
-            for batch_id in range(_bs):
-                result = [results[batch_id][:-2]]
-
-                key = keys[batch_id]
-                for n, result in zip(range(1, nbest + 1), result):
-                    text, token, token_int, hyp = result[0], result[1], result[2], result[3]
-                    time_stamp = None if len(result) < 5 else result[4]
-                    # Create a directory: outdir/{n}best_recog
-                    if writer is not None:
-                        ibest_writer = writer[f"{n}best_recog"]
-
-                        # Write the result to each file
-                        ibest_writer["token"][key] = " ".join(token)
-                        # ibest_writer["token_int"][key] = " ".join(map(str, token_int))
-                        ibest_writer["score"][key] = str(hyp.score)
-                        ibest_writer["rtf"][key] = rtf_cur
-
-                    if text is not None:
-                        if use_timestamp and time_stamp is not None:
-                            postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
-                        else:
-                            postprocessed_result = postprocess_utils.sentence_postprocess(token)
-                        time_stamp_postprocessed = ""
-                        if len(postprocessed_result) == 3:
-                            text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
-                                                                                       postprocessed_result[1], \
-                                                                                       postprocessed_result[2]
-                        else:
-                            text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
-                        item = {'key': key, 'value': text_postprocessed}
-                        if time_stamp_postprocessed != "":
-                            item['time_stamp'] = time_stamp_postprocessed
-                        asr_result_list.append(item)
-                        finish_count += 1
-                        # asr_utils.print_progress(finish_count / file_count)
-                        if writer is not None:
-                            ibest_writer["text"][key] = text_postprocessed
-
-                    logging.info("decoding, utt: {}, predictions: {}".format(key, text))
-        rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total,
-                                                                                                           forward_time_total,
-                                                                                                           100 * forward_time_total / (
-                                                                                                                       length_total * lfr_factor))
-        logging.info(rtf_avg)
-        if writer is not None:
-            ibest_writer["rtf"]["rtf_avf"] = rtf_avg
+            return []
        return asr_result_list

    return _forward
@ -905,3 +895,4 @@ if __name__ == "__main__":
    # rec_result = inference_16k_pipline(audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
    # print(rec_result)

+
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@ -292,6 +292,8 @@ class Speech2Text:

                # remove blank symbol id, which is assumed to be 0
                token_int = list(filter(lambda x: x != 0 and x != 2, token_int))
+                if len(token_int) == 0:
+                    continue

                # Change integer-ids to tokens
                token = self.converter.ids2tokens(token_int)
--- a/funasr/bin/asr_inference_uniasr.py
+++ b/funasr/bin/asr_inference_uniasr.py
@ -261,6 +261,7 @@ class Speech2Text:

            # Change integer-ids to tokens
            token = self.converter.ids2tokens(token_int)
+            token = list(filter(lambda x: x != "<gbg>", token))

            if self.tokenizer is not None:
                text = self.tokenizer.tokens2text(token)
@ -512,7 +513,7 @@ def inference_modelscope(
                    finish_count += 1
                    asr_utils.print_progress(finish_count / file_count)
                    if writer is not None:
-                        ibest_writer["text"][key] = text
+                        ibest_writer["text"][key] = text_postprocessed
        return asr_result_list
    
    return _forward
--- a/funasr/bin/asr_inference_uniasr_vad.py
+++ b/funasr/bin/asr_inference_uniasr_vad.py
@ -261,6 +261,7 @@ class Speech2Text:

            # Change integer-ids to tokens
            token = self.converter.ids2tokens(token_int)
+            token = list(filter(lambda x: x != "<gbg>", token))

            if self.tokenizer is not None:
                text = self.tokenizer.tokens2text(token)
@ -512,7 +513,7 @@ def inference_modelscope(
                    finish_count += 1
                    asr_utils.print_progress(finish_count / file_count)
                    if writer is not None:
-                        ibest_writer["text"][key] = text
+                        ibest_writer["text"][key] = text_postprocessed
        return asr_result_list
    
    return _forward
--- a/funasr/bin/punctuation_infer_vadrealtime.py
+++ b/funasr/bin/punctuation_infer_vadrealtime.py
@ -69,6 +69,7 @@ class Text2Punc:
            precache = "".join(cache)
        else:
            precache = ""
+            cache = []
        data = {"text": precache + text}
        result = self.preprocessor(data=data, uid="12938712838719")
        split_text = self.preprocessor.pop_split_text_data(result)
@ -225,7 +226,7 @@ def inference_modelscope(
    ):
        results = []
        split_size = 10
-
+        cache_in = param_dict["cache"]
        if raw_inputs != None:
            line = raw_inputs.strip()
            key = "demo"
@ -233,35 +234,12 @@ def inference_modelscope(
                item = {'key': key, 'value': ""}
                results.append(item)
                return results
-            #import pdb;pdb.set_trace()
-            result, _, cache = text2punc(line, cache)
-            item = {'key': key, 'value': result, 'cache': cache}
+            result, _, cache = text2punc(line, cache_in)
+            param_dict["cache"] = cache
+            item = {'key': key, 'value': result}
            results.append(item)
            return results

-        for inference_text, _, _ in data_path_and_name_and_type:
-            with open(inference_text, "r", encoding="utf-8") as fin:
-                for line in fin:
-                    line = line.strip()
-                    segs = line.split("\t")
-                    if len(segs) != 2:
-                        continue
-                    key = segs[0]
-                    if len(segs[1]) == 0:
-                        continue
-                    result, _ = text2punc(segs[1])
-                    item = {'key': key, 'value': result}
-                    results.append(item)
-        output_path = output_dir_v2 if output_dir_v2 is not None else output_dir
-        if output_path != None:
-            output_file_name = "infer.out"
-            Path(output_path).mkdir(parents=True, exist_ok=True)
-            output_file_path = (Path(output_path) / output_file_name).absolute()
-            with open(output_file_path, "w", encoding="utf-8") as fout:
-                for item_i in results:
-                    key_out = item_i["key"]
-                    value_out = item_i["value"]
-                    fout.write(f"{key_out}\t{value_out}\n")
        return results

    return _forward
--- a/funasr/bin/tp_inference.py
+++ b/funasr/bin/tp_inference.py
@ -116,8 +116,8 @@ class SpeechText2Timestamp:
            enc = enc[0]

        # c. Forward Predictor
-        _, _, us_alphas, us_cif_peak = self.tp_model.calc_predictor_timestamp(enc, enc_len, text_lengths.to(self.device)+1)
-        return us_alphas, us_cif_peak
+        _, _, us_alphas, us_peaks = self.tp_model.calc_predictor_timestamp(enc, enc_len, text_lengths.to(self.device)+1)
+        return us_alphas, us_peaks


 def inference(
--- a/funasr/bin/vad_inference.py
+++ b/funasr/bin/vad_inference.py
@ -1,5 +1,6 @@
 import argparse
 import logging
+import os
 import sys
 import json
 from pathlib import Path
@ -266,7 +267,8 @@ def inference_modelscope(
            # do vad segment
            _, results = speech2vadsegment(**batch)
            for i, _ in enumerate(keys):
-                results[i] = json.dumps(results[i])
+                if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas":
+                    results[i] = json.dumps(results[i])
                item = {'key': keys[i], 'value': results[i]}
                vad_results.append(item)
                if writer is not None:
--- a/funasr/bin/vad_inference_online.py
+++ b/funasr/bin/vad_inference_online.py
@ -1,5 +1,6 @@
 import argparse
 import logging
+import os
 import sys
 import json
 from pathlib import Path
@ -32,12 +33,6 @@ from funasr.bin.vad_inference import Speech2VadSegment
 header_colors = '\033[95m'
 end_colors = '\033[0m'

-global_asr_language: str = 'zh-cn'
-global_sample_rate: Union[int, Dict[Any, int]] = {
-    'audio_fs': 16000,
-    'model_fs': 16000
-}
-

 class Speech2VadSegmentOnline(Speech2VadSegment):
    """Speech2VadSegmentOnline class
@ -61,7 +56,7 @@ class Speech2VadSegmentOnline(Speech2VadSegment):
    @torch.no_grad()
    def __call__(
            self, speech: Union[torch.Tensor, np.ndarray], speech_lengths: Union[torch.Tensor, np.ndarray] = None,
-            in_cache: Dict[str, torch.Tensor] = dict(), is_final: bool = False
+            in_cache: Dict[str, torch.Tensor] = dict(), is_final: bool = False, max_end_sil: int = 800
    ) -> Tuple[torch.Tensor, List[List[int]], torch.Tensor]:
        """Inference

@ -92,7 +87,8 @@ class Speech2VadSegmentOnline(Speech2VadSegment):
                "feats": feats,
                "waveform": waveforms,
                "in_cache": in_cache,
-                "is_final": is_final
+                "is_final": is_final,
+                "max_end_sil": max_end_sil
            }
            # a. To device
            batch = to_device(batch, device=self.device)
@ -222,7 +218,8 @@ def inference_modelscope(

        vad_results = []
        batch_in_cache = param_dict['in_cache'] if param_dict is not None else dict()
-        is_final = param_dict['is_final'] if param_dict is not None else False
+        is_final = param_dict.get('is_final', False) if param_dict is not None else False
+        max_end_sil = param_dict.get('max_end_sil', 800) if param_dict is not None else 800
        for keys, batch in loader:
            assert isinstance(batch, dict), type(batch)
            assert all(isinstance(s, str) for s in keys), keys
@ -230,6 +227,7 @@ def inference_modelscope(
            assert len(keys) == _bs, f"{len(keys)} != {_bs}"
            batch['in_cache'] = batch_in_cache
            batch['is_final'] = is_final
+            batch['max_end_sil'] = max_end_sil

            # do vad segment
            _, results, param_dict['in_cache'] = speech2vadsegment(**batch)
@ -237,7 +235,8 @@ def inference_modelscope(
            if results:
                for i, _ in enumerate(keys):
                    if results[i]:
-                        results[i] = json.dumps(results[i])
+                        if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas":
+                            results[i] = json.dumps(results[i])
                        item = {'key': keys[i], 'value': results[i]}
                        vad_results.append(item)
                        if writer is not None:
--- a/funasr/export/README.md
+++ b/funasr/export/README.md
@ -30,6 +30,16 @@ The installation is the same as [funasr](../../README.md)

   `fallback-num`: specify the number of fallback layers to perform automatic mixed precision quantization.

+## Performance Benchmark of Runtime
+
+### Paraformer on CPU
+
+[onnx runtime](https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/python/benchmark_onnx.md)
+
+[libtorch runtime](https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/python/benchmark_libtorch.md)
+
+### Paraformer on GPU
+[nv-triton](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/triton_gpu)

 ## For example
 ### Export onnx format model
--- a/funasr/export/export_model.py
+++ b/funasr/export/export_model.py
@ -14,7 +14,7 @@ from funasr.utils.types import str2bool
 # torch_version = float(".".join(torch.__version__.split(".")[:2]))
 # assert torch_version > 1.9

-class ASRModelExportParaformer:
+class ModelExport:
    def __init__(
        self,
        cache_dir: Union[Path, str] = None,
@ -240,7 +240,7 @@ if __name__ == '__main__':
    parser.add_argument('--calib_num', type=int, default=200, help='calib max num')
    args = parser.parse_args()

-    export_model = ASRModelExportParaformer(
+    export_model = ModelExport(
        cache_dir=args.export_dir,
        onnx=args.type == 'onnx',
        quant=args.quantize,
--- a/funasr/models/e2e_asr_paraformer.py
+++ b/funasr/models/e2e_asr_paraformer.py
@ -370,19 +370,10 @@ class Paraformer(AbsESPnetModel):
                encoder_out, encoder_out_lens
            )

-        assert encoder_out.size(0) == speech.size(0), (
-            encoder_out.size(),
-            speech.size(0),
-        )
-        assert encoder_out.size(1) <= encoder_out_lens.max(), (
-            encoder_out.size(),
-            encoder_out_lens.max(),
-        )
-
        if intermediate_outs is not None:
            return (encoder_out, intermediate_outs), encoder_out_lens

-        return encoder_out, encoder_out_lens
+        return encoder_out, torch.tensor([encoder_out.size(1)])

    def calc_predictor(self, encoder_out, encoder_out_lens):

--- a/funasr/models/e2e_vad.py
+++ b/funasr/models/e2e_vad.py
@ -473,8 +473,9 @@ class E2EVadModel(nn.Module):
        return segments, in_cache

    def forward_online(self, feats: torch.Tensor, waveform: torch.tensor, in_cache: Dict[str, torch.Tensor] = dict(),
-                is_final: bool = False
+                is_final: bool = False, max_end_sil: int = 800
                ) -> Tuple[List[List[List[int]]], Dict[str, torch.Tensor]]:
+        self.max_end_sil_frame_cnt_thresh = max_end_sil - self.vad_opts.speech_to_sil_time_thres
        self.waveform = waveform  # compute decibel for each frame
        self.ComputeDecibel()
        self.ComputeScores(feats, in_cache)
--- a/funasr/models/predictor/cif.py
+++ b/funasr/models/predictor/cif.py
@ -200,6 +200,7 @@ class CifPredictorV2(nn.Module):
        return acoustic_embeds, token_num, alphas, cif_peak

    def forward_chunk(self, hidden, cache=None):
+        b, t, d = hidden.size()
        h = hidden
        context = h.transpose(1, 2)
        queries = self.pad(context)
@ -220,6 +221,8 @@ class CifPredictorV2(nn.Module):
            alphas = alphas * mask_chunk_predictor
      
        if cache is not None:
+            if cache["is_final"]:
+                alphas[:, cache["stride"] + cache["pad_left"] - 1] += 0.45
            if cache["cif_hidden"] is not None:
                hidden = torch.cat((cache["cif_hidden"], hidden), 1)
            if cache["cif_alphas"] is not None:
@ -241,7 +244,6 @@ class CifPredictorV2(nn.Module):
                mask_chunk_peak_predictor[:, :pre_alphas_length] = 1.0
            mask_chunk_peak_predictor[:, pre_alphas_length + cache["pad_left"]:pre_alphas_length + cache["stride"] + cache["pad_left"]] = 1.0
            
-
        if mask_chunk_peak_predictor is not None:
            cif_peak = cif_peak * mask_chunk_peak_predictor.squeeze(-1)
        
--- a/funasr/modules/embedding.py
+++ b/funasr/modules/embedding.py
@ -8,7 +8,7 @@

 import math
 import torch
-
+import torch.nn.functional as F

 def _pre_hook(
    state_dict,
@ -409,9 +409,18 @@ class SinusoidalPositionEncoder(torch.nn.Module):

    def forward_chunk(self, x, cache=None):
        start_idx = 0
+        pad_left = 0
+        pad_right = 0
        batch_size, timesteps, input_dim = x.size()
        if cache is not None:
            start_idx = cache["start_idx"]
+            pad_left = cache["left"]
+            pad_right = cache["right"]
        positions = torch.arange(1, timesteps+start_idx+1)[None, :]
        position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)
-        return x + position_encoding[:, start_idx: start_idx + timesteps]
+        outputs = x + position_encoding[:, start_idx: start_idx + timesteps]
+        outputs = outputs.transpose(1,2)
+        outputs = F.pad(outputs, (pad_left, pad_right))
+        outputs = outputs.transpose(1,2)
+        return outputs
+       
--- a/funasr/punctuation/espnet_model.py
+++ b/funasr/punctuation/espnet_model.py
@ -24,8 +24,8 @@ class ESPnetPunctuationModel(AbsESPnetModel):

        # ignore_id may be assumed as 0, shared with CTC-blank symbol for ASR.
        self.ignore_id = ignore_id
-        if self.punc_model.with_vad():
-            print("This is a vad puncuation model.")
+        #if self.punc_model.with_vad():
+        #    print("This is a vad puncuation model.")

    def nll(
        self,
--- a/funasr/runtime/grpc/CMakeLists.txt
+++ b/funasr/runtime/grpc/CMakeLists.txt
@ -48,7 +48,7 @@ include_directories("${CMAKE_CURRENT_BINARY_DIR}")

 include_directories(../onnxruntime/include/)
 link_directories(../onnxruntime/build/src/)
-link_directories(../onnxruntime/build/third_party/webrtc/)
+link_directories(../onnxruntime/build/third_party/yaml-cpp/)

 link_directories(${ONNXRUNTIME_DIR}/lib)
 add_subdirectory("../onnxruntime/src" onnx_src)
@ -75,7 +75,6 @@ foreach(_target
  target_link_libraries(${_target}
    rg_grpc_proto
    rapidasr
-    webrtcvad
    ${EXTRA_LIBS}
    ${_REFLECTION}
    ${_GRPC_GRPCPP}
--- a/funasr/runtime/grpc/Readme.md
+++ b/funasr/runtime/grpc/Readme.md
@ -1,14 +1,13 @@
 ## paraformer grpc onnx server in c++

-
 #### Step 1. Build ../onnxruntime as it's document
 ```
-#put onnx-lib & onnx-asr-model & vocab.txt into /path/to/asrmodel(eg: /data/asrmodel)
+# Put the onnxruntime library and the ONNX ASR model into /path/to/asrmodel (e.g. /data/asrmodel)
 ls /data/asrmodel/
 onnxruntime-linux-x64-1.14.0  speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch

-file /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/vocab.txt
-UTF-8 Unicode text
+# Make sure config.yaml, am.mvn and model.onnx (or model_quant.onnx) are present under speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+
 ```

 #### Step 2. Compile and install grpc v1.52.0 in case of grpc bugs
@ -44,14 +43,16 @@ source ~/.bashrc

 #### Step 4. Start grpc paraformer server
 ```
-Usage: ./cmake/build/paraformer_server port thread_num /path/to/model_file
-./cmake/build/paraformer_server 10108 4 /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+Usage: ./cmake/build/paraformer_server port thread_num /path/to/model_file quantize(true or false)
+./cmake/build/paraformer_server 10108 4 /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch false
 ```

-
-
 #### Step 5. Start grpc python paraformer client  on PC with MIC
 ```
 cd ../python/grpc
 python grpc_main_client_mic.py  --host $server_ip --port 10108
 ```
+
+## Acknowledgements
+1. This project is maintained by the [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We thank [DeepScience](https://www.deepscience.cn) for contributing the gRPC service.
--- a/funasr/runtime/grpc/paraformer_server.cc
+++ b/funasr/runtime/grpc/paraformer_server.cc
@ -29,8 +29,8 @@ using paraformer::Request;
 using paraformer::Response;
 using paraformer::ASR;

-ASRServicer::ASRServicer(const char* model_path, int thread_num) {
-    AsrHanlde=RapidAsrInit(model_path, thread_num);
+ASRServicer::ASRServicer(const char* model_path, int thread_num, bool quantize) {
+    AsrHanlde=RapidAsrInit(model_path, thread_num, quantize);
    std::cout << "ASRServicer init" << std::endl;
    init_flag = 0;
 }
@ -170,10 +170,10 @@ grpc::Status ASRServicer::Recognize(
 }


-void RunServer(const std::string& port, int thread_num, const char* model_path) {
+void RunServer(const std::string& port, int thread_num, const char* model_path, bool quantize) {
    std::string server_address;
    server_address = "0.0.0.0:" + port;
-    ASRServicer service(model_path, thread_num);
+    ASRServicer service(model_path, thread_num, quantize);

    ServerBuilder builder;
    builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
@ -184,12 +184,15 @@ void RunServer(const std::string& port, int thread_num, const char* model_path)
 }

 int main(int argc, char* argv[]) {
-    if (argc < 3)
+    if (argc < 5)
    {
-        printf("Usage: %s port thread_num /path/to/model_file\n", argv[0]);
+        printf("Usage: %s port thread_num /path/to/model_file quantize(true or false) \n", argv[0]);
        exit(-1);
    }

-    RunServer(argv[1], atoi(argv[2]), argv[3]);
+    // is quantize
+    bool quantize = false;
+    std::istringstream(argv[4]) >> std::boolalpha >> quantize;
+    RunServer(argv[1], atoi(argv[2]), argv[3], quantize);
    return 0;
 }
--- a/funasr/runtime/grpc/paraformer_server.h
+++ b/funasr/runtime/grpc/paraformer_server.h
@ -45,7 +45,7 @@ class ASRServicer final : public ASR::Service {
    std::unordered_map<std::string, std::string> client_transcription;

  public:
-    ASRServicer(const char* model_path, int thread_num);
+    ASRServicer(const char* model_path, int thread_num, bool quantize);
    void clear_states(const std::string& user);
    void clear_buffers(const std::string& user);
    void clear_transcriptions(const std::string& user);
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@ -1,7 +1,6 @@
 cmake_minimum_required(VERSION 3.10)

-#-DONNXRUNTIME_DIR=D:\thirdpart\onnxruntime
-project(FastASR)
+project(FunASRonnx)

 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@ -23,8 +22,6 @@ link_directories(${ONNXRUNTIME_DIR}/lib)

 endif()

-#option(FASTASR_BUILD_PYTHON_MODULE "build python module, using FastASR in Python" OFF)
-
-add_subdirectory("./third_party/webrtc")
+add_subdirectory("./third_party/yaml-cpp")
 add_subdirectory(src)
 add_subdirectory(tester)
--- a/funasr/runtime/onnxruntime/include/Model.h
+++ b/funasr/runtime/onnxruntime/include/Model.h
@ -13,5 +13,5 @@ class Model {
    virtual std::string rescoring() = 0;
 };

-Model *create_model(const char *path,int nThread=0);
+Model *create_model(const char *path,int nThread=0,bool quantize=false);
 #endif
--- a/funasr/runtime/onnxruntime/include/librapidasrapi.h
+++ b/funasr/runtime/onnxruntime/include/librapidasrapi.h
@ -1,33 +1,20 @@
 #pragma once

-
 #ifdef WIN32
-
-
 #ifdef _RPASR_API_EXPORT
-
 #define  _RAPIDASRAPI __declspec(dllexport)
 #else
 #define  _RAPIDASRAPI __declspec(dllimport)
 #endif
-	
-
 #else
-#define _RAPIDASRAPI  
+#define _RAPIDASRAPI
 #endif

-
-
-
-
 #ifndef _WIN32
-
 #define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__))
-
 #else
 #define RPASR_CALLBCK_PREFIX __stdcall
 #endif
-	

 #ifdef __cplusplus 

@ -35,16 +22,13 @@ extern "C" {
 #endif

 typedef void* RPASR_HANDLE;
-
 typedef void* RPASR_RESULT;
-
 typedef unsigned char RPASR_BOOL;

 #define RPASR_TRUE 1
 #define RPASR_FALSE 0
 #define QM_DEFAULT_THREAD_NUM  4

-
 typedef enum
 {
 RASR_NONE=-1,
@ -55,7 +39,6 @@ typedef enum
 }RPASR_MODE;

 typedef enum {
-
 	RPASR_MODEL_PADDLE = 0,
 	RPASR_MODEL_PADDLE_2 = 1,
 	RPASR_MODEL_K2 = 2,
@ -63,17 +46,15 @@ typedef enum {

 }RPASR_MODEL_TYPE;

-
 typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
 	
-	// APIs for qmasr
-
-_RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThread);
-
+// APIs for qmasr
+_RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThread, bool quantize);


 // if not give a fnCallback ,it should be NULL 
 _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
+
 _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);

 _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
@ -83,8 +64,8 @@ _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogFile(RPASR_HANDLE handle, const char* szW
 _RAPIDASRAPI const char*	RapidAsrGetResult(RPASR_RESULT Result,int nIndex);

 _RAPIDASRAPI const int		RapidAsrGetRetNumber(RPASR_RESULT Result);
-_RAPIDASRAPI void			RapidAsrFreeResult(RPASR_RESULT Result);

+_RAPIDASRAPI void			RapidAsrFreeResult(RPASR_RESULT Result);

 _RAPIDASRAPI void			RapidAsrUninit(RPASR_HANDLE Handle);

--- a/funasr/runtime/onnxruntime/include/webrtc_vad.h
+++ b/funasr/runtime/onnxruntime/include/webrtc_vad.h
@ -1,87 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/*
- * This header file includes the VAD API calls. Specific function calls are
- * given below.
- */
-
-#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
-#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef struct WebRtcVadInst VadInst;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Creates an instance to the VAD structure.
-VadInst* WebRtcVad_Create(void);
-
-// Frees the dynamic memory of a specified VAD instance.
-//
-// - handle [i] : Pointer to VAD instance that should be freed.
-void WebRtcVad_Free(VadInst* handle);
-
-// Initializes a VAD instance.
-//
-// - handle [i/o] : Instance that should be initialized.
-//
-// returns        : 0 - (OK),
-//                 -1 - (null pointer or Default mode could not be set).
-int WebRtcVad_Init(VadInst* handle);
-
-// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
-// restrictive in reporting speech. Put in other words the probability of being
-// speech when the VAD returns 1 is increased with increasing mode. As a
-// consequence also the missed detection rate goes up.
-//
-// - handle [i/o] : VAD instance.
-// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
-//
-// returns        : 0 - (OK),
-//                 -1 - (null pointer, mode could not be set or the VAD instance
-//                       has not been initialized).
-int WebRtcVad_set_mode(VadInst* handle, int mode);
-
-// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
-// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
-//
-// - handle       [i/o] : VAD Instance. Needs to be initialized by
-//                        WebRtcVad_Init() before call.
-// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
-// - audio_frame  [i]   : Audio frame buffer.
-// - frame_length [i]   : Length of audio frame buffer in number of samples.
-//
-// returns              : 1 - (Active Voice),
-//                        0 - (Non-active Voice),
-//                       -1 - (Error)
-int WebRtcVad_Process(VadInst* handle,
-                      int fs,
-                      const int16_t* audio_frame,
-                      size_t frame_length);
-
-// Checks for valid combinations of |rate| and |frame_length|. We support 10,
-// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
-//
-// - rate         [i] : Sampling frequency (Hz).
-// - frame_length [i] : Speech frame buffer length in number of samples.
-//
-// returns            : 0 - (valid combination), -1 - (invalid combination)
-int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/anchor.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/anchor.h
@ -0,0 +1,17 @@
+#ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+
+namespace YAML {
+typedef std::size_t anchor_t;
+const anchor_t NullAnchor = 0;
+}
+
+#endif  // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/binary.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/binary.h
@ -0,0 +1,67 @@
+#ifndef BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+YAML_CPP_API std::string EncodeBase64(const unsigned char *data,
+                                      std::size_t size);
+YAML_CPP_API std::vector<unsigned char> DecodeBase64(const std::string &input);
+
+class YAML_CPP_API Binary {
+ public:
+  Binary() : m_unownedData(0), m_unownedSize(0) {}
+  Binary(const unsigned char *data_, std::size_t size_)
+      : m_unownedData(data_), m_unownedSize(size_) {}
+
+  bool owned() const { return !m_unownedData; }
+  std::size_t size() const { return owned() ? m_data.size() : m_unownedSize; }
+  const unsigned char *data() const {
+    return owned() ? &m_data[0] : m_unownedData;
+  }
+
+  void swap(std::vector<unsigned char> &rhs) {
+    if (m_unownedData) {
+      m_data.swap(rhs);
+      rhs.clear();
+      rhs.resize(m_unownedSize);
+      std::copy(m_unownedData, m_unownedData + m_unownedSize, rhs.begin());
+      m_unownedData = 0;
+      m_unownedSize = 0;
+    } else {
+      m_data.swap(rhs);
+    }
+  }
+
+  bool operator==(const Binary &rhs) const {
+    const std::size_t s = size();
+    if (s != rhs.size())
+      return false;
+    const unsigned char *d1 = data();
+    const unsigned char *d2 = rhs.data();
+    for (std::size_t i = 0; i < s; i++) {
+      if (*d1++ != *d2++)
+        return false;
+    }
+    return true;
+  }
+
+  bool operator!=(const Binary &rhs) const { return !(*this == rhs); }
+
+ private:
+  std::vector<unsigned char> m_data;
+  const unsigned char *m_unownedData;
+  std::size_t m_unownedSize;
+};
+}
+
+#endif  // BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/anchordict.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/anchordict.h
@ -0,0 +1,39 @@
+#ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+
+#include "../anchor.h"
+
+namespace YAML {
+/**
+ * An object that stores and retrieves values correlating to {@link anchor_t}
+ * values.
+ *
+ * <p>Efficient implementation that can make assumptions about how
+ * {@code anchor_t} values are assigned by the {@link Parser} class.
+ */
+template <class T>
+class AnchorDict {
+ public:
+  void Register(anchor_t anchor, T value) {
+    if (anchor > m_data.size()) {
+      m_data.resize(anchor);
+    }
+    m_data[anchor - 1] = value;
+  }
+
+  T Get(anchor_t anchor) const { return m_data[anchor - 1]; }
+
+ private:
+  std::vector<T> m_data;
+};
+}
+
+#endif  // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/graphbuilder.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/graphbuilder.h
@ -0,0 +1,149 @@
+#ifndef GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include <string>
+
+namespace YAML {
+class Parser;
+
+// GraphBuilderInterface
+// . Abstraction of node creation
+// . pParentNode is always NULL or the return value of one of the NewXXX()
+//   functions.
+class GraphBuilderInterface {
+ public:
+  // Pure virtual destructor: keeps the class abstract. It is only declared
+  // here, so a definition has to be provided elsewhere in the library.
+  virtual ~GraphBuilderInterface() = 0;
+
+  // Create and return a new node with a null value.
+  virtual void *NewNull(const Mark &mark, void *pParentNode) = 0;
+
+  // Create and return a new node with the given tag and value.
+  virtual void *NewScalar(const Mark &mark, const std::string &tag,
+                          void *pParentNode, const std::string &value) = 0;
+
+  // Create and return a new sequence node
+  virtual void *NewSequence(const Mark &mark, const std::string &tag,
+                            void *pParentNode) = 0;
+
+  // Add pNode to pSequence.  pNode was created with one of the NewXxx()
+  // functions and pSequence with NewSequence().
+  virtual void AppendToSequence(void *pSequence, void *pNode) = 0;
+
+  // Note that no more entries will be added to pSequence.  The default
+  // implementation does nothing.
+  virtual void SequenceComplete(void *pSequence) { (void)pSequence; }
+
+  // Create and return a new map node
+  virtual void *NewMap(const Mark &mark, const std::string &tag,
+                       void *pParentNode) = 0;
+
+  // Add the pKeyNode => pValueNode mapping to pMap.  pKeyNode and pValueNode
+  // were created with one of the NewXxx() methods and pMap with NewMap().
+  virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) = 0;
+
+  // Note that no more assignments will be made in pMap.  The default
+  // implementation does nothing.
+  virtual void MapComplete(void *pMap) { (void)pMap; }
+
+  // Return the node that should be used in place of an alias referencing
+  // pNode (pNode by default)
+  virtual void *AnchorReference(const Mark &mark, void *pNode) {
+    (void)mark;  // unused in the default implementation
+    return pNode;
+  }
+};
+
+// Typesafe wrapper for GraphBuilderInterface.  Assumes that Impl defines
+// Node, Sequence, and Map types.  Sequence and Map must derive from Node
+// (unless Node is defined as void).  Impl must also implement function with
+// all of the same names as the virtual functions in GraphBuilderInterface
+// -- including the ones with default implementations -- but with the
+// prototypes changed to accept an explicit Node*, Sequence*, or Map* where
+// appropriate.
+template <class Impl>
+class GraphBuilder : public GraphBuilderInterface {
+ public:
+  // Node types supplied by the implementation.
+  typedef typename Impl::Node Node;
+  typedef typename Impl::Sequence Sequence;
+  typedef typename Impl::Map Map;
+
+  // Wraps `impl` by reference; `impl` must outlive this builder.
+  GraphBuilder(Impl &impl) : m_impl(impl) {
+    Map *pMap = NULL;
+    Sequence *pSeq = NULL;
+    Node *pNode = NULL;
+
+    // Type consistency checks: these assignments only compile when Map* and
+    // Sequence* convert implicitly to Node*.
+    pNode = pMap;
+    pNode = pSeq;
+  }
+
+  // Explicit upcast to the untyped interface.
+  GraphBuilderInterface &AsBuilderInterface() { return *this; }
+
+  // Each override below casts the incoming void* handles back to the typed
+  // pointers, forwards to the matching Impl member, and hands the result back
+  // as void* after a compile-time check of its pointer type.
+  virtual void *NewNull(const Mark &mark, void *pParentNode) {
+    return CheckType<Node>(m_impl.NewNull(mark, AsNode(pParentNode)));
+  }
+
+  virtual void *NewScalar(const Mark &mark, const std::string &tag,
+                          void *pParentNode, const std::string &value) {
+    return CheckType<Node>(
+        m_impl.NewScalar(mark, tag, AsNode(pParentNode), value));
+  }
+
+  virtual void *NewSequence(const Mark &mark, const std::string &tag,
+                            void *pParentNode) {
+    return CheckType<Sequence>(
+        m_impl.NewSequence(mark, tag, AsNode(pParentNode)));
+  }
+  virtual void AppendToSequence(void *pSequence, void *pNode) {
+    m_impl.AppendToSequence(AsSequence(pSequence), AsNode(pNode));
+  }
+  virtual void SequenceComplete(void *pSequence) {
+    m_impl.SequenceComplete(AsSequence(pSequence));
+  }
+
+  virtual void *NewMap(const Mark &mark, const std::string &tag,
+                       void *pParentNode) {
+    return CheckType<Map>(m_impl.NewMap(mark, tag, AsNode(pParentNode)));
+  }
+  virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) {
+    m_impl.AssignInMap(AsMap(pMap), AsNode(pKeyNode), AsNode(pValueNode));
+  }
+  virtual void MapComplete(void *pMap) { m_impl.MapComplete(AsMap(pMap)); }
+
+  virtual void *AnchorReference(const Mark &mark, void *pNode) {
+    return CheckType<Node>(m_impl.AnchorReference(mark, AsNode(pNode)));
+  }
+
+ private:
+  // The wrapped implementation (not owned).
+  Impl &m_impl;
+
+  // Static check for pointer to T: the return statement only compiles when
+  // U* converts implicitly to T*.
+  template <class T, class U>
+  static T *CheckType(U *p) {
+    return p;
+  }
+
+  // Unchecked casts from the untyped handles back to the Impl pointer types.
+  static Node *AsNode(void *pNode) { return static_cast<Node *>(pNode); }
+  static Sequence *AsSequence(void *pSeq) {
+    return static_cast<Sequence *>(pSeq);
+  }
+  static Map *AsMap(void *pMap) { return static_cast<Map *>(pMap); }
+};
+
+// Untyped entry point: drives `graphBuilder` from `parser` and returns the
+// resulting root handle as void*.  Declared here, defined elsewhere.
+void *BuildGraphOfNextDocument(Parser &parser,
+                               GraphBuilderInterface &graphBuilder);
+
+// Typesafe convenience overload: wraps `impl` in a GraphBuilder<Impl>, runs
+// the untyped overload above, and casts the returned handle back to
+// Impl::Node*.
+template <class Impl>
+typename Impl::Node *BuildGraphOfNextDocument(Parser &parser, Impl &impl) {
+  GraphBuilder<Impl> graphBuilder(impl);
+  // Pass the builder explicitly as a GraphBuilderInterface&.  Handing over the
+  // GraphBuilder<Impl> object itself is an exact match for this very template
+  // (with its parameter deduced as GraphBuilder<Impl>), which overload
+  // resolution prefers over the derived-to-base conversion required by the
+  // non-template overload, so the call would recurse into this template.
+  return static_cast<typename Impl::Node *>(
+      BuildGraphOfNextDocument(parser, graphBuilder.AsBuilderInterface()));
+}
+}
+
+#endif  // GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/dll.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/dll.h
@ -0,0 +1,33 @@
+#ifndef DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+// The following ifdef block is the standard way of creating macros which make
+// exporting from a DLL simpler. All files within this DLL are compiled with the
+// yaml_cpp_EXPORTS symbol defined on the command line. This symbol should not
+// be defined on any project that uses this DLL. This way any other project
+// whose source files include this file see YAML_CPP_API functions as being
+// imported from a DLL, whereas this DLL sees symbols defined with this macro as
+// being exported.
+#undef YAML_CPP_API
+
+#ifdef YAML_CPP_DLL      // Using or Building YAML-CPP DLL (definition defined
+                         // manually)
+#ifdef yaml_cpp_EXPORTS  // Building YAML-CPP DLL (definition created by CMake
+                         // or defined manually)
+//	#pragma message( "Defining YAML_CPP_API for DLL export" )
+#define YAML_CPP_API __declspec(dllexport)
+#else  // yaml_cpp_EXPORTS
+//	#pragma message( "Defining YAML_CPP_API for DLL import" )
+#define YAML_CPP_API __declspec(dllimport)
+#endif  // yaml_cpp_EXPORTS
+#else   // YAML_CPP_DLL
+#define YAML_CPP_API
+#endif  // YAML_CPP_DLL
+
+#endif  // DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitfromevents.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitfromevents.h
@ -0,0 +1,57 @@
+#ifndef EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stack>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/eventhandler.h"
+
+namespace YAML {
+struct Mark;
+}  // namespace YAML
+
+namespace YAML {
+class Emitter;
+
+// EventHandler implementation that is bound to an Emitter.  Only the
+// declarations are visible in this header; the member definitions live
+// elsewhere.
+class EmitFromEvents : public EventHandler {
+ public:
+  // Binds the handler to `emitter`, which is held by reference and therefore
+  // must outlive this object.
+  EmitFromEvents(Emitter& emitter);
+
+  // EventHandler overrides.
+  virtual void OnDocumentStart(const Mark& mark);
+  virtual void OnDocumentEnd();
+
+  virtual void OnNull(const Mark& mark, anchor_t anchor);
+  virtual void OnAlias(const Mark& mark, anchor_t anchor);
+  virtual void OnScalar(const Mark& mark, const std::string& tag,
+                        anchor_t anchor, const std::string& value);
+
+  virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+                               anchor_t anchor, EmitterStyle::value style);
+  virtual void OnSequenceEnd();
+
+  virtual void OnMapStart(const Mark& mark, const std::string& tag,
+                          anchor_t anchor, EmitterStyle::value style);
+  virtual void OnMapEnd();
+
+ private:
+  // Internal helpers (defined out of line).
+  void BeginNode();
+  void EmitProps(const std::string& tag, anchor_t anchor);
+
+ private:
+  // Target emitter (not owned).
+  Emitter& m_emitter;
+
+  // What the handler expects next inside the collection currently open.
+  struct State {
+    enum value { WaitingForSequenceEntry, WaitingForKey, WaitingForValue };
+  };
+  // Stack of State values -- presumably one entry per open sequence/map;
+  // confirm against the implementation file.
+  std::stack<State::value> m_stateStack;
+};
+}
+
+#endif  // EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitter.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitter.h
@ -0,0 +1,254 @@
+#ifndef EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "yaml-cpp/binary.h"
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/emitterdef.h"
+#include "yaml-cpp/emittermanip.h"
+#include "yaml-cpp/noncopyable.h"
+#include "yaml-cpp/null.h"
+#include "yaml-cpp/ostream_wrapper.h"
+
+namespace YAML {
+class Binary;
+struct _Null;
+}  // namespace YAML
+
+namespace YAML {
+class EmitterState;
+
+// Streaming YAML writer.  Values and manipulators are fed in through the
+// Write*() / SetLocal*() members, normally via the operator<< overloads that
+// follow the class.  Copying is disabled through the private noncopyable
+// base.  Only the two Write* templates and the SetStreamablePrecision
+// specializations are defined in this header.
+class YAML_CPP_API Emitter : private noncopyable {
+ public:
+  Emitter();
+  // Explicit so that a std::ostream is never converted to an Emitter
+  // implicitly.
+  explicit Emitter(std::ostream& stream);
+  ~Emitter();
+
+  // output
+  const char* c_str() const;
+  std::size_t size() const;
+
+  // state checking
+  // good() is consulted by the Write* templates below: once it returns false
+  // they return without emitting anything.
+  bool good() const;
+  const std::string GetLastError() const;
+
+  // global setters
+  bool SetOutputCharset(EMITTER_MANIP value);
+  bool SetStringFormat(EMITTER_MANIP value);
+  bool SetBoolFormat(EMITTER_MANIP value);
+  bool SetIntBase(EMITTER_MANIP value);
+  bool SetSeqFormat(EMITTER_MANIP value);
+  bool SetMapFormat(EMITTER_MANIP value);
+  bool SetIndent(std::size_t n);
+  bool SetPreCommentIndent(std::size_t n);
+  bool SetPostCommentIndent(std::size_t n);
+  bool SetFloatPrecision(std::size_t n);
+  bool SetDoublePrecision(std::size_t n);
+
+  // local setters
+  // These return *this so they can be chained; they are what the
+  // manipulator operator<< overloads forward to.
+  Emitter& SetLocalValue(EMITTER_MANIP value);
+  Emitter& SetLocalIndent(const _Indent& indent);
+  Emitter& SetLocalPrecision(const _Precision& precision);
+
+  // overloads of write
+  Emitter& Write(const std::string& str);
+  Emitter& Write(bool b);
+  Emitter& Write(char ch);
+  Emitter& Write(const _Alias& alias);
+  Emitter& Write(const _Anchor& anchor);
+  Emitter& Write(const _Tag& tag);
+  Emitter& Write(const _Comment& comment);
+  Emitter& Write(const _Null& n);
+  Emitter& Write(const Binary& binary);
+
+  // Writes `value` as a scalar after PrepareIntegralStream() has configured
+  // the formatting stream (defined below the class).
+  template <typename T>
+  Emitter& WriteIntegralType(T value);
+
+  // Writes `value` as a scalar using its stream insertion operator, after
+  // SetStreamablePrecision<T>() has been applied (defined below the class).
+  template <typename T>
+  Emitter& WriteStreamable(T value);
+
+ private:
+  // No-op for generic T; specialized for float and double after the class.
+  template <typename T>
+  void SetStreamablePrecision(std::stringstream&) {}
+  std::size_t GetFloatPrecision() const;
+  std::size_t GetDoublePrecision() const;
+
+  void PrepareIntegralStream(std::stringstream& stream) const;
+  void StartedScalar();
+
+ private:
+  // Handlers for the structural pieces of the output (defined out of line).
+  void EmitBeginDoc();
+  void EmitEndDoc();
+  void EmitBeginSeq();
+  void EmitEndSeq();
+  void EmitBeginMap();
+  void EmitEndMap();
+  void EmitNewline();
+  void EmitKindTag();
+  void EmitTag(bool verbatim, const _Tag& tag);
+
+  // Called before a node of kind `child` is written; the remaining
+  // *PrepareNode / *Prepare*Key* members are the per-context variants
+  // (flow/block, sequence/map, simple/long key).
+  void PrepareNode(EmitterNodeType::value child);
+  void PrepareTopNode(EmitterNodeType::value child);
+  void FlowSeqPrepareNode(EmitterNodeType::value child);
+  void BlockSeqPrepareNode(EmitterNodeType::value child);
+
+  void FlowMapPrepareNode(EmitterNodeType::value child);
+
+  void FlowMapPrepareLongKey(EmitterNodeType::value child);
+  void FlowMapPrepareLongKeyValue(EmitterNodeType::value child);
+  void FlowMapPrepareSimpleKey(EmitterNodeType::value child);
+  void FlowMapPrepareSimpleKeyValue(EmitterNodeType::value child);
+
+  void BlockMapPrepareNode(EmitterNodeType::value child);
+
+  void BlockMapPrepareLongKey(EmitterNodeType::value child);
+  void BlockMapPrepareLongKeyValue(EmitterNodeType::value child);
+  void BlockMapPrepareSimpleKey(EmitterNodeType::value child);
+  void BlockMapPrepareSimpleKeyValue(EmitterNodeType::value child);
+
+  void SpaceOrIndentTo(bool requireSpace, std::size_t indent);
+
+  const char* ComputeFullBoolName(bool b) const;
+  bool CanEmitNewline() const;
+
+ private:
+  // Emitter state, held through a pointer so that EmitterState only needs a
+  // forward declaration in this header.
+  std::unique_ptr<EmitterState> m_pState;
+  // Sink that all emitted text is written to.
+  ostream_wrapper m_stream;
+};
+
+// Emits `value` as a scalar node.  The value is formatted in a scratch stream
+// configured by PrepareIntegralStream() and then copied to the output.  When
+// the emitter is no longer good() nothing is written.
+template <typename T>
+inline Emitter& Emitter::WriteIntegralType(T value) {
+  if (good()) {
+    PrepareNode(EmitterNodeType::Scalar);
+
+    std::stringstream formatted;
+    PrepareIntegralStream(formatted);
+    formatted << value;
+    m_stream << formatted.str();
+
+    StartedScalar();
+  }
+
+  return *this;
+}
+
+// Emits `value` as a scalar node using its stream insertion operator.  The
+// scratch stream first receives the precision selected by
+// SetStreamablePrecision<T>().  When the emitter is no longer good() nothing
+// is written.
+template <typename T>
+inline Emitter& Emitter::WriteStreamable(T value) {
+  if (good()) {
+    PrepareNode(EmitterNodeType::Scalar);
+
+    std::stringstream formatted;
+    SetStreamablePrecision<T>(formatted);
+    formatted << value;
+    m_stream << formatted.str();
+
+    StartedScalar();
+  }
+
+  return *this;
+}
+
+// float: apply the emitter's float precision to the scratch stream.
+template <>
+inline void Emitter::SetStreamablePrecision<float>(std::stringstream& stream) {
+  const std::streamsize digits =
+      static_cast<std::streamsize>(GetFloatPrecision());
+  stream.precision(digits);
+}
+
+// double: apply the emitter's double precision to the scratch stream.
+template <>
+inline void Emitter::SetStreamablePrecision<double>(std::stringstream& stream) {
+  const std::streamsize digits =
+      static_cast<std::streamsize>(GetDoublePrecision());
+  stream.precision(digits);
+}
+
+// overloads of insertion
+// Every overload forwards to the matching Emitter member and returns the
+// emitter so that insertions can be chained.
+inline Emitter& operator<<(Emitter& emitter, const std::string& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, bool v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, char v) {
+  return emitter.Write(v);
+}
+// unsigned char is written through the char overload, not as a number.
+inline Emitter& operator<<(Emitter& emitter, unsigned char v) {
+  return emitter.Write(static_cast<char>(v));
+}
+inline Emitter& operator<<(Emitter& emitter, const _Alias& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Anchor& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Tag& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Comment& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Null& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const Binary& b) {
+  return emitter.Write(b);
+}
+
+// C strings are copied into a std::string first; `v` must not be null.
+inline Emitter& operator<<(Emitter& emitter, const char* v) {
+  return emitter.Write(std::string(v));
+}
+
+// Integer types go through WriteIntegralType.
+inline Emitter& operator<<(Emitter& emitter, int v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned int v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, short v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned short v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, long v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned long v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, long long v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned long long v) {
+  return emitter.WriteIntegralType(v);
+}
+
+// Floating-point types go through WriteStreamable, which applies the
+// float/double precision before formatting.
+inline Emitter& operator<<(Emitter& emitter, float v) {
+  return emitter.WriteStreamable(v);
+}
+inline Emitter& operator<<(Emitter& emitter, double v) {
+  return emitter.WriteStreamable(v);
+}
+
+// Manipulators are routed to the SetLocal*() members instead of being
+// written as values.
+inline Emitter& operator<<(Emitter& emitter, EMITTER_MANIP value) {
+  return emitter.SetLocalValue(value);
+}
+
+inline Emitter& operator<<(Emitter& emitter, _Indent indent) {
+  return emitter.SetLocalIndent(indent);
+}
+
+inline Emitter& operator<<(Emitter& emitter, _Precision precision) {
+  return emitter.SetLocalPrecision(precision);
+}
+}
+
+#endif  // EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitterdef.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitterdef.h
@ -0,0 +1,16 @@
+#ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+// Node kinds passed to the Emitter's *PrepareNode() members.  The enum is
+// wrapped in a struct so that the enumerators are scoped
+// (EmitterNodeType::Scalar, ...).
+struct EmitterNodeType {
+  enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap };
+};
+}
+
+#endif  // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emittermanip.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emittermanip.h
@ -0,0 +1,137 @@
+#ifndef EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+namespace YAML {
+// Manipulator values understood by the Emitter.  They are either inserted
+// with operator<< (which forwards to Emitter::SetLocalValue) or passed to
+// one of the Emitter::Set*() global setters.  Several values are shared
+// between groups; those repeats are listed as "duplicate" comments below so
+// that each group reads as complete.
+enum EMITTER_MANIP {
+  // general manipulators
+  Auto,
+  TagByKind,
+  Newline,
+
+  // output character set
+  EmitNonAscii,
+  EscapeNonAscii,
+
+  // string manipulators
+  // Auto, // duplicate
+  SingleQuoted,
+  DoubleQuoted,
+  Literal,
+
+  // bool manipulators
+  YesNoBool,      // yes, no
+  TrueFalseBool,  // true, false
+  OnOffBool,      // on, off
+  UpperCase,      // TRUE, N
+  LowerCase,      // f, yes
+  CamelCase,      // No, Off
+  LongBool,       // yes, On
+  ShortBool,      // y, t
+
+  // int manipulators
+  Dec,
+  Hex,
+  Oct,
+
+  // document manipulators
+  BeginDoc,
+  EndDoc,
+
+  // sequence manipulators
+  BeginSeq,
+  EndSeq,
+  Flow,
+  Block,
+
+  // map manipulators
+  BeginMap,
+  EndMap,
+  Key,
+  Value,
+  // Flow, // duplicate
+  // Block, // duplicate
+  // Auto, // duplicate
+  LongKey
+};
+
+// Parameterized manipulators.  Each _Xxx struct is a small value carrier that
+// is inserted into an Emitter; the free functions next to them are the
+// factories callers are expected to use.  The string-taking factories accept
+// their arguments by const reference so that the only copy made is the one
+// into the carrier itself.
+
+// Indentation width carrier.
+struct _Indent {
+  _Indent(int value_) : value(value_) {}
+  int value;
+};
+
+inline _Indent Indent(int value) { return _Indent(value); }
+
+// Alias name carrier.
+struct _Alias {
+  _Alias(const std::string& content_) : content(content_) {}
+  std::string content;
+};
+
+inline _Alias Alias(const std::string& content) { return _Alias(content); }
+
+// Anchor name carrier.
+struct _Anchor {
+  _Anchor(const std::string& content_) : content(content_) {}
+  std::string content;
+};
+
+inline _Anchor Anchor(const std::string& content) { return _Anchor(content); }
+
+// Tag carrier: a prefix, the tag content, and the form the tag takes.
+struct _Tag {
+  struct Type {
+    enum value { Verbatim, PrimaryHandle, NamedHandle };
+  };
+
+  explicit _Tag(const std::string& prefix_, const std::string& content_,
+                Type::value type_)
+      : prefix(prefix_), content(content_), type(type_) {}
+  std::string prefix;
+  std::string content;
+  Type::value type;
+};
+
+// Verbatim tag with an empty prefix.
+inline _Tag VerbatimTag(const std::string& content) {
+  return _Tag("", content, _Tag::Type::Verbatim);
+}
+
+// Primary-handle tag with an empty prefix.
+inline _Tag LocalTag(const std::string& content) {
+  return _Tag("", content, _Tag::Type::PrimaryHandle);
+}
+
+// Named-handle tag using `prefix` as the handle name.
+inline _Tag LocalTag(const std::string& prefix, const std::string& content) {
+  return _Tag(prefix, content, _Tag::Type::NamedHandle);
+}
+
+// Named-handle tag with an empty prefix.
+inline _Tag SecondaryTag(const std::string& content) {
+  return _Tag("", content, _Tag::Type::NamedHandle);
+}
+
+// Comment text carrier.
+struct _Comment {
+  _Comment(const std::string& content_) : content(content_) {}
+  std::string content;
+};
+
+inline _Comment Comment(const std::string& content) { return _Comment(content); }
+
+// Precision carrier.  The factories below pass -1 for the kind they do not
+// set.
+struct _Precision {
+  _Precision(int floatPrecision_, int doublePrecision_)
+      : floatPrecision(floatPrecision_), doublePrecision(doublePrecision_) {}
+
+  int floatPrecision;
+  int doublePrecision;
+};
+
+inline _Precision FloatPrecision(int n) { return _Precision(n, -1); }
+
+inline _Precision DoublePrecision(int n) { return _Precision(-1, n); }
+
+inline _Precision Precision(int n) { return _Precision(n, n); }
+}
+
+#endif  // EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitterstyle.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitterstyle.h
@ -0,0 +1,16 @@
+#ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+// Collection style carried by the OnSequenceStart/OnMapStart events.  The
+// enum is wrapped in a struct so that the enumerators are scoped
+// (EmitterStyle::Flow, ...).
+struct EmitterStyle {
+  enum value { Default, Block, Flow };
+};
+}
+
+#endif  // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/eventhandler.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/eventhandler.h
@ -0,0 +1,40 @@
+#ifndef EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+
+namespace YAML {
+struct Mark;
+
+// Abstract callback interface for a stream of document events.  Every
+// callback is pure virtual, so a concrete handler must implement all of them.
+class EventHandler {
+ public:
+  // Virtual so that handlers can be destroyed through a base pointer.
+  virtual ~EventHandler() {}
+
+  // Document boundaries.
+  virtual void OnDocumentStart(const Mark& mark) = 0;
+  virtual void OnDocumentEnd() = 0;
+
+  // Leaf nodes: null, alias, and scalar (with its tag and text value).
+  virtual void OnNull(const Mark& mark, anchor_t anchor) = 0;
+  virtual void OnAlias(const Mark& mark, anchor_t anchor) = 0;
+  virtual void OnScalar(const Mark& mark, const std::string& tag,
+                        anchor_t anchor, const std::string& value) = 0;
+
+  // Sequence boundaries.
+  virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+                               anchor_t anchor, EmitterStyle::value style) = 0;
+  virtual void OnSequenceEnd() = 0;
+
+  // Map boundaries.
+  virtual void OnMapStart(const Mark& mark, const std::string& tag,
+                          anchor_t anchor, EmitterStyle::value style) = 0;
+  virtual void OnMapEnd() = 0;
+};
+}
+
+#endif  // EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/exceptions.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/exceptions.h
@ -0,0 +1,267 @@
+#ifndef EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/traits.h"
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+// This is here for compatibility with older versions of Visual Studio
+// which don't support noexcept.  Only compilers older than Visual Studio 2015
+// (_MSC_VER < 1900) need the library-internal _NOEXCEPT spelling; newer
+// versions support the noexcept keyword directly, and relying on the
+// internal macro there is not guaranteed to keep working.
+#if defined(_MSC_VER) && _MSC_VER < 1900
+    #define YAML_CPP_NOEXCEPT _NOEXCEPT
+#else
+    #define YAML_CPP_NOEXCEPT noexcept
+#endif
+
+namespace YAML {
+// error messages
+namespace ErrorMsg {
+const char* const YAML_DIRECTIVE_ARGS =
+    "YAML directives must have exactly one argument";
+const char* const YAML_VERSION = "bad YAML version: ";
+const char* const YAML_MAJOR_VERSION = "YAML major version too large";
+const char* const REPEATED_YAML_DIRECTIVE = "repeated YAML directive";
+const char* const TAG_DIRECTIVE_ARGS =
+    "TAG directives must have exactly two arguments";
+const char* const REPEATED_TAG_DIRECTIVE = "repeated TAG directive";
+const char* const CHAR_IN_TAG_HANDLE =
+    "illegal character found while scanning tag handle";
+const char* const TAG_WITH_NO_SUFFIX = "tag handle with no suffix";
+const char* const END_OF_VERBATIM_TAG = "end of verbatim tag not found";
+const char* const END_OF_MAP = "end of map not found";
+const char* const END_OF_MAP_FLOW = "end of map flow not found";
+const char* const END_OF_SEQ = "end of sequence not found";
+const char* const END_OF_SEQ_FLOW = "end of sequence flow not found";
+const char* const MULTIPLE_TAGS =
+    "cannot assign multiple tags to the same node";
+const char* const MULTIPLE_ANCHORS =
+    "cannot assign multiple anchors to the same node";
+const char* const MULTIPLE_ALIASES =
+    "cannot assign multiple aliases to the same node";
+const char* const ALIAS_CONTENT =
+    "aliases can't have any content, *including* tags";
+const char* const INVALID_HEX = "bad character found while scanning hex number";
+const char* const INVALID_UNICODE = "invalid unicode: ";
+const char* const INVALID_ESCAPE = "unknown escape character: ";
+const char* const UNKNOWN_TOKEN = "unknown token";
+const char* const DOC_IN_SCALAR = "illegal document indicator in scalar";
+const char* const EOF_IN_SCALAR = "illegal EOF in scalar";
+const char* const CHAR_IN_SCALAR = "illegal character in scalar";
+const char* const TAB_IN_INDENTATION =
+    "illegal tab when looking for indentation";
+const char* const FLOW_END = "illegal flow end";
+const char* const BLOCK_ENTRY = "illegal block entry";
+const char* const MAP_KEY = "illegal map key";
+const char* const MAP_VALUE = "illegal map value";
+const char* const ALIAS_NOT_FOUND = "alias not found after *";
+const char* const ANCHOR_NOT_FOUND = "anchor not found after &";
+const char* const CHAR_IN_ALIAS =
+    "illegal character found while scanning alias";
+const char* const CHAR_IN_ANCHOR =
+    "illegal character found while scanning anchor";
+const char* const ZERO_INDENT_IN_BLOCK =
+    "cannot set zero indentation for a block scalar";
+const char* const CHAR_IN_BLOCK = "unexpected character in block scalar";
+const char* const AMBIGUOUS_ANCHOR =
+    "cannot assign the same alias to multiple nodes";
+const char* const UNKNOWN_ANCHOR = "the referenced anchor is not defined";
+
+const char* const INVALID_NODE =
+    "invalid node; this may result from using a map iterator as a sequence "
+    "iterator, or vice-versa";
+const char* const INVALID_SCALAR = "invalid scalar";
+const char* const KEY_NOT_FOUND = "key not found";
+const char* const BAD_CONVERSION = "bad conversion";
+const char* const BAD_DEREFERENCE = "bad dereference";
+const char* const BAD_SUBSCRIPT = "operator[] call on a scalar";
+const char* const BAD_PUSHBACK = "appending to a non-sequence";
+const char* const BAD_INSERT = "inserting in a non-convertible-to-map";
+
+const char* const UNMATCHED_GROUP_TAG = "unmatched group tag";
+const char* const UNEXPECTED_END_SEQ = "unexpected end sequence token";
+const char* const UNEXPECTED_END_MAP = "unexpected end map token";
+const char* const SINGLE_QUOTED_CHAR =
+    "invalid character in single-quoted string";
+const char* const INVALID_ANCHOR = "invalid anchor";
+const char* const INVALID_ALIAS = "invalid alias";
+const char* const INVALID_TAG = "invalid tag";
+const char* const BAD_FILE = "bad file";
+
+// Fallback for key types that are not numeric: the key is not printed and
+// the plain KEY_NOT_FOUND text is returned.
+template <typename T>
+inline const std::string KEY_NOT_FOUND_WITH_KEY(
+    const T&, typename disable_if<is_numeric<T>>::type* = 0) {
+  return std::string(KEY_NOT_FOUND);
+}
+
+// String keys: "<KEY_NOT_FOUND>: <key>".
+inline const std::string KEY_NOT_FOUND_WITH_KEY(const std::string& key) {
+  std::string message(KEY_NOT_FOUND);
+  message += ": ";
+  message += key;
+  return message;
+}
+
+// Numeric keys: "<KEY_NOT_FOUND>: <key>", with the key formatted through a
+// string stream.
+template <typename T>
+inline const std::string KEY_NOT_FOUND_WITH_KEY(
+    const T& key, typename enable_if<is_numeric<T>>::type* = 0) {
+  std::stringstream formatted;
+  formatted << KEY_NOT_FOUND;
+  formatted << ": ";
+  formatted << key;
+  return formatted.str();
+}
+}
+
+// Base class of all yaml-cpp exceptions.  what() returns the text produced by
+// build_what(); the original mark and message stay available as public
+// members.
+class YAML_CPP_API Exception : public std::runtime_error {
+ public:
+  Exception(const Mark& mark_, const std::string& msg_)
+      : std::runtime_error(build_what(mark_, msg_)), mark(mark_), msg(msg_) {}
+  // Declared here, defined out of line.
+  virtual ~Exception() YAML_CPP_NOEXCEPT;
+
+  Exception(const Exception&) = default;
+
+  Mark mark;        // position the error refers to (may be the null mark)
+  std::string msg;  // message without the position prefix
+
+ private:
+  // Builds the what() text: the bare message for a null mark, otherwise the
+  // message prefixed with the line and column converted to 1-based numbers.
+  static const std::string build_what(const Mark& mark,
+                                      const std::string& msg) {
+    if (mark.is_null()) {
+      // Return the string itself; going through c_str() would re-scan the
+      // buffer and cut the message at an embedded NUL.
+      return msg;
+    }
+
+    std::stringstream output;
+    output << "yaml-cpp: error at line " << mark.line + 1 << ", column "
+           << mark.column + 1 << ": " << msg;
+    return output.str();
+  }
+};
+
+// Exception subtype constructed from a mark and a message; adds no state of
+// its own.  The destructor is declared here and defined out of line.
+class YAML_CPP_API ParserException : public Exception {
+ public:
+  ParserException(const Mark& mark_, const std::string& msg_)
+      : Exception(mark_, msg_) {}
+  ParserException(const ParserException&) = default;
+  virtual ~ParserException() YAML_CPP_NOEXCEPT;
+};
+
+// Common base of the node/representation exceptions declared below; adds no
+// state of its own.  The destructor is declared here and defined out of line.
+class YAML_CPP_API RepresentationException : public Exception {
+ public:
+  RepresentationException(const Mark& mark_, const std::string& msg_)
+      : Exception(mark_, msg_) {}
+  RepresentationException(const RepresentationException&) = default;
+  virtual ~RepresentationException() YAML_CPP_NOEXCEPT;
+};
+
+// representation exceptions
+
+// Carries the fixed ErrorMsg::INVALID_SCALAR message at the given mark.
+class YAML_CPP_API InvalidScalar : public RepresentationException {
+ public:
+  InvalidScalar(const Mark& mark_)
+      : RepresentationException(mark_, ErrorMsg::INVALID_SCALAR) {}
+  InvalidScalar(const InvalidScalar&) = default;
+  virtual ~InvalidScalar() YAML_CPP_NOEXCEPT;
+};
+
+// Key lookup failure.  The message comes from
+// ErrorMsg::KEY_NOT_FOUND_WITH_KEY, which includes the key for std::string
+// and numeric key types and omits it otherwise.
+class YAML_CPP_API KeyNotFound : public RepresentationException {
+ public:
+  template <typename T>
+  KeyNotFound(const Mark& mark_, const T& key_)
+      : RepresentationException(mark_, ErrorMsg::KEY_NOT_FOUND_WITH_KEY(key_)) {
+  }
+  KeyNotFound(const KeyNotFound&) = default;
+  virtual ~KeyNotFound() YAML_CPP_NOEXCEPT;
+};
+
+// KeyNotFound that additionally keeps a copy of the offending key.
+template <typename T>
+class YAML_CPP_API TypedKeyNotFound : public KeyNotFound {
+ public:
+  TypedKeyNotFound(const Mark& mark_, const T& key_)
+      : KeyNotFound(mark_, key_), key(key_) {}
+  virtual ~TypedKeyNotFound() YAML_CPP_NOEXCEPT {}
+
+  T key;  // copy of the key that was not found
+};
+
+// Factory that deduces T from `key`, so callers need not spell out the
+// template argument.
+template <typename T>
+inline TypedKeyNotFound<T> MakeTypedKeyNotFound(const Mark& mark,
+                                                const T& key) {
+  return TypedKeyNotFound<T>(mark, key);
+}
+
+// The classes below each pair a fixed ErrorMsg text with a mark.  Those with
+// a default constructor use Mark::null_mark(), so their what() text carries
+// no line/column prefix.  All destructors are declared here and defined out
+// of line.
+
+// ErrorMsg::INVALID_NODE, no position.
+class YAML_CPP_API InvalidNode : public RepresentationException {
+ public:
+  InvalidNode()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::INVALID_NODE) {}
+  InvalidNode(const InvalidNode&) = default;
+  virtual ~InvalidNode() YAML_CPP_NOEXCEPT;
+};
+
+// ErrorMsg::BAD_CONVERSION at the given mark.
+class YAML_CPP_API BadConversion : public RepresentationException {
+ public:
+  explicit BadConversion(const Mark& mark_)
+      : RepresentationException(mark_, ErrorMsg::BAD_CONVERSION) {}
+  BadConversion(const BadConversion&) = default;
+  virtual ~BadConversion() YAML_CPP_NOEXCEPT;
+};
+
+// BadConversion tagged with a type T.  T is not stored; it only makes the
+// exception type distinct per T.
+template <typename T>
+class TypedBadConversion : public BadConversion {
+ public:
+  explicit TypedBadConversion(const Mark& mark_) : BadConversion(mark_) {}
+};
+
+// ErrorMsg::BAD_DEREFERENCE, no position.
+class YAML_CPP_API BadDereference : public RepresentationException {
+ public:
+  BadDereference()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_DEREFERENCE) {}
+  BadDereference(const BadDereference&) = default;
+  virtual ~BadDereference() YAML_CPP_NOEXCEPT;
+};
+
+// ErrorMsg::BAD_SUBSCRIPT, no position.
+class YAML_CPP_API BadSubscript : public RepresentationException {
+ public:
+  BadSubscript()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_SUBSCRIPT) {}
+  BadSubscript(const BadSubscript&) = default;
+  virtual ~BadSubscript() YAML_CPP_NOEXCEPT;
+};
+
+// ErrorMsg::BAD_PUSHBACK, no position.
+class YAML_CPP_API BadPushback : public RepresentationException {
+ public:
+  BadPushback()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_PUSHBACK) {}
+  BadPushback(const BadPushback&) = default;
+  virtual ~BadPushback() YAML_CPP_NOEXCEPT;
+};
+
+// ErrorMsg::BAD_INSERT, no position.
+class YAML_CPP_API BadInsert : public RepresentationException {
+ public:
+  BadInsert()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_INSERT) {}
+  BadInsert(const BadInsert&) = default;
+  virtual ~BadInsert() YAML_CPP_NOEXCEPT;
+};
+
+// Exception carrying a caller-supplied message and the null mark, so its
+// what() text has no line/column prefix.  The destructor is declared here
+// and defined out of line.
+class YAML_CPP_API EmitterException : public Exception {
+ public:
+  EmitterException(const std::string& msg_)
+      : Exception(Mark::null_mark(), msg_) {}
+  EmitterException(const EmitterException&) = default;
+  virtual ~EmitterException() YAML_CPP_NOEXCEPT;
+};
+
+// Exception carrying the fixed ErrorMsg::BAD_FILE message and the null mark.
+// The destructor is declared here and defined out of line.
+class YAML_CPP_API BadFile : public Exception {
+ public:
+  BadFile() : Exception(Mark::null_mark(), ErrorMsg::BAD_FILE) {}
+  BadFile(const BadFile&) = default;
+  virtual ~BadFile() YAML_CPP_NOEXCEPT;
+};
+}
+
+#undef YAML_CPP_NOEXCEPT
+
+#endif  // EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/mark.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/mark.h
@ -0,0 +1,29 @@
+#ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+struct YAML_CPP_API Mark {  // position record holding pos, line and column (presumably a location in the input — TODO confirm)
+  Mark() : pos(0), line(0), column(0) {}  // default mark: all three fields are zero
+
+  static const Mark null_mark() { return Mark(-1, -1, -1); }  // sentinel mark: all three fields are -1
+
+  bool is_null() const { return pos == -1 && line == -1 && column == -1; }  // true only when every field is -1
+
+  int pos;
+  int line, column;
+
+ private:
+  Mark(int pos_, int line_, int column_)  // private: within this struct only null_mark() uses it
+      : pos(pos_), line(line_), column(column_) {}
+};
+}
+
+#endif  // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/convert.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/convert.h
@ -0,0 +1,331 @@
+#ifndef NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <array>
+#include <limits>
+#include <list>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include "yaml-cpp/binary.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/null.h"
+
+namespace YAML {
+class Binary;
+struct _Null;
+template <typename T>
+struct convert;
+}  // namespace YAML
+
+namespace YAML {
+namespace conversion {
+inline bool IsInfinity(const std::string& input) {  // exact match on .inf/.Inf/.INF, with or without a leading '+'
+  return input == ".inf" || input == ".Inf" || input == ".INF" ||
+         input == "+.inf" || input == "+.Inf" || input == "+.INF";  // other case mixes (e.g. ".iNf") do not match
+}
+
+inline bool IsNegativeInfinity(const std::string& input) {  // exact match on -.inf/-.Inf/-.INF
+  return input == "-.inf" || input == "-.Inf" || input == "-.INF";
+}
+
+inline bool IsNaN(const std::string& input) {  // exact match on .nan/.NaN/.NAN
+  return input == ".nan" || input == ".NaN" || input == ".NAN";
+}
+}
+
+// Node: conversion between a Node and itself
+template <>
+struct convert<Node> {
+  static Node encode(const Node& rhs) { return rhs; }  // returns rhs by value
+
+  static bool decode(const Node& node, Node& rhs) {
+    rhs.reset(node);  // delegates to Node::reset
+    return true;  // this conversion never reports failure
+  }
+};
+
+// std::string: scalar nodes convert to and from their text
+template <>
+struct convert<std::string> {
+  static Node encode(const std::string& rhs) { return Node(rhs); }
+
+  static bool decode(const Node& node, std::string& rhs) {
+    if (!node.IsScalar())  // only scalar nodes decode; rhs is left untouched otherwise
+      return false;
+    rhs = node.Scalar();
+    return true;
+  }
+};
+
+// C-strings can only be encoded; this specialization deliberately has no decode()
+template <>
+struct convert<const char*> {
+  static Node encode(const char* rhs) { return Node(rhs); }  // takes the pointer by value so const pointers and temporaries bind as well as lvalues
+};
+
+template <std::size_t N>
+struct convert<const char[N]> {  // encode-only conversion for const char arrays of length N (e.g. string literals)
+  static Node encode(const char(&rhs)[N]) { return Node(rhs); }  // array is taken by reference, so N is deduced
+};
+
+template <>
+struct convert<_Null> {  // conversion for the _Null tag type
+  static Node encode(const _Null& /* rhs */) { return Node(); }  // encodes as a default-constructed Node
+
+  static bool decode(const Node& node, _Null& /* rhs */) {
+    return node.IsNull();  // succeeds exactly when the node is null; nothing is written to rhs
+  }
+};
+
+#define YAML_DEFINE_CONVERT_STREAMABLE(type, negative_op) /* stream-based convert<type> */ \
+  template <>                                                            \
+  struct convert<type> {                                                 \
+    static Node encode(const type& rhs) {                                \
+      std::stringstream stream;                                          \
+      stream.precision(std::numeric_limits<type>::digits10 + 1);         \
+      stream << rhs; /* written with digits10 + 1 digits of precision */ \
+      return Node(stream.str());                                         \
+    }                                                                    \
+                                                                         \
+    static bool decode(const Node& node, type& rhs) {                    \
+      if (node.Type() != NodeType::Scalar)                               \
+        return false;                                                    \
+      const std::string& input = node.Scalar();                          \
+      std::stringstream stream(input);                                   \
+      stream.unsetf(std::ios::dec); /* no fixed base: prefix decides */  \
+      if ((stream >> std::noskipws >> rhs) && (stream >> std::ws).eof()) \
+        return true; /* whole scalar consumed, trailing ws allowed */    \
+      if (std::numeric_limits<type>::has_infinity) {                     \
+        if (conversion::IsInfinity(input)) {                             \
+          rhs = std::numeric_limits<type>::infinity();                   \
+          return true;                                                   \
+        } else if (conversion::IsNegativeInfinity(input)) {              \
+          rhs = negative_op std::numeric_limits<type>::infinity();       \
+          return true; /* negative_op is the sign applied here */        \
+        }                                                                \
+      }                                                                  \
+                                                                         \
+      if (std::numeric_limits<type>::has_quiet_NaN &&                    \
+          conversion::IsNaN(input)) {                                    \
+        rhs = std::numeric_limits<type>::quiet_NaN();                    \
+        return true;                                                     \
+      }                                                                  \
+                                                                         \
+      return false; /* neither a stream-parsable value nor inf/nan */    \
+    }                                                                    \
+  }
+
+#define YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(type) \
+  YAML_DEFINE_CONVERT_STREAMABLE(type, -)  // negative infinity is produced as -infinity()
+
+#define YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(type) \
+  YAML_DEFINE_CONVERT_STREAMABLE(type, +)  // unary '+' keeps the expression valid without negating an unsigned value
+
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(int);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(short);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long long);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned short);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long long);
+
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(char);  // NOTE(review): stream << and >> treat char types as characters, not numbers — confirm this is intended
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(signed char);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned char);
+
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(float);  // floating-point types are the ones where the inf/NaN branches can apply
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(double);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long double);
+
+#undef YAML_DEFINE_CONVERT_STREAMABLE_SIGNED  // the three helper macros do not leak out of this header
+#undef YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED
+#undef YAML_DEFINE_CONVERT_STREAMABLE
+
+// bool: encodes as "true"/"false"; decode is only declared here
+template <>
+struct convert<bool> {
+  static Node encode(bool rhs) { return rhs ? Node("true") : Node("false"); }
+
+  YAML_CPP_API static bool decode(const Node& node, bool& rhs);  // declaration only; the accepted spellings are not visible in this header
+};
+
+// std::map: map nodes <-> std::map<K, V>
+template <typename K, typename V>
+struct convert<std::map<K, V>> {
+  static Node encode(const std::map<K, V>& rhs) {
+    Node node(NodeType::Map);
+    for (typename std::map<K, V>::const_iterator it = rhs.begin();
+         it != rhs.end(); ++it)
+      node.force_insert(it->first, it->second);  // one key/value pair per map entry
+    return node;
+  }
+
+  static bool decode(const Node& node, std::map<K, V>& rhs) {
+    if (!node.IsMap())  // non-map nodes are rejected before rhs is touched
+      return false;
+
+    rhs.clear();  // previous contents of rhs are discarded
+    for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs[it->first.template as<K>()] = it->second.template as<V>();
+#else
+      rhs[it->first.as<K>()] = it->second.as<V>();  // operator[] assignment: a repeated key keeps the last value
+#endif
+    return true;
+  }
+};
+
+// std::vector: sequence nodes <-> std::vector<T>
+template <typename T>
+struct convert<std::vector<T>> {
+  static Node encode(const std::vector<T>& rhs) {
+    Node node(NodeType::Sequence);
+    for (typename std::vector<T>::const_iterator it = rhs.begin();
+         it != rhs.end(); ++it)
+      node.push_back(*it);  // elements are appended in container order
+    return node;
+  }
+
+  static bool decode(const Node& node, std::vector<T>& rhs) {
+    if (!node.IsSequence())  // non-sequence nodes are rejected before rhs is touched
+      return false;
+
+    rhs.clear();  // previous contents of rhs are discarded
+    for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs.push_back(it->template as<T>());
+#else
+      rhs.push_back(it->as<T>());  // each element is converted with as<T>()
+#endif
+    return true;
+  }
+};
+
+// std::list: sequence nodes <-> std::list<T>
+template <typename T>
+struct convert<std::list<T>> {
+  static Node encode(const std::list<T>& rhs) {
+    Node node(NodeType::Sequence);
+    for (typename std::list<T>::const_iterator it = rhs.begin();
+         it != rhs.end(); ++it)
+      node.push_back(*it);  // elements are appended in container order
+    return node;
+  }
+
+  static bool decode(const Node& node, std::list<T>& rhs) {
+    if (!node.IsSequence())  // non-sequence nodes are rejected before rhs is touched
+      return false;
+
+    rhs.clear();  // previous contents of rhs are discarded
+    for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs.push_back(it->template as<T>());
+#else
+      rhs.push_back(it->as<T>());  // each element is converted with as<T>()
+#endif
+    return true;
+  }
+};
+
+// std::array: sequence nodes of exactly N elements <-> std::array<T, N>
+template <typename T, std::size_t N>
+struct convert<std::array<T, N>> {
+  static Node encode(const std::array<T, N>& rhs) {
+    Node node(NodeType::Sequence);
+    for (const auto& element : rhs) {
+      node.push_back(element);  // elements are appended in index order
+    }
+    return node;
+  }
+
+  static bool decode(const Node& node, std::array<T, N>& rhs) {
+    if (!isNodeValid(node)) {  // must be a sequence with exactly N entries
+      return false;
+    }
+
+    for (auto i = 0u; i < node.size(); ++i) {  // node.size() == N was checked above, so rhs[i] stays in range
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs[i] = node[i].template as<T>();
+#else
+      rhs[i] = node[i].as<T>();
+#endif
+    }
+    return true;
+  }
+
+ private:
+  static bool isNodeValid(const Node& node) {  // shape check used by decode
+    return node.IsSequence() && node.size() == N;
+  }
+};
+
+// std::pair: two-element sequence nodes <-> std::pair<T, U>
+template <typename T, typename U>
+struct convert<std::pair<T, U>> {
+  static Node encode(const std::pair<T, U>& rhs) {
+    Node node(NodeType::Sequence);
+    node.push_back(rhs.first);  // element 0
+    node.push_back(rhs.second);  // element 1
+    return node;
+  }
+
+  static bool decode(const Node& node, std::pair<T, U>& rhs) {
+    if (!node.IsSequence())
+      return false;
+    if (node.size() != 2)  // anything other than exactly two elements is rejected
+      return false;
+
+#if defined(__GNUC__) && __GNUC__ < 4
+    // workaround for GCC 3:
+    rhs.first = node[0].template as<T>();
+#else
+    rhs.first = node[0].as<T>();
+#endif
+#if defined(__GNUC__) && __GNUC__ < 4
+    // workaround for GCC 3:
+    rhs.second = node[1].template as<U>();
+#else
+    rhs.second = node[1].as<U>();
+#endif
+    return true;
+  }
+};
+
+// binary: Binary <-> base64 text held in a scalar node
+template <>
+struct convert<Binary> {
+  static Node encode(const Binary& rhs) {
+    return Node(EncodeBase64(rhs.data(), rhs.size()));
+  }
+
+  static bool decode(const Node& node, Binary& rhs) {
+    if (!node.IsScalar())
+      return false;
+
+    std::vector<unsigned char> data = DecodeBase64(node.Scalar());
+    if (data.empty() && !node.Scalar().empty())  // non-empty text that decodes to nothing is treated as a failure
+      return false;
+
+    rhs.swap(data);  // hands the decoded bytes to rhs via Binary::swap
+    return true;
+  }
+};
+}
+
+#endif  // NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/bool_type.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/bool_type.h
@ -0,0 +1,26 @@
+#ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+namespace detail {
+struct unspecified_bool {  // supplies the function-pointer value that YAML_CPP_OPERATOR_BOOL returns for "true"
+  struct NOT_ALLOWED;  // declared but not defined in this header
+  static void true_value(NOT_ALLOWED*) {}  // its address serves as the non-null "true" result
+};
+typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*);  // pointer type matching true_value
+}
+}
+
+#define YAML_CPP_OPERATOR_BOOL() /* converts to a pointer type, not to bool */ \
+  operator YAML::detail::unspecified_bool_type() const {                    \
+    return this->operator!() ? 0 /* null when operator!() is true */        \
+                             : &YAML::detail::unspecified_bool::true_value; \
+  }
+
+#endif  // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/impl.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/impl.h
@ -0,0 +1,185 @@
+#ifndef NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/node/detail/node_data.h"
+#include <type_traits>
+
+namespace YAML {
+namespace detail {
+template <typename Key, typename Enable = void>
+struct get_idx {  // primary template: for key types with no matching specialization, never indexes the sequence
+  static node* get(const std::vector<node*>& /* sequence */,
+                   const Key& /* key */, shared_memory_holder /* pMemory */) {
+    return 0;  // always reports "not found"
+  }
+};
+
+template <typename Key>
+struct get_idx<Key,
+               typename std::enable_if<std::is_unsigned<Key>::value &&
+                                       !std::is_same<Key, bool>::value>::type> {  // unsigned keys other than bool index the sequence directly
+  static node* get(const std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder /* pMemory */) {
+    return key < sequence.size() ? sequence[key] : 0;  // read-only lookup: null when out of range
+  }
+
+  static node* get(std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder pMemory) {
+   if (key > sequence.size() || (key > 0 && !sequence[key-1]->is_defined()))  // refuse gaps: beyond one-past-the-end, or previous element undefined
+      return 0;
+    if (key == sequence.size())
+      sequence.push_back(&pMemory->create_node());  // key == size appends one fresh node
+    return sequence[key];
+  }
+};
+
+template <typename Key>
+struct get_idx<Key, typename std::enable_if<std::is_signed<Key>::value>::type> {  // signed keys: negatives never match, others defer to get_idx<std::size_t>
+  static node* get(const std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder pMemory) {
+    return key >= 0 ? get_idx<std::size_t>::get(
+                          sequence, static_cast<std::size_t>(key), pMemory)
+                    : 0;  // negative key: no element
+  }
+  static node* get(std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder pMemory) {
+    return key >= 0 ? get_idx<std::size_t>::get(
+                          sequence, static_cast<std::size_t>(key), pMemory)
+                    : 0;  // negative key: no element, nothing is appended
+  }
+};
+
+template <typename T>
+inline bool node::equals(const T& rhs, shared_memory_holder pMemory) {  // true when this node decodes to a T that compares equal to rhs
+  T lhs;  // requires T to be default-constructible
+  if (convert<T>::decode(Node(*this, pMemory), lhs)) {
+    return lhs == rhs;
+  }
+  return false;  // a failed decode counts as "not equal"
+}
+
+inline bool node::equals(const char* rhs, shared_memory_holder pMemory) {  // C-string overload: compares as std::string
+  return equals<std::string>(rhs, pMemory);
+}
+
+// indexing
+template <typename Key>
+inline node* node_data::get(const Key& key,
+                            shared_memory_holder pMemory) const {  // const lookup: returns the matching node or NULL, never inserts
+  switch (m_type) {
+    case NodeType::Map:
+      break;  // handled by the linear search after the switch
+    case NodeType::Undefined:
+    case NodeType::Null:
+      return NULL;
+    case NodeType::Sequence:
+      if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory))
+        return pNode;
+      return NULL;
+    case NodeType::Scalar:
+      throw BadSubscript();  // subscripting a scalar is an error
+  }
+
+  for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->first->equals(key, pMemory)) {  // first matching key wins
+      return it->second;
+    }
+  }
+
+  return NULL;  // no key in m_map matched
+}
+
+template <typename Key>
+inline node& node_data::get(const Key& key, shared_memory_holder pMemory) {  // mutable lookup: creates the entry when it is missing
+  switch (m_type) {
+    case NodeType::Map:
+      break;
+    case NodeType::Undefined:
+    case NodeType::Null:
+    case NodeType::Sequence:
+      if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory)) {
+        m_type = NodeType::Sequence;  // an index hit makes (or keeps) this a sequence
+        return *pNode;
+      }
+
+      convert_to_map(pMemory);  // key did not resolve as a sequence index: switch to map storage
+      break;
+    case NodeType::Scalar:
+      throw BadSubscript();  // subscripting a scalar is an error
+  }
+
+  for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->first->equals(key, pMemory)) {  // first matching key wins
+      return *it->second;
+    }
+  }
+
+  node& k = convert_to_node(key, pMemory);  // no existing key matched: add key -> fresh node
+  node& v = pMemory->create_node();
+  insert_map_pair(k, v);
+  return v;
+}
+
+template <typename Key>
+inline bool node_data::remove(const Key& key, shared_memory_holder pMemory) {  // map-only removal; returns whether an m_map entry was erased
+  if (m_type != NodeType::Map)
+    return false;
+
+  for (kv_pairs::iterator it = m_undefinedPairs.begin();
+       it != m_undefinedPairs.end();) {
+    kv_pairs::iterator jt = std::next(it);  // take the successor first: erase() invalidates it
+    if (it->first->equals(key, pMemory))
+      m_undefinedPairs.erase(it);  // every matching undefined pair is dropped
+    it = jt;
+  }
+
+  for (node_map::iterator it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->first->equals(key, pMemory)) {
+      m_map.erase(it);  // only the first match is erased
+      return true;
+    }
+  }
+
+  return false;  // also the result when only m_undefinedPairs entries were erased
+}
+
+// map
+template <typename Key, typename Value>
+inline void node_data::force_insert(const Key& key, const Value& value,
+                                    shared_memory_holder pMemory) {  // adds key/value as a map pair; no lookup for an existing key is done here
+  switch (m_type) {
+    case NodeType::Map:
+      break;
+    case NodeType::Undefined:
+    case NodeType::Null:
+    case NodeType::Sequence:
+      convert_to_map(pMemory);  // non-map containers are converted to a map first
+      break;
+    case NodeType::Scalar:
+      throw BadInsert();  // inserting into a scalar is an error
+  }
+
+  node& k = convert_to_node(key, pMemory);
+  node& v = convert_to_node(value, pMemory);
+  insert_map_pair(k, v);
+}
+
+template <typename T>
+inline node& node_data::convert_to_node(const T& rhs,
+                                        shared_memory_holder pMemory) {  // encodes rhs as a Node and returns its underlying detail::node
+  Node value = convert<T>::encode(rhs);
+  value.EnsureNodeExists();
+  pMemory->merge(*value.m_pMemory);  // merge the temporary Node's memory into pMemory (presumably so the node outlives `value` — TODO confirm)
+  return *value.m_pNode;
+}
+}
+}
+
+#endif  // NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator.h
@ -0,0 +1,92 @@
+#ifndef VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include <cstddef>
+#include <iterator>
+
+namespace YAML {
+namespace detail {
+struct iterator_value;
+
+template <typename V>
+class iterator_base : public std::iterator<std::forward_iterator_tag, V,
+                                           std::ptrdiff_t, V*, V> {  // forward iterator wrapping node_iterator; dereferencing yields a V by value
+
+ private:
+  template <typename>
+  friend class iterator_base;  // lets the converting constructor and comparisons read other instantiations' members
+  struct enabler {};  // dummy type for the enable_if-constrained constructor
+  typedef node_iterator base_type;
+
+  struct proxy {  // temporary returned by operator->; holds its own V
+    explicit proxy(const V& x) : m_ref(x) {}
+    V* operator->() { return std::addressof(m_ref); }
+    operator V*() { return std::addressof(m_ref); }
+
+    V m_ref;  // a copy of the value, despite the name
+  };
+
+ public:
+  typedef typename iterator_base::value_type value_type;
+
+ public:
+  iterator_base() : m_iterator(), m_pMemory() {}
+  explicit iterator_base(base_type rhs, shared_memory_holder pMemory)
+      : m_iterator(rhs), m_pMemory(pMemory) {}
+
+  template <class W>
+  iterator_base(const iterator_base<W>& rhs,
+                typename std::enable_if<std::is_convertible<W*, V*>::value,
+                                        enabler>::type = enabler())  // only enabled when W* converts to V* (e.g. non-const to const)
+      : m_iterator(rhs.m_iterator), m_pMemory(rhs.m_pMemory) {}
+
+  iterator_base<V>& operator++() {
+    ++m_iterator;
+    return *this;
+  }
+
+  iterator_base<V> operator++(int) {
+    iterator_base<V> iterator_pre(*this);  // snapshot to return after advancing
+    ++(*this);
+    return iterator_pre;
+  }
+
+  template <typename W>
+  bool operator==(const iterator_base<W>& rhs) const {
+    return m_iterator == rhs.m_iterator;  // m_pMemory is not part of the comparison
+  }
+
+  template <typename W>
+  bool operator!=(const iterator_base<W>& rhs) const {
+    return m_iterator != rhs.m_iterator;
+  }
+
+  value_type operator*() const {
+    const typename base_type::value_type& v = *m_iterator;
+    if (v.pNode)  // single node set: wrap it alone
+      return value_type(Node(*v, m_pMemory));
+    if (v.first && v.second)  // key/value pair set: wrap both
+      return value_type(Node(*v.first, m_pMemory), Node(*v.second, m_pMemory));
+    return value_type();  // neither set: default-constructed value
+  }
+
+  proxy operator->() const { return proxy(**this); }  // builds the value, then hands out a pointer into the proxy
+
+ private:
+  base_type m_iterator;
+  shared_memory_holder m_pMemory;  // passed to every Node created by operator*
+};
+}
+}
+
+#endif  // VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator_fwd.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator_fwd.h
@ -0,0 +1,27 @@
+#ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+
+namespace detail {
+struct iterator_value;
+template <typename V>
+class iterator_base;
+}
+
+typedef detail::iterator_base<detail::iterator_value> iterator;  // iterator over non-const iterator_value
+typedef detail::iterator_base<const detail::iterator_value> const_iterator;  // iterator over const iterator_value
+}
+
+#endif  // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/memory.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/memory.h
@ -0,0 +1,46 @@
+#ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <set>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/ptr.h"
+
+namespace YAML {
+namespace detail {
+class node;
+}  // namespace detail
+}  // namespace YAML
+
+namespace YAML {
+namespace detail {
+class YAML_CPP_API memory {  // holds a set of shared_node handles (m_nodes)
+ public:
+  node& create_node();  // declaration only; implementation is not in this header
+  void merge(const memory& rhs);  // declaration only; implementation is not in this header
+
+ private:
+  typedef std::set<shared_node> Nodes;
+  Nodes m_nodes;
+};
+
+class YAML_CPP_API memory_holder {  // wrapper around a shared_memory pointer to a memory object
+ public:
+  memory_holder() : m_pMemory(new memory) {}  // starts with its own fresh memory
+
+  node& create_node() { return m_pMemory->create_node(); }  // forwards to the held memory
+  void merge(memory_holder& rhs);  // declaration only; takes rhs by non-const reference
+
+ private:
+  shared_memory m_pMemory;
+};
+}
+}
+
+#endif  // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node.h
@ -0,0 +1,169 @@
+#ifndef NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_ref.h"
+#include <set>
+
+namespace YAML {
+namespace detail {
+class node {  // wraps a shared node_ref plus a set of dependent nodes to mark defined along with this one
+ public:
+  node() : m_pRef(new node_ref) {}
+  node(const node&) = delete;
+  node& operator=(const node&) = delete;
+
+  bool is(const node& rhs) const { return m_pRef == rhs.m_pRef; }  // identity: both share the same node_ref
+  const node_ref* ref() const { return m_pRef.get(); }
+
+  bool is_defined() const { return m_pRef->is_defined(); }
+  const Mark& mark() const { return m_pRef->mark(); }
+  NodeType::value type() const { return m_pRef->type(); }
+
+  const std::string& scalar() const { return m_pRef->scalar(); }
+  const std::string& tag() const { return m_pRef->tag(); }
+  EmitterStyle::value style() const { return m_pRef->style(); }
+
+  template <typename T>
+  bool equals(const T& rhs, shared_memory_holder pMemory);
+  bool equals(const char* rhs, shared_memory_holder pMemory);
+
+  void mark_defined() {
+    if (is_defined())  // already defined: nothing to do
+      return;
+
+    m_pRef->mark_defined();
+    for (nodes::iterator it = m_dependencies.begin();
+         it != m_dependencies.end(); ++it)
+      (*it)->mark_defined();  // propagate to every node registered via add_dependency
+    m_dependencies.clear();
+  }
+
+  void add_dependency(node& rhs) {
+    if (is_defined())
+      rhs.mark_defined();  // this node is already defined: mark rhs right away
+    else
+      m_dependencies.insert(&rhs);  // otherwise defer until mark_defined() runs
+  }
+
+  void set_ref(const node& rhs) {
+    if (rhs.is_defined())
+      mark_defined();  // runs before the ref is swapped, so dependents are notified
+    m_pRef = rhs.m_pRef;  // from here on both nodes share one node_ref
+  }
+  void set_data(const node& rhs) {
+    if (rhs.is_defined())
+      mark_defined();
+    m_pRef->set_data(*rhs.m_pRef);  // delegates to node_ref::set_data; m_pRef itself is not replaced
+  }
+
+  void set_mark(const Mark& mark) { m_pRef->set_mark(mark); }  // does not mark the node defined
+
+  void set_type(NodeType::value type) {
+    if (type != NodeType::Undefined)  // setting Undefined does not mark the node defined
+      mark_defined();
+    m_pRef->set_type(type);
+  }
+  void set_null() {
+    mark_defined();
+    m_pRef->set_null();
+  }
+  void set_scalar(const std::string& scalar) {
+    mark_defined();
+    m_pRef->set_scalar(scalar);
+  }
+  void set_tag(const std::string& tag) {
+    mark_defined();
+    m_pRef->set_tag(tag);
+  }
+
+  // style
+  void set_style(EmitterStyle::value style) {
+    mark_defined();
+    m_pRef->set_style(style);
+  }
+
+  // size/iterator
+  std::size_t size() const { return m_pRef->size(); }
+
+  const_node_iterator begin() const {
+    return static_cast<const node_ref&>(*m_pRef).begin();  // cast selects the const overload
+  }
+  node_iterator begin() { return m_pRef->begin(); }
+
+  const_node_iterator end() const {
+    return static_cast<const node_ref&>(*m_pRef).end();  // cast selects the const overload
+  }
+  node_iterator end() { return m_pRef->end(); }
+
+  // sequence
+  void push_back(node& input, shared_memory_holder pMemory) {
+    m_pRef->push_back(input, pMemory);
+    input.add_dependency(*this);  // this node is marked defined once input is
+  }
+  void insert(node& key, node& value, shared_memory_holder pMemory) {
+    m_pRef->insert(key, value, pMemory);
+    key.add_dependency(*this);  // this node is marked defined once key or value is
+    value.add_dependency(*this);
+  }
+
+  // indexing
+  template <typename Key>
+  node* get(const Key& key, shared_memory_holder pMemory) const {
+    // NOTE: this returns a non-const node so that the top-level Node can wrap
+    // it, and returns a pointer so that it can be NULL (if there is no such
+    // key).
+    return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
+  }
+  template <typename Key>
+  node& get(const Key& key, shared_memory_holder pMemory) {
+    node& value = m_pRef->get(key, pMemory);
+    value.add_dependency(*this);  // this node is marked defined once the returned value is
+    return value;
+  }
+  template <typename Key>
+  bool remove(const Key& key, shared_memory_holder pMemory) {
+    return m_pRef->remove(key, pMemory);
+  }
+
+  node* get(node& key, shared_memory_holder pMemory) const {
+    // NOTE: this returns a non-const node so that the top-level Node can wrap
+    // it, and returns a pointer so that it can be NULL (if there is no such
+    // key).
+    return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
+  }
+  node& get(node& key, shared_memory_holder pMemory) {
+    node& value = m_pRef->get(key, pMemory);
+    key.add_dependency(*this);  // node-keyed lookup registers this node on both key and value
+    value.add_dependency(*this);
+    return value;
+  }
+  bool remove(node& key, shared_memory_holder pMemory) {
+    return m_pRef->remove(key, pMemory);
+  }
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value,
+                    shared_memory_holder pMemory) {
+    m_pRef->force_insert(key, value, pMemory);  // plain forward; no dependency is registered here
+  }
+
+ private:
+  shared_node_ref m_pRef;
+  typedef std::set<node*> nodes;
+  nodes m_dependencies;  // raw pointers to nodes to mark defined when this one becomes defined
+};
+}
+}
+
+#endif  // NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_data.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_data.h
@ -0,0 +1,127 @@
+#ifndef VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <list>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+class node;
+}  // namespace detail
+}  // namespace YAML
+
+namespace YAML {
+namespace detail {
+class YAML_CPP_API node_data {  // storage for one node: type, tag, style, scalar text, sequence items and map pairs
+ public:
+  node_data();
+  node_data(const node_data&) = delete;
+  node_data& operator=(const node_data&) = delete;
+
+  void mark_defined();
+  void set_mark(const Mark& mark);
+  void set_type(NodeType::value type);
+  void set_tag(const std::string& tag);
+  void set_null();
+  void set_scalar(const std::string& scalar);
+  void set_style(EmitterStyle::value style);
+
+  bool is_defined() const { return m_isDefined; }
+  const Mark& mark() const { return m_mark; }
+  NodeType::value type() const {
+    return m_isDefined ? m_type : NodeType::Undefined;  // m_type is hidden until the data is marked defined
+  }
+  const std::string& scalar() const { return m_scalar; }
+  const std::string& tag() const { return m_tag; }
+  EmitterStyle::value style() const { return m_style; }
+
+  // size/iterator
+  std::size_t size() const;
+
+  const_node_iterator begin() const;
+  node_iterator begin();
+
+  const_node_iterator end() const;
+  node_iterator end();
+
+  // sequence
+  void push_back(node& node, shared_memory_holder pMemory);
+  void insert(node& key, node& value, shared_memory_holder pMemory);
+
+  // indexing
+  template <typename Key>
+  node* get(const Key& key, shared_memory_holder pMemory) const;  // pointer result, unlike the non-const overload
+  template <typename Key>
+  node& get(const Key& key, shared_memory_holder pMemory);  // reference result
+  template <typename Key>
+  bool remove(const Key& key, shared_memory_holder pMemory);
+
+  node* get(node& key, shared_memory_holder pMemory) const;
+  node& get(node& key, shared_memory_holder pMemory);
+  bool remove(node& key, shared_memory_holder pMemory);
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value,
+                    shared_memory_holder pMemory);
+
+ public:
+  static std::string empty_scalar;  // static member; declared here, defined elsewhere
+
+ private:
+  void compute_seq_size() const;  // const, so it can only update the mutable members
+  void compute_map_size() const;
+
+  void reset_sequence();
+  void reset_map();
+
+  void insert_map_pair(node& key, node& value);
+  void convert_to_map(shared_memory_holder pMemory);
+  void convert_sequence_to_map(shared_memory_holder pMemory);
+
+  template <typename T>
+  static node& convert_to_node(const T& rhs, shared_memory_holder pMemory);
+
+ private:
+  bool m_isDefined;
+  Mark m_mark;
+  NodeType::value m_type;
+  std::string m_tag;
+  EmitterStyle::value m_style;
+
+  // scalar
+  std::string m_scalar;
+
+  // sequence
+  typedef std::vector<node*> node_seq;
+  node_seq m_sequence;
+
+  mutable std::size_t m_seqSize;  // mutable: may be updated from const member functions
+
+  // map
+  typedef std::vector<std::pair<node*, node*>> node_map;  // a vector of pairs, so lookups are linear searches
+  node_map m_map;
+
+  typedef std::pair<node*, node*> kv_pair;
+  typedef std::list<kv_pair> kv_pairs;
+  mutable kv_pairs m_undefinedPairs;  // mutable: may be updated from const member functions
+};
+}
+}
+
+#endif  // VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_iterator.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_iterator.h
@ -0,0 +1,180 @@
+#ifndef VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/ptr.h"
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <map>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+namespace detail {
+// Tags which kind of container a node iterator is walking: nothing at all
+// (NoneType), a sequence, or a map.
+struct iterator_type {
+  enum value { NoneType, Sequence, Map };
+};
+
+// Value produced by dereferencing a node iterator.
+//
+// It holds either a single element (pNode set, inherited pair
+// value-initialized) or a key/value pair (first/second set, pNode null).
+template <typename V>
+struct node_iterator_value : public std::pair<V*, V*> {
+  typedef std::pair<V*, V*> kv;
+
+  // Empty value: neither an element nor a key/value pair.
+  node_iterator_value() : kv(), pNode(0) {}
+  // Single-element form referring to rhs.
+  explicit node_iterator_value(V& rhs) : kv(), pNode(&rhs) {}
+  // Key/value form; pNode stays null.
+  explicit node_iterator_value(V& key, V& value) : kv(&key, &value), pNode(0) {}
+
+  // Both accessors dereference pNode without a null check, so they are only
+  // meaningful for the single-element form.
+  V& operator*() const { return *pNode; }
+  // NOTE(review): operator-> returns a reference rather than a pointer, which
+  // is unusual for this operator -- confirm it is intentional.
+  V& operator->() const { return *pNode; }
+
+  V* pNode;
+};
+
+// Container types that node iterators walk over: a vector of node pointers
+// for sequences and a vector of (key, value) node-pointer pairs for maps.
+typedef std::vector<node*> node_seq;
+typedef std::vector<std::pair<node*, node*>> node_map;
+
+// Selects the underlying container iterator types for a given V.
+// Primary template: mutable container iterators.
+template <typename V>
+struct node_iterator_type {
+  typedef node_seq::iterator seq;
+  typedef node_map::iterator map;
+};
+
+// Specialization for const V: const container iterators.
+template <typename V>
+struct node_iterator_type<const V> {
+  typedef node_seq::const_iterator seq;
+  typedef node_map::const_iterator map;
+};
+
+// Forward iterator over either a node sequence or a node map.
+//
+// The iterator records which container kind it walks in m_type and keeps a
+// sequence iterator and a map iterator side by side; only the one matching
+// m_type is used. When walking a map, entries whose key or value is not
+// defined are skipped.
+//
+// The standard iterator member types are declared directly instead of being
+// inherited from std::iterator, which is deprecated since C++17.
+template <typename V>
+class node_iterator_base {
+ private:
+  struct enabler {};
+
+  // Holder returned by operator->: operator* returns by value, so the value
+  // is copied into the proxy to give member access something to point at.
+  struct proxy {
+    explicit proxy(const node_iterator_value<V>& x) : m_ref(x) {}
+    node_iterator_value<V>* operator->() { return std::addressof(m_ref); }
+    operator node_iterator_value<V>*() { return std::addressof(m_ref); }
+
+    node_iterator_value<V> m_ref;
+  };
+
+ public:
+  // Same member types the former std::iterator base class provided.
+  typedef std::forward_iterator_tag iterator_category;
+  typedef node_iterator_value<V> value_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef node_iterator_value<V>* pointer;
+  typedef node_iterator_value<V> reference;
+
+  typedef typename node_iterator_type<V>::seq SeqIter;
+  typedef typename node_iterator_type<V>::map MapIter;
+
+  // Iterator over nothing; two such iterators compare equal.
+  node_iterator_base()
+      : m_type(iterator_type::NoneType), m_seqIt(), m_mapIt(), m_mapEnd() {}
+  // Iterator positioned at seqIt within a sequence.
+  explicit node_iterator_base(SeqIter seqIt)
+      : m_type(iterator_type::Sequence),
+        m_seqIt(seqIt),
+        m_mapIt(),
+        m_mapEnd() {}
+  // Iterator positioned at the first defined entry in [mapIt, mapEnd).
+  explicit node_iterator_base(MapIter mapIt, MapIter mapEnd)
+      : m_type(iterator_type::Map),
+        m_seqIt(),
+        m_mapIt(mapIt),
+        m_mapEnd(mapEnd) {
+    m_mapIt = increment_until_defined(m_mapIt);
+  }
+
+  // Converting constructor, enabled only when W* converts to V*
+  // (for example node -> const node).
+  template <typename W>
+  node_iterator_base(const node_iterator_base<W>& rhs,
+                     typename std::enable_if<std::is_convertible<W*, V*>::value,
+                                             enabler>::type = enabler())
+      : m_type(rhs.m_type),
+        m_seqIt(rhs.m_seqIt),
+        m_mapIt(rhs.m_mapIt),
+        m_mapEnd(rhs.m_mapEnd) {}
+
+  // Other instantiations need access to the private members for the
+  // converting constructor and the comparisons.
+  template <typename>
+  friend class node_iterator_base;
+
+  // Iterators are equal when they walk the same kind of container and the
+  // active underlying iterators are equal.
+  template <typename W>
+  bool operator==(const node_iterator_base<W>& rhs) const {
+    if (m_type != rhs.m_type)
+      return false;
+
+    switch (m_type) {
+      case iterator_type::NoneType:
+        return true;
+      case iterator_type::Sequence:
+        return m_seqIt == rhs.m_seqIt;
+      case iterator_type::Map:
+        return m_mapIt == rhs.m_mapIt;
+    }
+    return true;
+  }
+
+  template <typename W>
+  bool operator!=(const node_iterator_base<W>& rhs) const {
+    return !(*this == rhs);
+  }
+
+  // Pre-increment. For maps, also skips any following undefined entries.
+  node_iterator_base<V>& operator++() {
+    switch (m_type) {
+      case iterator_type::NoneType:
+        break;
+      case iterator_type::Sequence:
+        ++m_seqIt;
+        break;
+      case iterator_type::Map:
+        ++m_mapIt;
+        m_mapIt = increment_until_defined(m_mapIt);
+        break;
+    }
+    return *this;
+  }
+
+  // Post-increment: returns a copy of the iterator as it was before advancing.
+  node_iterator_base<V> operator++(int) {
+    node_iterator_base<V> iterator_pre(*this);
+    ++(*this);
+    return iterator_pre;
+  }
+
+  // Returns the current element by value: a single-element value for
+  // sequences, a key/value value for maps, an empty value otherwise.
+  value_type operator*() const {
+    switch (m_type) {
+      case iterator_type::NoneType:
+        return value_type();
+      case iterator_type::Sequence:
+        return value_type(**m_seqIt);
+      case iterator_type::Map:
+        return value_type(*m_mapIt->first, *m_mapIt->second);
+    }
+    return value_type();
+  }
+
+  proxy operator->() const { return proxy(**this); }
+
+  // Advances it until it reaches m_mapEnd or an entry whose key and value are
+  // both defined, and returns the resulting position.
+  MapIter increment_until_defined(MapIter it) {
+    while (it != m_mapEnd && !is_defined(it))
+      ++it;
+    return it;
+  }
+
+  // True when both the key and the value of the entry at it are defined.
+  bool is_defined(MapIter it) const {
+    return it->first->is_defined() && it->second->is_defined();
+  }
+
+ private:
+  typename iterator_type::value m_type;
+
+  SeqIter m_seqIt;
+  MapIter m_mapIt, m_mapEnd;
+};
+
+// Mutable and const iterator instantiations over detail::node.
+typedef node_iterator_base<node> node_iterator;
+typedef node_iterator_base<const node> const_node_iterator;
+}
+}
+
+#endif  // VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_ref.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_ref.h
@ -0,0 +1,98 @@
+#ifndef VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_data.h"
+
+namespace YAML {
+namespace detail {
+// Non-copyable handle that owns a shared pointer to a node_data and forwards
+// every query and mutation to it. set_data() rebinds the handle so that it
+// shares another node_ref's data.
+class node_ref {
+ public:
+  // Starts out with a freshly allocated node_data.
+  node_ref() : m_pData(new node_data) {}
+  node_ref(const node_ref&) = delete;
+  node_ref& operator=(const node_ref&) = delete;
+
+  // Read-only accessors, forwarded to the shared node_data.
+  bool is_defined() const { return m_pData->is_defined(); }
+  const Mark& mark() const { return m_pData->mark(); }
+  NodeType::value type() const { return m_pData->type(); }
+  const std::string& scalar() const { return m_pData->scalar(); }
+  const std::string& tag() const { return m_pData->tag(); }
+  EmitterStyle::value style() const { return m_pData->style(); }
+
+  void mark_defined() { m_pData->mark_defined(); }
+  // Shares rhs's node_data; afterwards both handles point at the same data.
+  void set_data(const node_ref& rhs) { m_pData = rhs.m_pData; }
+
+  // Mutators, forwarded to the shared node_data.
+  void set_mark(const Mark& mark) { m_pData->set_mark(mark); }
+  void set_type(NodeType::value type) { m_pData->set_type(type); }
+  void set_tag(const std::string& tag) { m_pData->set_tag(tag); }
+  void set_null() { m_pData->set_null(); }
+  void set_scalar(const std::string& scalar) { m_pData->set_scalar(scalar); }
+  void set_style(EmitterStyle::value style) { m_pData->set_style(style); }
+
+  // size/iterator
+  std::size_t size() const { return m_pData->size(); }
+
+  // The const overloads cast to const node_data& so that the const overload
+  // of the forwarded member is selected.
+  const_node_iterator begin() const {
+    return static_cast<const node_data&>(*m_pData).begin();
+  }
+  node_iterator begin() { return m_pData->begin(); }
+
+  const_node_iterator end() const {
+    return static_cast<const node_data&>(*m_pData).end();
+  }
+  node_iterator end() { return m_pData->end(); }
+
+  // sequence
+  void push_back(node& node, shared_memory_holder pMemory) {
+    m_pData->push_back(node, pMemory);
+  }
+  void insert(node& key, node& value, shared_memory_holder pMemory) {
+    m_pData->insert(key, value, pMemory);
+  }
+
+  // indexing
+  // The const get() overloads return a pointer, the non-const ones a
+  // reference, mirroring the node_data overloads they forward to.
+  template <typename Key>
+  node* get(const Key& key, shared_memory_holder pMemory) const {
+    return static_cast<const node_data&>(*m_pData).get(key, pMemory);
+  }
+  template <typename Key>
+  node& get(const Key& key, shared_memory_holder pMemory) {
+    return m_pData->get(key, pMemory);
+  }
+  template <typename Key>
+  bool remove(const Key& key, shared_memory_holder pMemory) {
+    return m_pData->remove(key, pMemory);
+  }
+
+  node* get(node& key, shared_memory_holder pMemory) const {
+    return static_cast<const node_data&>(*m_pData).get(key, pMemory);
+  }
+  node& get(node& key, shared_memory_holder pMemory) {
+    return m_pData->get(key, pMemory);
+  }
+  bool remove(node& key, shared_memory_holder pMemory) {
+    return m_pData->remove(key, pMemory);
+  }
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value,
+                    shared_memory_holder pMemory) {
+    m_pData->force_insert(key, value, pMemory);
+  }
+
+ private:
+  // Shared so that several node_ref handles can refer to the same data.
+  shared_node_data m_pData;
+};
+}
+}
+
+#endif  // VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/emit.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/emit.h
@ -0,0 +1,32 @@
+#ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <iosfwd>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class Emitter;
+class Node;
+
+/**
+ * Emits the node to the given {@link Emitter}. If there is an error in writing,
+ * {@link Emitter#good} will return false.
+ */
+YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node);
+
+/** Emits the node to the given output stream. */
+YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node);
+
+/** Converts the node to a YAML string. */
+YAML_CPP_API std::string Dump(const Node& node);
+}  // namespace YAML
+
+#endif  // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/impl.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/impl.h
@ -0,0 +1,448 @@
+#ifndef NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/detail/memory.h"
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/exceptions.h"
+#include <string>
+
+namespace YAML {
+// Default node: valid, but with no underlying detail::node yet.
+inline Node::Node() : m_isValid(true), m_pNode(NULL) {}
+
+// Creates a node of the given type, backed by a fresh memory holder.
+inline Node::Node(NodeType::value type)
+    : m_isValid(true),
+      m_pMemory(new detail::memory_holder),
+      m_pNode(&m_pMemory->create_node()) {
+  m_pNode->set_type(type);
+}
+
+// Creates a node backed by a fresh memory holder and assigns rhs to it.
+template <typename T>
+inline Node::Node(const T& rhs)
+    : m_isValid(true),
+      m_pMemory(new detail::memory_holder),
+      m_pNode(&m_pMemory->create_node()) {
+  Assign(rhs);
+}
+
+// Shares validity, memory holder and underlying node with an iterator value.
+inline Node::Node(const detail::iterator_value& rhs)
+    : m_isValid(rhs.m_isValid),
+      m_pMemory(rhs.m_pMemory),
+      m_pNode(rhs.m_pNode) {}
+
+// Copy constructor: the copy shares rhs's memory holder and underlying node
+// rather than duplicating the data.
+inline Node::Node(const Node& rhs)
+    : m_isValid(rhs.m_isValid),
+      m_pMemory(rhs.m_pMemory),
+      m_pNode(rhs.m_pNode) {}
+
+// Builds an invalid ("zombie") node: m_isValid is false and there is no
+// underlying node.
+inline Node::Node(Zombie) : m_isValid(false), m_pNode(NULL) {}
+
+// Wraps an existing detail::node together with the memory holder passed in.
+inline Node::Node(detail::node& node, detail::shared_memory_holder pMemory)
+    : m_isValid(true), m_pMemory(pMemory), m_pNode(&node) {}
+
+inline Node::~Node() {}
+
+// Lazily creates the underlying node.
+//
+// Throws InvalidNode when this node is invalid. Otherwise, if no underlying
+// node exists yet, allocates a fresh memory holder, creates a node in it and
+// sets that node to null. Callable on const objects because the affected
+// members are mutable.
+inline void Node::EnsureNodeExists() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  if (m_pNode) {
+    return;  // already backed by a node, nothing to do
+  }
+
+  m_pMemory.reset(new detail::memory_holder);
+  m_pNode = &m_pMemory->create_node();
+  m_pNode->set_null();
+}
+
+// Reports whether the node is defined: false for an invalid node, true for a
+// valid node that has no underlying node yet, otherwise whatever the
+// underlying node reports.
+inline bool Node::IsDefined() const {
+  if (!m_isValid)
+    return false;
+  if (!m_pNode)
+    return true;
+  return m_pNode->is_defined();
+}
+
+// Returns the mark of the underlying node, or the null mark when there is no
+// underlying node yet. Throws InvalidNode for an invalid node.
+inline Mark Node::Mark() const {
+  if (!m_isValid)
+    throw InvalidNode();
+  if (!m_pNode)
+    return Mark::null_mark();
+  return m_pNode->mark();
+}
+
+// Returns the type of the underlying node, or NodeType::Null when there is no
+// underlying node yet. Throws InvalidNode for an invalid node.
+inline NodeType::value Node::Type() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  if (!m_pNode) {
+    return NodeType::Null;
+  }
+  return m_pNode->type();
+}
+
+// access
+
+// template helpers
+// Conversion functor used by Node::as. T is the target type; S is the type of
+// the fallback value, or void for the throwing variants below.
+//
+// Primary template: conversion with a fallback. Returns the fallback when the
+// node has no underlying node or when convert<T>::decode fails.
+template <typename T, typename S>
+struct as_if {
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;
+
+  T operator()(const S& fallback) const {
+    if (!node.m_pNode)
+      return fallback;
+
+    T t;
+    if (convert<T>::decode(node, t))
+      return t;
+    return fallback;
+  }
+};
+
+// std::string with a fallback: returns the scalar text directly (without
+// going through convert<>) and the fallback for any non-scalar node.
+template <typename S>
+struct as_if<std::string, S> {
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;
+
+  std::string operator()(const S& fallback) const {
+    if (node.Type() != NodeType::Scalar)
+      return fallback;
+    return node.Scalar();
+  }
+};
+
+// No fallback: throws TypedBadConversion<T> when the node has no underlying
+// node or when convert<T>::decode fails.
+template <typename T>
+struct as_if<T, void> {
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;
+
+  T operator()() const {
+    if (!node.m_pNode)
+      throw TypedBadConversion<T>(node.Mark());
+
+    T t;
+    if (convert<T>::decode(node, t))
+      return t;
+    throw TypedBadConversion<T>(node.Mark());
+  }
+};
+
+// std::string without a fallback: throws TypedBadConversion<std::string> for
+// any non-scalar node, otherwise returns the scalar text.
+template <>
+struct as_if<std::string, void> {
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;
+
+  std::string operator()() const {
+    if (node.Type() != NodeType::Scalar)
+      throw TypedBadConversion<std::string>(node.Mark());
+    return node.Scalar();
+  }
+};
+
+// access functions
+// Converts the node to T. Throws InvalidNode for an invalid node; otherwise
+// defers to the throwing as_if<T, void> functor.
+template <typename T>
+inline T Node::as() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  const as_if<T, void> converter(*this);
+  return converter();
+}
+
+// Converts the node to T, yielding fallback instead of throwing. An invalid
+// node yields the fallback immediately.
+template <typename T, typename S>
+inline T Node::as(const S& fallback) const {
+  if (!m_isValid) {
+    return fallback;
+  }
+  const as_if<T, S> converter(*this);
+  return converter(fallback);
+}
+
+// Returns the scalar text of the underlying node, or the shared empty string
+// when there is no underlying node. Throws InvalidNode for an invalid node.
+inline const std::string& Node::Scalar() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  if (!m_pNode) {
+    return detail::node_data::empty_scalar;
+  }
+  return m_pNode->scalar();
+}
+
+// Returns the tag of the underlying node, or the shared empty string when
+// there is no underlying node. Throws InvalidNode for an invalid node.
+inline const std::string& Node::Tag() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  if (!m_pNode) {
+    return detail::node_data::empty_scalar;
+  }
+  return m_pNode->tag();
+}
+
+// Sets the tag, creating the underlying node first if necessary.
+// Throws InvalidNode for an invalid node.
+inline void Node::SetTag(const std::string& tag) {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  EnsureNodeExists();
+  m_pNode->set_tag(tag);
+}
+
+// Returns the emitter style of the underlying node, or EmitterStyle::Default
+// when there is no underlying node. Throws InvalidNode for an invalid node.
+inline EmitterStyle::value Node::Style() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  if (!m_pNode) {
+    return EmitterStyle::Default;
+  }
+  return m_pNode->style();
+}
+
+// Sets the emitter style, creating the underlying node first if necessary.
+// Throws InvalidNode for an invalid node.
+inline void Node::SetStyle(EmitterStyle::value style) {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  EnsureNodeExists();
+  m_pNode->set_style(style);
+}
+
+// assignment
+// Identity test between two nodes. Throws InvalidNode if either node is
+// invalid; returns false if either has no underlying node; otherwise defers
+// to the underlying node's is() check.
+inline bool Node::is(const Node& rhs) const {
+  if (!(m_isValid && rhs.m_isValid)) {
+    throw InvalidNode();
+  }
+  const bool bothExist = m_pNode && rhs.m_pNode;
+  return bothExist ? m_pNode->is(*rhs.m_pNode) : false;
+}
+
+// Assigns an arbitrary value to this node via Assign().
+// Throws InvalidNode for an invalid node.
+template <typename T>
+inline Node& Node::operator=(const T& rhs) {
+  if (!m_isValid)
+    throw InvalidNode();
+  Assign(rhs);
+  return *this;
+}
+
+// Rebinds this handle to rhs's memory holder and underlying node. Only the
+// handle's own pointers change; the node previously referred to is not
+// modified. Throws InvalidNode if either node is invalid.
+inline void Node::reset(const YAML::Node& rhs) {
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  m_pMemory = rhs.m_pMemory;
+  m_pNode = rhs.m_pNode;
+}
+
+// Generic assignment: encodes rhs with convert<T>::encode and takes over the
+// data of the encoded result. Throws InvalidNode for an invalid node.
+template <typename T>
+inline void Node::Assign(const T& rhs) {
+  if (!m_isValid)
+    throw InvalidNode();
+  AssignData(convert<T>::encode(rhs));
+}
+
+// std::string specialization: stores rhs directly as the scalar, bypassing
+// convert<>.
+template <>
+inline void Node::Assign(const std::string& rhs) {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_scalar(rhs);
+}
+
+// C-string overload: stores rhs directly as the scalar.
+inline void Node::Assign(const char* rhs) {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_scalar(rhs);
+}
+
+// Mutable C-string overload; same behavior as the const char* overload.
+inline void Node::Assign(char* rhs) {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_scalar(rhs);
+}
+
+// Node-to-node assignment. Throws InvalidNode if either side is invalid and
+// does nothing when both sides already are the same node.
+inline Node& Node::operator=(const Node& rhs) {
+  if (!m_isValid || !rhs.m_isValid) {
+    throw InvalidNode();
+  }
+  if (!is(rhs)) {
+    AssignNode(rhs);
+  }
+  return *this;
+}
+
+// Makes this node's underlying node use rhs's data and merges rhs's memory
+// into this node's memory holder. Both underlying nodes are created first if
+// they do not exist yet. Throws InvalidNode if either node is invalid.
+inline void Node::AssignData(const Node& rhs) {
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  rhs.EnsureNodeExists();
+
+  m_pNode->set_data(*rhs.m_pNode);
+  m_pMemory->merge(*rhs.m_pMemory);
+}
+
+// Rebinds this node to rhs. Throws InvalidNode if either node is invalid.
+inline void Node::AssignNode(const Node& rhs) {
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  rhs.EnsureNodeExists();
+
+  // No underlying node yet: simply adopt rhs's node and memory holder.
+  if (!m_pNode) {
+    m_pNode = rhs.m_pNode;
+    m_pMemory = rhs.m_pMemory;
+    return;
+  }
+
+  // Otherwise redirect the existing underlying node via set_ref() before this
+  // handle switches to rhs's node pointer; the order of these statements
+  // matters. NOTE(review): presumably set_ref() is what keeps other handles to
+  // the old node in sync -- confirm against detail::node.
+  m_pNode->set_ref(*rhs.m_pNode);
+  m_pMemory->merge(*rhs.m_pMemory);
+  m_pNode = rhs.m_pNode;
+}
+
+// size/iterator
+// Returns the size reported by the underlying node, or 0 when there is no
+// underlying node. Throws InvalidNode for an invalid node.
+inline std::size_t Node::size() const {
+  if (!m_isValid) {
+    throw InvalidNode();
+  }
+  if (!m_pNode) {
+    return 0;
+  }
+  return m_pNode->size();
+}
+
+// The begin()/end() accessors never throw: an invalid node, or one without
+// an underlying node, yields a default-constructed iterator.
+inline const_iterator Node::begin() const {
+  if (!m_isValid || !m_pNode) {
+    return const_iterator();
+  }
+  return const_iterator(m_pNode->begin(), m_pMemory);
+}
+
+inline iterator Node::begin() {
+  if (!m_isValid || !m_pNode) {
+    return iterator();
+  }
+  return iterator(m_pNode->begin(), m_pMemory);
+}
+
+inline const_iterator Node::end() const {
+  if (!m_isValid || !m_pNode) {
+    return const_iterator();
+  }
+  return const_iterator(m_pNode->end(), m_pMemory);
+}
+
+inline iterator Node::end() {
+  if (!m_isValid || !m_pNode) {
+    return iterator();
+  }
+  return iterator(m_pNode->end(), m_pMemory);
+}
+
+// sequence
+// Appends an arbitrary value by first wrapping it in a temporary Node.
+// Throws InvalidNode for an invalid node.
+template <typename T>
+inline void Node::push_back(const T& rhs) {
+  if (!m_isValid)
+    throw InvalidNode();
+  push_back(Node(rhs));
+}
+
+// Appends rhs's underlying node to this node and merges rhs's memory into
+// this node's memory holder. Both underlying nodes are created first if they
+// do not exist yet. Throws InvalidNode if either node is invalid.
+inline void Node::push_back(const Node& rhs) {
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  rhs.EnsureNodeExists();
+
+  m_pNode->push_back(*rhs.m_pNode, m_pMemory);
+  m_pMemory->merge(*rhs.m_pMemory);
+}
+
+// helpers for indexing
+namespace detail {
+// Maps a key/value argument to the form handed on to the underlying node.
+// Primary template: passes the argument through unchanged, by const reference.
+template <typename T>
+struct to_value_t {
+  explicit to_value_t(const T& t_) : t(t_) {}
+  const T& t;
+  typedef const T& return_type;
+
+  const T& operator()() const { return t; }
+};
+
+// const char*: produces a std::string built from the C-string.
+template <>
+struct to_value_t<const char*> {
+  explicit to_value_t(const char* t_) : t(t_) {}
+  const char* t;
+  typedef std::string return_type;
+
+  const std::string operator()() const { return t; }
+};
+
+// char*: same as const char*.
+template <>
+struct to_value_t<char*> {
+  explicit to_value_t(char* t_) : t(t_) {}
+  const char* t;
+  typedef std::string return_type;
+
+  const std::string operator()() const { return t; }
+};
+
+// char[N] (the deduced type of a string literal argument): same as
+// const char*.
+template <std::size_t N>
+struct to_value_t<char[N]> {
+  explicit to_value_t(const char* t_) : t(t_) {}
+  const char* t;
+  typedef std::string return_type;
+
+  const std::string operator()() const { return t; }
+};
+
+// converts C-strings to std::strings so they can be copied
+template <typename T>
+inline typename to_value_t<T>::return_type to_value(const T& t) {
+  return to_value_t<T>(t)();
+}
+}
+
+// indexing
+// Const lookup by key. The underlying lookup returns a pointer; when it is
+// null an invalid (zombie) Node is returned, otherwise a Node sharing this
+// node's memory holder. Throws InvalidNode for an invalid node.
+template <typename Key>
+inline const Node Node::operator[](const Key& key) const {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  // Cast to const so that the pointer-returning const overload of get() is
+  // selected.
+  detail::node* value = static_cast<const detail::node&>(*m_pNode)
+                            .get(detail::to_value(key), m_pMemory);
+  if (!value) {
+    return Node(ZombieNode);
+  }
+  return Node(*value, m_pMemory);
+}
+
+// Non-const lookup by key: uses the reference-returning get() overload and
+// wraps the result in a Node sharing this node's memory holder.
+// Throws InvalidNode for an invalid node.
+template <typename Key>
+inline Node Node::operator[](const Key& key) {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  detail::node& value = m_pNode->get(detail::to_value(key), m_pMemory);
+  return Node(value, m_pMemory);
+}
+
+// Removes the entry for key and returns the underlying node's remove()
+// result. Throws InvalidNode for an invalid node.
+template <typename Key>
+inline bool Node::remove(const Key& key) {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  return m_pNode->remove(detail::to_value(key), m_pMemory);
+}
+
+// Const lookup with a Node as key. The key's memory is merged into this
+// node's memory holder before the lookup. Returns an invalid (zombie) Node
+// when the underlying lookup returns null. Throws InvalidNode if this node or
+// the key is invalid.
+inline const Node Node::operator[](const Node& key) const {
+  if (!m_isValid || !key.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  key.EnsureNodeExists();
+  m_pMemory->merge(*key.m_pMemory);
+  detail::node* value =
+      static_cast<const detail::node&>(*m_pNode).get(*key.m_pNode, m_pMemory);
+  if (!value) {
+    return Node(ZombieNode);
+  }
+  return Node(*value, m_pMemory);
+}
+
+// Non-const lookup with a Node as key: merges the key's memory, then uses the
+// reference-returning get() overload. Throws InvalidNode if this node or the
+// key is invalid.
+inline Node Node::operator[](const Node& key) {
+  if (!m_isValid || !key.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  key.EnsureNodeExists();
+  m_pMemory->merge(*key.m_pMemory);
+  detail::node& value = m_pNode->get(*key.m_pNode, m_pMemory);
+  return Node(value, m_pMemory);
+}
+
+// Removes the entry whose key is the given Node and returns the underlying
+// node's remove() result. Unlike the lookups above, the key's memory is not
+// merged. Throws InvalidNode if this node or the key is invalid.
+inline bool Node::remove(const Node& key) {
+  if (!m_isValid || !key.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  key.EnsureNodeExists();
+  return m_pNode->remove(*key.m_pNode, m_pMemory);
+}
+
+// map
+// Forwards key and value (C-strings converted to std::string by to_value) to
+// the underlying node's force_insert(), creating the underlying node first if
+// necessary. Throws InvalidNode for an invalid node.
+template <typename Key, typename Value>
+inline void Node::force_insert(const Key& key, const Value& value) {
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->force_insert(detail::to_value(key), detail::to_value(value),
+                        m_pMemory);
+}
+
+// free functions
+// Equality between nodes is defined by Node::is(), not by comparing contents.
+inline bool operator==(const Node& lhs, const Node& rhs) { return lhs.is(rhs); }
+}
+
+#endif  // NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/iterator.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/iterator.h
@ -0,0 +1,31 @@
+#ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/detail/iterator_fwd.h"
+#include "yaml-cpp/node/detail/iterator.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+namespace detail {
+// Value seen when dereferencing a public Node iterator. It is both a Node
+// (for a sequence element) and a pair<Node, Node> (for a map entry); the part
+// that does not apply is set to an invalid (zombie) node.
+struct iterator_value : public Node, std::pair<Node, Node> {
+  iterator_value() {}
+  // Sequence element: the Node base refers to rhs, first/second are zombies.
+  explicit iterator_value(const Node& rhs)
+      : Node(rhs),
+        std::pair<Node, Node>(Node(Node::ZombieNode), Node(Node::ZombieNode)) {}
+  // Map entry: the Node base is a zombie, first/second hold key and value.
+  explicit iterator_value(const Node& key, const Node& value)
+      : Node(Node::ZombieNode), std::pair<Node, Node>(key, value) {}
+};
+}
+}
+
+#endif  // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/node.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/node.h
@ -0,0 +1,145 @@
+#ifndef NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stdexcept>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/node/detail/bool_type.h"
+#include "yaml-cpp/node/detail/iterator_fwd.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+class node;
+class node_data;
+struct iterator_value;
+}  // namespace detail
+}  // namespace YAML
+
+namespace YAML {
+// Public handle to a YAML node.
+//
+// A Node holds a pointer to an underlying detail::node together with a shared
+// memory holder; copying a Node copies those two members, so copies refer to
+// the same underlying node. A Node can also be invalid (m_isValid == false),
+// which is what const lookups return when a key is not found.
+class YAML_CPP_API Node {
+ public:
+  friend class NodeBuilder;
+  friend class NodeEvents;
+  friend struct detail::iterator_value;
+  friend class detail::node;
+  friend class detail::node_data;
+  template <typename>
+  friend class detail::iterator_base;
+  template <typename T, typename S>
+  friend struct as_if;
+
+  typedef YAML::iterator iterator;
+  typedef YAML::const_iterator const_iterator;
+
+  Node();
+  explicit Node(NodeType::value type);
+  template <typename T>
+  explicit Node(const T& rhs);
+  explicit Node(const detail::iterator_value& rhs);
+  Node(const Node& rhs);
+  ~Node();
+
+  // Type and position queries. The Is*() helpers are shorthands that compare
+  // Type() against the corresponding NodeType value.
+  YAML::Mark Mark() const;
+  NodeType::value Type() const;
+  bool IsDefined() const;
+  bool IsNull() const { return Type() == NodeType::Null; }
+  bool IsScalar() const { return Type() == NodeType::Scalar; }
+  bool IsSequence() const { return Type() == NodeType::Sequence; }
+  bool IsMap() const { return Type() == NodeType::Map; }
+
+  // bool conversions
+  YAML_CPP_OPERATOR_BOOL()
+  bool operator!() const { return !IsDefined(); }
+
+  // access
+  // as<T>() converts the node to T; the overload taking a fallback returns
+  // that value where the first form would fail.
+  template <typename T>
+  T as() const;
+  template <typename T, typename S>
+  T as(const S& fallback) const;
+  const std::string& Scalar() const;
+
+  const std::string& Tag() const;
+  void SetTag(const std::string& tag);
+
+  // style
+  // WARNING: This API might change in future releases.
+  EmitterStyle::value Style() const;
+  void SetStyle(EmitterStyle::value style);
+
+  // assignment
+  bool is(const Node& rhs) const;
+  template <typename T>
+  Node& operator=(const T& rhs);
+  Node& operator=(const Node& rhs);
+  void reset(const Node& rhs = Node());
+
+  // size/iterator
+  std::size_t size() const;
+
+  const_iterator begin() const;
+  iterator begin();
+
+  const_iterator end() const;
+  iterator end();
+
+  // sequence
+  template <typename T>
+  void push_back(const T& rhs);
+  void push_back(const Node& rhs);
+
+  // indexing
+  template <typename Key>
+  const Node operator[](const Key& key) const;
+  template <typename Key>
+  Node operator[](const Key& key);
+  template <typename Key>
+  bool remove(const Key& key);
+
+  const Node operator[](const Node& key) const;
+  Node operator[](const Node& key);
+  bool remove(const Node& key);
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value);
+
+ private:
+  // Tag type selecting the constructor that builds an invalid node.
+  enum Zombie { ZombieNode };
+  explicit Node(Zombie);
+  explicit Node(detail::node& node, detail::shared_memory_holder pMemory);
+
+  // Const because it only touches the mutable members below.
+  void EnsureNodeExists() const;
+
+  template <typename T>
+  void Assign(const T& rhs);
+  void Assign(const char* rhs);
+  void Assign(char* rhs);
+
+  void AssignData(const Node& rhs);
+  void AssignNode(const Node& rhs);
+
+ private:
+  bool m_isValid;
+  // Mutable so that const member functions can lazily create the underlying
+  // node.
+  mutable detail::shared_memory_holder m_pMemory;
+  mutable detail::node* m_pNode;
+};
+
+YAML_CPP_API bool operator==(const Node& lhs, const Node& rhs);
+
+YAML_CPP_API Node Clone(const Node& node);
+
+template <typename T>
+struct convert;
+}
+
+#endif  // NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/parse.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/parse.h
@ -0,0 +1,78 @@
+#ifndef VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class Node;
+
+/**
+ * Loads the input string as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API Node Load(const std::string& input);
+
+/**
+ * Loads the input string as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API Node Load(const char* input);
+
+/**
+ * Loads the input stream as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API Node Load(std::istream& input);
+
+/**
+ * Loads the input file as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ * @throws {@link BadFile} if the file cannot be loaded.
+ */
+YAML_CPP_API Node LoadFile(const std::string& filename);
+
+/**
+ * Loads the input string as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API std::vector<Node> LoadAll(const std::string& input);
+
+/**
+ * Loads the input string as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API std::vector<Node> LoadAll(const char* input);
+
+/**
+ * Loads the input stream as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API std::vector<Node> LoadAll(std::istream& input);
+
+/**
+ * Loads the input file as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ * @throws {@link BadFile} if the file cannot be loaded.
+ */
+YAML_CPP_API std::vector<Node> LoadAllFromFile(const std::string& filename);
+}  // namespace YAML
+
+#endif  // VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/ptr.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/ptr.h
@ -0,0 +1,29 @@
+#ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <memory>
+
+namespace YAML {
+namespace detail {
+class node;
+class node_ref;
+class node_data;
+class memory;
+class memory_holder;
+
+// std::shared_ptr aliases for the forward-declared detail types above.
+typedef std::shared_ptr<node> shared_node;
+typedef std::shared_ptr<node_ref> shared_node_ref;
+typedef std::shared_ptr<node_data> shared_node_data;
+typedef std::shared_ptr<memory_holder> shared_memory_holder;
+typedef std::shared_ptr<memory> shared_memory;
+}
+}
+
+#endif  // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/type.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/type.h
@ -0,0 +1,16 @@
+#ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+// Kinds of node. The enum is wrapped in a struct so that its values are
+// written as NodeType::Null, NodeType::Scalar, and so on.
+struct NodeType {
+  enum value { Undefined, Null, Scalar, Sequence, Map };
+};
+}
+
+#endif  // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/noncopyable.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/noncopyable.h
@ -0,0 +1,25 @@
+#ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+// Base class that disables copying for derived classes (basically
+// boost::noncopyable).
+//
+// The copy operations are explicitly deleted instead of being declared
+// private and left undefined, so that every attempted copy -- including one
+// made from a member or friend -- is rejected at compile time rather than at
+// link time.
+class YAML_CPP_API noncopyable {
+ protected:
+  // Protected so the class can only be used as a base.
+  noncopyable() {}
+  ~noncopyable() {}
+
+ private:
+  noncopyable(const noncopyable&) = delete;
+  const noncopyable& operator=(const noncopyable&) = delete;
+};
+}
+
+#endif  // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/null.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/null.h
@ -0,0 +1,26 @@
+#ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <string>
+
+namespace YAML {
+class Node;
+
+// Empty tag type representing a YAML null value. All instances compare equal.
+// NOTE(review): identifiers starting with an underscore followed by an
+// uppercase letter are reserved in C++ -- confirm the name is kept only for
+// compatibility.
+struct YAML_CPP_API _Null {};
+inline bool operator==(const _Null&, const _Null&) { return true; }
+inline bool operator!=(const _Null&, const _Null&) { return false; }
+
+YAML_CPP_API bool IsNull(const Node& node);  // old API only
+YAML_CPP_API bool IsNullString(const std::string& str);
+
+extern YAML_CPP_API _Null Null;
+}
+
+#endif  // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/ostream_wrapper.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/ostream_wrapper.h
@ -0,0 +1,72 @@
+#ifndef OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+// Output sink that tracks a position (row, column, absolute offset) and a
+// "comment" flag. It holds either a pointer to an external std::ostream or an
+// internal character buffer; the write logic itself is defined elsewhere.
+class YAML_CPP_API ostream_wrapper {
+ public:
+  ostream_wrapper();
+  explicit ostream_wrapper(std::ostream& stream);
+  ~ostream_wrapper();
+
+  void write(const std::string& str);
+  void write(const char* str, std::size_t size);
+
+  // Sets the comment flag. Nothing in this header clears it again.
+  void set_comment() { m_comment = true; }
+
+  // Returns a pointer to the internal buffer as a C string, or a null pointer
+  // when the wrapper was given an external stream.
+  const char* str() const {
+    if (m_pStream) {
+      return 0;
+    } else {
+      // NUL-terminate at the current position; m_buffer is `mutable` so this
+      // is allowed in a const method.
+      // NOTE(review): assumes m_buffer.size() > m_pos -- presumably guaranteed
+      // by the constructor/write() implementation; confirm.
+      m_buffer[m_pos] = '\0';
+      return &m_buffer[0];
+    }
+  }
+
+  // Plain accessors for the tracked position and the comment flag.
+  std::size_t row() const { return m_row; }
+  std::size_t col() const { return m_col; }
+  std::size_t pos() const { return m_pos; }
+  bool comment() const { return m_comment; }
+
+ private:
+  // Defined elsewhere; presumably advances m_pos/m_row/m_col for one written
+  // character -- confirm against the implementation.
+  void update_pos(char ch);
+
+ private:
+  mutable std::vector<char> m_buffer;  // backing store returned by str()
+  std::ostream* const m_pStream;       // external stream; str() returns null when set
+
+  std::size_t m_pos;
+  std::size_t m_row, m_col;
+  bool m_comment;
+};
+
+// Streams a character array of known length N. Writes N - 1 characters, i.e.
+// everything except the last array element (the terminating NUL of a string
+// literal).
+template <std::size_t N>
+inline ostream_wrapper& operator<<(ostream_wrapper& stream,
+                                   const char(&str)[N]) {
+  stream.write(str, N - 1);
+  return stream;
+}
+
+// Streams a std::string by forwarding to ostream_wrapper::write.
+inline ostream_wrapper& operator<<(ostream_wrapper& stream,
+                                   const std::string& str) {
+  stream.write(str);
+  return stream;
+}
+
+// Streams a single character as a one-byte write.
+inline ostream_wrapper& operator<<(ostream_wrapper& stream, char ch) {
+  stream.write(&ch, 1);
+  return stream;
+}
+}
+
+#endif  // OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/parser.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/parser.h
@ -0,0 +1,86 @@
+#ifndef PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <ios>
+#include <memory>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/noncopyable.h"
+
+namespace YAML {
+class EventHandler;
+class Node;
+class Scanner;
+struct Directives;
+struct Token;
+
+/**
+ * A parser turns a stream of bytes into one stream of "events" per YAML
+ * document in the input stream.
+ *
+ * The class derives privately from noncopyable, so Parser objects cannot be
+ * copied.
+ */
+class YAML_CPP_API Parser : private noncopyable {
+ public:
+  /** Constructs an empty parser (with no input). */
+  Parser();
+
+  /**
+   * Constructs a parser from the given input stream. The input stream must
+   * live as long as the parser.
+   */
+  explicit Parser(std::istream& in);
+
+  ~Parser();
+
+  /** Evaluates to true if the parser has some valid input to be read. */
+  explicit operator bool() const;
+
+  /**
+   * Resets the parser with the given input stream. Any existing state is
+   * erased.
+   */
+  void Load(std::istream& in);
+
+  /**
+   * Handles the next document by calling events on the {@code eventHandler}.
+   *
+   * @throw a ParserException on error.
+   * @return false if there are no more documents
+   */
+  bool HandleNextDocument(EventHandler& eventHandler);
+
+  /**
+   * Declared here, defined elsewhere. Presumably a debugging aid that writes
+   * the scanned tokens to {@code out} -- confirm against the implementation.
+   */
+  void PrintTokens(std::ostream& out);
+
+ private:
+  /**
+   * Reads any directives that are next in the queue, setting the internal
+   * {@code m_pDirectives} state.
+   */
+  void ParseDirectives();
+
+  void HandleDirective(const Token& token);
+
+  /**
+   * Handles a "YAML" directive, which should be of the form 'major.minor' (like
+   * a version number).
+   */
+  void HandleYamlDirective(const Token& token);
+
+  /**
+   * Handles a "TAG" directive, which should be of the form 'handle prefix',
+   * where 'handle' is converted to 'prefix' in the file.
+   */
+  void HandleTagDirective(const Token& token);
+
+ private:
+  // Both members are owned through std::unique_ptr. Scanner and Directives are
+  // only forward-declared in this header.
+  std::unique_ptr<Scanner> m_pScanner;
+  std::unique_ptr<Directives> m_pDirectives;
+};
+}
+
+#endif  // PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/stlemitter.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/stlemitter.h
@ -0,0 +1,51 @@
+#ifndef STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+#include <list>
+#include <set>
+#include <map>
+
+namespace YAML {
+// Emits every element of `seq`, in iteration order, wrapped between BeginSeq
+// and EndSeq. Returns the emitter so calls can be chained.
+template <typename Seq>
+inline Emitter& EmitSeq(Emitter& emitter, const Seq& seq) {
+  emitter << BeginSeq;
+  for (const auto& element : seq) {
+    emitter << element;
+  }
+  emitter << EndSeq;
+  return emitter;
+}
+
+// std::vector is emitted as a sequence.
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::vector<T>& v) {
+  return EmitSeq(emitter, v);
+}
+
+// std::list is emitted as a sequence.
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::list<T>& v) {
+  return EmitSeq(emitter, v);
+}
+
+// std::set is emitted as a sequence.
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::set<T>& v) {
+  return EmitSeq(emitter, v);
+}
+
+// std::map is emitted as a map: one Key/Value pair per entry, in the map's
+// iteration order, wrapped between BeginMap and EndMap.
+template <typename K, typename V>
+inline Emitter& operator<<(Emitter& emitter, const std::map<K, V>& m) {
+  emitter << BeginMap;
+  for (const auto& entry : m) {
+    emitter << Key << entry.first << Value << entry.second;
+  }
+  emitter << EndMap;
+  return emitter;
+}
+}
+
+#endif  // STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/traits.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/traits.h
@ -0,0 +1,103 @@
+#ifndef TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+// Compile-time trait: is_numeric<T>::value is false for every type by default
+// and true for the types explicitly specialized below.
+template <typename>
+struct is_numeric {
+  enum { value = false };
+};
+
+// Character and integer types. Only `char` and `unsigned char` are listed for
+// the character family in this header.
+template <>
+struct is_numeric<char> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned char> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<long int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned long int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<short int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned short int> {
+  enum { value = true };
+};
+// 64-bit integers: MSVC versions below 1310 get the __int64 spellings, every
+// other compiler gets long long.
+#if defined(_MSC_VER) && (_MSC_VER < 1310)
+template <>
+struct is_numeric<__int64> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned __int64> {
+  enum { value = true };
+};
+#else
+template <>
+struct is_numeric<long long> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned long long> {
+  enum { value = true };
+};
+#endif
+// Floating-point types.
+template <>
+struct is_numeric<float> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<double> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<long double> {
+  enum { value = true };
+};
+
+// enable_if_c<B, T>: exposes a nested `type` (= T, default void) when B is
+// true; the `false` specialization below has no `type` member.
+template <bool, class T = void>
+struct enable_if_c {
+  typedef T type;
+};
+
+template <class T>
+struct enable_if_c<false, T> {};
+
+// enable_if<Cond, T>: same as enable_if_c, keyed on Cond::value.
+template <class Cond, class T = void>
+struct enable_if : public enable_if_c<Cond::value, T> {};
+
+// disable_if_c<B, T>: the inverse -- exposes `type` when B is false; the
+// `true` specialization below has no `type` member.
+template <bool, class T = void>
+struct disable_if_c {
+  typedef T type;
+};
+
+template <class T>
+struct disable_if_c<true, T> {};
+
+// disable_if<Cond, T>: same as disable_if_c, keyed on Cond::value.
+template <class Cond, class T = void>
+struct disable_if : public disable_if_c<Cond::value, T> {};
+}
+
+#endif  // TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/yaml.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/yaml.h
@ -0,0 +1,24 @@
+#ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/parser.h"
+#include "yaml-cpp/emitter.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/stlemitter.h"
+#include "yaml-cpp/exceptions.h"
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/convert.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/detail/impl.h"
+#include "yaml-cpp/node/parse.h"
+#include "yaml-cpp/node/emit.h"
+
+#endif  // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/models/readme.md
+++ b/funasr/runtime/onnxruntime/models/readme.md
@ -1 +0,0 @@
-Place model.onnx here!
--- a/funasr/runtime/onnxruntime/models/vocab.txt
+++ b/funasr/runtime/onnxruntime/models/vocab.txt
--- a/funasr/runtime/onnxruntime/readme.md
+++ b/funasr/runtime/onnxruntime/readme.md
@ -1,6 +1,4 @@

-
-
 ## 快速使用

 ### Windows
@ -9,19 +7,16 @@

 Windows下已经预置fftw3及onnxruntime库

-
 ### Linux
 See the bottom of this page: Building Guidance

-
 ###  运行程序

-tester  /path/to/models/dir /path/to/wave/file
+tester  /path/to/models_dir /path/to/wave_file quantize(true or false)

- 例如： tester /data/models  /data/test.wav
-
-/data/models 需要包括如下两个文件： model.onnx 和vocab.txt
+例如： tester /data/models  /data/test.wav false

+/data/models 需要包括如下三个文件: config.yaml, am.mvn, model.onnx(or model_quant.onnx)

 ## 支持平台
 - Windows
@ -42,7 +37,7 @@ pip install --editable ./
 导出onnx模型，[详见](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/export)，参考示例，从modelscope中模型导出：

 ```shell
-python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize False
+python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
 ```

 ## Building Guidance for Linux/Unix
@ -66,7 +61,7 @@ centos: yum install fftw fftw-devel
 bash ./third_party/install_openblas.sh

 # build
- cmake  -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=/mnt/c/Users/ma139/RapidASR/cpp_onnx/build/onnxruntime-linux-x64-1.14.0
+ cmake  -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=/path/to/onnxruntime-linux-x64-1.14.0
 make

 # then in the subfolder tester of the current directory, you will see a program, tester
@ -80,35 +75,11 @@ onnxruntime_xxx
 └───lib
 ```

-## 线程数与性能关系
-
-测试环境Rocky Linux 8，仅测试cpp版本结果（未测python版本），@acely 
-
-简述：
-在3台配置不同的机器上分别编译并测试，在fftw和onnxruntime版本都相同的前提下，识别同一个30分钟的音频文件，分别测试不同onnx线程数量的表现。
-
-![线程数关系](images/threadnum.png "Windows ASR")
-
-目前可以总结出大致规律：
-
-并非onnx线程数越多越好
-2线程比1线程提升显著，线程再多则提升较小
-线程数等于CPU物理核心数时效率最好
-实操建议：
-
-大部分场景用3-4线程性价比最高
-低配机器用2线程合适
-
-
-
-##  演示
-
-![Windows演示](images/demo.png "Windows ASR")
-
 ## 注意
 本程序只支持 采样率16000hz, 位深16bit的 **单声道** 音频。


 ## Acknowledge
-1. We acknowledge [mayong](https://github.com/RapidAI/RapidASR/tree/main/cpp_onnx) for contributing the onnxruntime(cpp api).
-2. We borrowed a lot of code from [FastASR](https://github.com/chenkui164/FastASR) for audio frontend and text-postprocess.
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We acknowledge [mayong](https://github.com/RapidAI/RapidASR/tree/main/cpp_onnx) for contributing the onnxruntime(cpp api).
+3. We borrowed a lot of code from [FastASR](https://github.com/chenkui164/FastASR) for audio frontend and text-postprocess.
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/Audio.cpp
@ -3,7 +3,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <webrtc_vad.h>

 #include "Audio.h"

@ -138,9 +137,9 @@ bool Audio::loadwav(const char *filename)
    fp = fopen(filename, "rb");
    if (fp == nullptr)
        return false;
-    fseek(fp, 0, SEEK_END);
-    uint32_t nFileLen = ftell(fp);
-    fseek(fp, 44, SEEK_SET);
+    fseek(fp, 0, SEEK_END);  /*定位到文件末尾*/
+    uint32_t nFileLen = ftell(fp);  /*得到文件大小*/
+    fseek(fp, 44, SEEK_SET);  /*跳过wav文件头*/

    speech_len = (nFileLen - 44) / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
@ -414,6 +413,7 @@ void Audio::padding()
 #define SPEECH_LEN_20S (16000 * 20)
 #define SPEECH_LEN_30S (16000 * 30)

+/*
 void Audio::split()
 {
    VadInst *handle = WebRtcVad_Create();
@ -472,3 +472,4 @@ void Audio::split()
    }
    WebRtcVad_Free(handle);
 }
+*/
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@ -10,7 +10,7 @@ add_library(rapidasr ${files})

 if(WIN32)

-        set(EXTRA_LIBS libfftw3f-3 webrtcvad)
+        set(EXTRA_LIBS libfftw3f-3 yaml-cpp)
        if(CMAKE_CL_64)
            target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
        else()
@ -21,7 +21,7 @@ if(WIN32)
        target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
 else()

-    set(EXTRA_LIBS fftw3f webrtcvad pthread)
+    set(EXTRA_LIBS fftw3f pthread yaml-cpp)
    target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
    target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")

--- a/funasr/runtime/onnxruntime/src/Model.cpp
+++ b/funasr/runtime/onnxruntime/src/Model.cpp
@ -1,11 +1,10 @@
 #include "precomp.h"

-Model *create_model(const char *path,int nThread)
+Model *create_model(const char *path, int nThread, bool quantize)
 {
    Model *mm;

-
-    mm = new paraformer::ModelImp(path, nThread);
+    mm = new paraformer::ModelImp(path, nThread, quantize);

    return mm;
 }
--- a/funasr/runtime/onnxruntime/src/Vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/Vocab.cpp
@ -1,4 +1,5 @@
 #include "Vocab.h"
+#include "yaml-cpp/yaml.h"

 #include <fstream>
 #include <iostream>
@ -11,25 +12,42 @@ using namespace std;
 Vocab::Vocab(const char *filename)
 {
    ifstream in(filename);
-    string line;
+    loadVocabFromYaml(filename);

+    /*
+    string line;
    if (in) // 有该文件
    {
        while (getline(in, line)) // line中不包括每行的换行符
        {
            vocab.push_back(line);
        }
-        // cout << vocab[1719] << endl;
    }
-    // else // 没有该文件
-    //{
-    //     cout << "no such file" << endl;
-    // }
+    else{
+        printf("Cannot load vocab from: %s, there must be file vocab.txt", filename);
+        exit(-1);
+    }
+    */
 }
 Vocab::~Vocab()
 {
 }

+// Fills `vocab` from the `token_list` sequence of the YAML file `filename`.
+//
+// Each entry of `token_list` is read as a string and appended in file order.
+// The process exits with -1 (after printing a message) when the file cannot
+// be loaded, when `token_list` is missing or is not a sequence, or when an
+// entry cannot be converted to a string. Previously a missing key silently
+// produced an empty vocabulary and a bad entry raised an uncaught exception.
+void Vocab::loadVocabFromYaml(const char* filename){
+    YAML::Node config;
+    try{
+        config = YAML::LoadFile(filename);
+    }catch(...){
+        printf("error loading file, yaml file error or not exist.\n");
+        exit(-1);
+    }
+
+    try{
+        // Subscripting and as<string>() can both throw YAML exceptions on a
+        // malformed document, so they share one handler.
+        YAML::Node myList = config["token_list"];
+        if (!myList.IsSequence()) {
+            printf("error loading vocab: %s has no token_list sequence.\n", filename);
+            exit(-1);
+        }
+
+        vocab.reserve(vocab.size() + myList.size());
+        for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
+            vocab.push_back(it->as<string>());
+        }
+    }catch(const YAML::Exception& e){
+        printf("error parsing token_list in %s: %s\n", filename, e.what());
+        exit(-1);
+    }
+}
+
 string Vocab::vector2string(vector<int> in)
 {
    int i;
@ -67,7 +85,6 @@ bool Vocab::isChinese(string ch)
    return false;
 }

-
 string Vocab::vector2stringV2(vector<int> in)
 {
    int i;
--- a/funasr/runtime/onnxruntime/src/Vocab.h
+++ b/funasr/runtime/onnxruntime/src/Vocab.h
@ -12,6 +12,7 @@ class Vocab {
    vector<string> vocab;
    bool isChinese(string ch);
    bool isEnglish(string ch);
+    void loadVocabFromYaml(const char* filename);

  public:
    Vocab(const char *filename);
--- a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp
+++ b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp
@ -4,24 +4,16 @@
 extern "C" {
 #endif

-
 	// APIs for qmasr
-	_RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThreadNum)
+	_RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThreadNum, bool quantize)
 	{
-
-
-		Model* mm = create_model(szModelDir, nThreadNum); 
-
+		Model* mm = create_model(szModelDir, nThreadNum, quantize);
 		return mm;
 	}

-
 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 	{
-
-
 		Model* pRecogObj = (Model*)handle;
-
 		if (!pRecogObj)
 			return nullptr;

@ -46,15 +38,12 @@ extern "C" {
 				fnCallback(nStep, nTotal);
 		}

-
 		return pResult;
 	}

 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 	{
-
 		Model* pRecogObj = (Model*)handle;
-
 		if (!pRecogObj)
 			return nullptr;

@ -79,16 +68,12 @@ extern "C" {
 				fnCallback(nStep, nTotal);
 		}

-
 		return pResult;
-
 	}

 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 	{
-
 		Model* pRecogObj = (Model*)handle;
-
 		if (!pRecogObj)
 			return nullptr;

@ -113,15 +98,12 @@ extern "C" {
 				fnCallback(nStep, nTotal);
 		}

-
 		return pResult;
-
 	}

 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 	{
 		Model* pRecogObj = (Model*)handle;
-
 		if (!pRecogObj)
 			return nullptr;

@ -146,9 +128,6 @@ extern "C" {
 				fnCallback(nStep, nTotal);
 		}
 	
-	
-
-
 		return pResult;
 	}

@ -158,7 +137,6 @@ extern "C" {
 			return 0;

 		return 1;
-		
 	}


@ -168,7 +146,6 @@ extern "C" {
 			return 0.0f;

 		return ((RPASR_RECOG_RESULT*)Result)->snippet_time;
-
 	}

 	_RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex)
@ -178,34 +155,26 @@ extern "C" {
 			return nullptr;

 		return pResult->msg.c_str();
-	
 	}

 	_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result)
 	{
-
 		if (Result)
 		{
 			delete (RPASR_RECOG_RESULT*)Result;
-
 		}
 	}

 	_RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle)
 	{
-
 		Model* pRecogObj = (Model*)handle;

-
 		if (!pRecogObj)
 			return;

 		delete pRecogObj;
-
 	}

-
-
 #ifdef __cplusplus 

 }
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
@ -3,14 +3,25 @@
 using namespace std;
 using namespace paraformer;

-ModelImp::ModelImp(const char* path,int nNumThread)
+ModelImp::ModelImp(const char* path,int nNumThread, bool quantize)
 {
-    string model_path = pathAppend(path, "model.onnx");
-    string vocab_path = pathAppend(path, "vocab.txt");
+    string model_path;
+    string cmvn_path;
+    string config_path;
+
+    if(quantize)
+    {
+        model_path = pathAppend(path, "model_quant.onnx");
+    }else{
+        model_path = pathAppend(path, "model.onnx");
+    }
+    cmvn_path = pathAppend(path, "am.mvn");
+    config_path = pathAppend(path, "config.yaml");

    fe = new FeatureExtract(3);

-    sessionOptions.SetInterOpNumThreads(nNumThread);
+    //sessionOptions.SetInterOpNumThreads(1);
+    sessionOptions.SetIntraOpNumThreads(nNumThread);
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

 #ifdef _WIN32
@ -35,7 +46,8 @@ ModelImp::ModelImp(const char* path,int nNumThread)
        m_szInputNames.push_back(item.c_str());
    for (auto& item : m_strOutputNames)
        m_szOutputNames.push_back(item.c_str());
-    vocab = new Vocab(vocab_path.c_str());
+    vocab = new Vocab(config_path.c_str());
+    load_cmvn(cmvn_path.c_str());
 }

 ModelImp::~ModelImp()
@ -80,16 +92,49 @@ void ModelImp::apply_lfr(Tensor<float>*& din)
    din = tmp;
 }

+// Loads CMVN statistics from the text file `filename` into `means_list` and
+// `vars_list`.
+//
+// The file is scanned line by line. When a line's first token is <AddShift>
+// or <Rescale>, the NEXT line is read; if that line starts with
+// <LearnRateCoef>, its tokens from index 3 up to (but excluding) the last
+// token are parsed as floats. <AddShift> values are appended to `means_list`
+// unchanged, <Rescale> values are appended to `vars_list` multiplied by
+// `scale`.
+//
+// Exits with -1 if the file cannot be opened: continuing would leave both
+// lists empty for apply_cmvn to read from. Blank lines are skipped instead of
+// indexing into an empty token vector.
+void ModelImp::load_cmvn(const char *filename)
+{
+    ifstream cmvn_stream(filename);
+    if (!cmvn_stream.is_open()) {
+        printf("Cannot load cmvn from: %s, there must be file am.mvn\n", filename);
+        exit(-1);
+    }
+
+    // Splits a line into whitespace-separated tokens.
+    auto tokenize = [](const string& text) -> vector<string> {
+        istringstream iss(text);
+        return vector<string>{istream_iterator<string>{iss}, istream_iterator<string>{}};
+    };
+
+    // Consumes the line following a section tag and appends its values,
+    // multiplied by `factor`, to `dst`.
+    auto read_values = [&](vector<float>& dst, float factor) {
+        string values_line;
+        if (!getline(cmvn_stream, values_line))
+            return;
+        const vector<string> items = tokenize(values_line);
+        if (items.empty() || items[0] != "<LearnRateCoef>")
+            return;
+        // `j + 1 < size()` drops the last token without the unsigned
+        // underflow that `size() - 1` has on a short line.
+        for (size_t j = 3; j + 1 < items.size(); j++) {
+            dst.push_back(stof(items[j]) * factor);
+        }
+    };
+
+    string line;
+    while (getline(cmvn_stream, line)) {
+        const vector<string> line_item = tokenize(line);
+        if (line_item.empty())
+            continue;
+        if (line_item[0] == "<AddShift>") {
+            read_values(means_list, 1.0f);
+        }
+        else if (line_item[0] == "<Rescale>") {
+            read_values(vars_list, scale);
+        }
+    }
+}
+
 void ModelImp::apply_cmvn(Tensor<float>* din)
 {
    const float* var;
    const float* mean;
-    float scale = 22.6274169979695;
+    var = vars_list.data();
+    mean= means_list.data();
+
    int m = din->size[2];
    int n = din->size[3];

-    var = (const float*)paraformer_cmvn_var_hex;
-    mean = (const float*)paraformer_cmvn_mean_hex;
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            int idx = i * n + j;
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.h
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.h
@ -4,10 +4,6 @@
 #ifndef PARAFORMER_MODELIMP_H
 #define PARAFORMER_MODELIMP_H

-
-
-
-
 namespace paraformer {

    class ModelImp : public Model {
@ -15,11 +11,14 @@ namespace paraformer {
        FeatureExtract* fe;

        Vocab* vocab;
+        vector<float> means_list;
+        vector<float> vars_list;
+        const float scale = 22.6274169979695;

        void apply_lfr(Tensor<float>*& din);
        void apply_cmvn(Tensor<float>* din);
+        void load_cmvn(const char *filename);

-        
        string greedy_search( float* in, int nLen);

 #ifdef _WIN_X86
@ -39,7 +38,7 @@ namespace paraformer {
        //string m_strOutputName, m_strOutputNameLen;

    public:
-        ModelImp(const char* path, int nNumThread=0);
+        ModelImp(const char* path, int nNumThread=0, bool quantize=false);
        ~ModelImp();
        void reset();
        string forward_chunk(float* din, int len, int flag);
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@ -1,13 +1,15 @@
 #pragma once 
 // system 

-#include <iostream>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <deque>
 #include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iterator>
 #include <list>
 #include <locale.h>
 #include <vector>
--- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
@ -13,8 +13,11 @@ set(EXTRA_LIBS rapidasr)

 include_directories(${CMAKE_SOURCE_DIR}/include)
 set(EXECNAME "tester")
+set(EXECNAMERTF "tester_rtf")

 add_executable(${EXECNAME} "tester.cpp")
 target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})

+add_executable(${EXECNAMERTF} "tester_rtf.cpp")
+target_link_libraries(${EXECNAMERTF} PUBLIC ${EXTRA_LIBS})

--- a/funasr/runtime/onnxruntime/tester/tester.cpp
+++ b/funasr/runtime/onnxruntime/tester/tester.cpp
@ -9,41 +9,40 @@

 #include <iostream>
 #include <fstream>
+#include <sstream>
 using namespace std;

 int main(int argc, char *argv[])
 {

-    if (argc < 2)
+    if (argc < 4)
    {
-        printf("Usage: %s /path/to/model_dir /path/to/wav/file", argv[0]);
+        printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) \n", argv[0]);
        exit(-1);
    }
    struct timeval start, end;
    gettimeofday(&start, NULL);
    int nThreadNum = 4;
-    RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum);
+    // is quantize
+    bool quantize = false;
+    istringstream(argv[3]) >> boolalpha >> quantize;
+    RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum, quantize);

    if (!AsrHanlde)
    {
        printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
        exit(-1);
    }
-    
- 

    gettimeofday(&end, NULL);
    long seconds = (end.tv_sec - start.tv_sec);
    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
    printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);

-
-
    gettimeofday(&start, NULL);
    float snippet_time = 0.0f;

-
-     RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
+    RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);

    gettimeofday(&end, NULL);
   
@ -52,8 +51,7 @@ int main(int argc, char *argv[])
        string msg = RapidAsrGetResult(Result, 0);
        setbuf(stdout, NULL);
        cout << "Result: \"";
-        cout << msg << endl;
-        cout << "\"." << endl;
+        cout << msg << "\"." << endl;
        snippet_time = RapidAsrGetRetSnippetTime(Result);
        RapidAsrFreeResult(Result);
    }
@ -62,7 +60,6 @@ int main(int argc, char *argv[])
        cout <<"no return data!";
    }
 
- 
    //char* buff = nullptr;
    //int len = 0;
    //ifstream ifs(argv[2], std::ios::binary | std::ios::in);
@ -101,13 +98,11 @@ int main(int argc, char *argv[])
    //   
    //delete[]buff;
    //}
-
 
    printf("Audio length %lfs.\n", (double)snippet_time);
    seconds = (end.tv_sec - start.tv_sec);
    long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
    printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
-
    printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));

    RapidAsrUninit(AsrHanlde);
--- a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
+++ b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
@ -0,0 +1,99 @@
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include <win_func.h>
+#endif
+
+#include "librapidasrapi.h"
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// Measures the real-time factor (RTF) of the recognizer over a wav.scp list.
+//
+// Usage: tester_rtf /path/to/model_dir /path/to/wav.scp quantize(true or false)
+//
+// Each wav.scp line is read as two whitespace-separated columns; the second
+// column is used as the wav path. The first wav is decoded 30 times as a
+// warm-up, then every wav is decoded once while the wall-clock time is
+// accumulated. Returns 0 on success and a non-zero status when the arguments,
+// the scp file or the model cannot be used.
+int main(int argc, char *argv[])
+{
+
+    if (argc < 4)
+    {
+        printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) \n", argv[0]);
+        exit(-1);
+    }
+
+    // read wav.scp
+    vector<string> wav_list;
+    ifstream in(argv[2]);
+    if (!in.is_open()) {
+        printf("Failed to open file: %s\n", argv[2]);
+        return -1;
+    }
+    string line;
+    while(getline(in, line))
+    {
+        istringstream iss(line);
+        string column1, column2;
+        // Skip blank or single-column lines instead of queuing an empty path.
+        if (iss >> column1 >> column2)
+            wav_list.push_back(column2);
+    }
+    in.close();
+
+    // The warm-up below reads wav_list[0], so an empty list must stop here.
+    if (wav_list.empty()) {
+        printf("No wav entries found in: %s\n", argv[2]);
+        return -1;
+    }
+
+    // model init
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+    int nThreadNum = 1;
+    // is quantize
+    bool quantize = false;
+    istringstream(argv[3]) >> boolalpha >> quantize;
+
+    RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum, quantize);
+    if (!AsrHanlde)
+    {
+        printf("Cannot load ASR Model from: %s, there must be files config.yaml, am.mvn and model.onnx (or model_quant.onnx)\n", argv[1]);
+        exit(-1);
+    }
+    gettimeofday(&end, NULL);
+    long seconds = (end.tv_sec - start.tv_sec);
+    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+    printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
+
+    // warm up
+    for (size_t i = 0; i < 30; i++)
+    {
+        RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
+        // Release each warm-up result; RapidAsrFreeResult ignores a null result.
+        RapidAsrFreeResult(Result);
+    }
+
+    // forward
+    float snippet_time = 0.0f;
+    float total_length = 0.0f;  // seconds of audio decoded
+    long total_time = 0;        // microseconds spent decoding
+
+    for (size_t i = 0; i < wav_list.size(); i++)
+    {
+        gettimeofday(&start, NULL);
+        RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
+        gettimeofday(&end, NULL);
+        seconds = (end.tv_sec - start.tv_sec);
+        long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+        total_time += taking_micros;
+
+        if(Result){
+            // %s needs a C string; passing a std::string object is undefined
+            // behaviour. Guard against a null return from the API as well.
+            const char* msg = RapidAsrGetResult(Result, 0);
+            printf("Result: %s \n", msg ? msg : "");
+
+            snippet_time = RapidAsrGetRetSnippetTime(Result);
+            total_length += snippet_time;
+            RapidAsrFreeResult(Result);
+        }else{
+            cout <<"No return data!";
+        }
+    }
+
+    printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
+    printf("total_time_comput %ld ms.\n", total_time / 1000);
+    if (total_length > 0.0f) {
+        printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000));
+    } else {
+        // No audio was decoded, so the ratio is undefined.
+        printf("total_rtf is undefined: no audio was decoded.\n");
+    }
+
+    RapidAsrUninit(AsrHanlde);
+    return 0;
+}
--- a/Show More
+++ b/Show More