Merge branch 'main' into main

2025-09-15 14:48:36 +08:00 · 2023-04-06 10:53:13 +08:00 · 2023-04-06 10:53:13 +08:00 · 0eacba96a1
commit 0eacba96a1
parent 6d09603442 284c496743
594 changed files with 186074 additions and 16593 deletions
--- a/egs/aishell/conformer/run.sh
+++ b/egs/aishell/conformer/run.sh
@ -217,7 +217,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
        if [ -n "${inference_config}" ]; then
            _opts+="--config ${inference_config} "
        fi
-        ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1: "${_nj}" "${_logdir}"/asr_inference.JOB.log \
+        ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
            python -m funasr.bin.asr_inference_launch \
                --batch_size 1 \
                --ngpu "${_ngpu}" \
--- a/egs/aishell/transformer/utils/cmvn_converter.py
+++ b/egs/aishell/transformer/utils/cmvn_converter.py
@ -0,0 +1,53 @@
+import argparse
+import json
+import numpy as np
+
+
+def get_parser():
+    """Build the command-line parser for the CMVN converter.
+
+    Both options are mandatory, so no ``default`` is declared for them. The
+    previous ``default=False`` could never take effect (argparse rejects the
+    command line when a required option is missing) and only made the
+    generated help advertise ``False`` as the default of a path argument.
+
+    Returns:
+        argparse.ArgumentParser: parser exposing ``--cmvn-json``/``-c`` (read
+        as ``args.cmvn_json``) and ``--am-mvn``/``-a`` (read as
+        ``args.am_mvn``), both required strings.
+    """
+    parser = argparse.ArgumentParser(
+        description="cmvn converter",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--cmvn-json",
+        "-c",
+        required=True,
+        type=str,
+        help="cmvn json file",
+    )
+    parser.add_argument(
+        "--am-mvn",
+        "-a",
+        required=True,
+        type=str,
+        help="am mvn file",
+    )
+    return parser
+
+def _format_vector(values):
+    """Render a 1-D numeric array as ``[ v0 v1 ... ]``.
+
+    The elements are converted to plain Python floats first. Formatting
+    ``str(list(array))`` instead relies on the repr of NumPy scalars, which
+    NumPy 2 changed to ``np.float64(...)`` and which would therefore leak
+    into the output file.
+    """
+    return "[ " + " ".join(repr(v) for v in values.tolist()) + " ]"
+
+
+def main():
+    """Convert accumulated CMVN statistics (JSON) into an ``<Nnet>`` text file.
+
+    Reads ``mean_stats``, ``var_stats`` and ``total_frames`` from the JSON
+    file given by ``--cmvn-json`` and writes a ``<Splice>`` / ``<AddShift>``
+    / ``<Rescale>`` description to the path given by ``--am-mvn``.
+
+    Raises:
+        KeyError: if one of the three statistics is missing from the JSON.
+        OSError: if either file cannot be opened.
+    """
+    args = get_parser().parse_args()
+
+    with open(args.cmvn_json, "r") as fin:
+        cmvn_dict = json.load(fin)
+
+    mean_stats = np.array(cmvn_dict["mean_stats"])
+    var_stats = np.array(cmvn_dict["var_stats"])
+    total_frame = np.array(cmvn_dict["total_frames"])
+
+    # Negated per-dimension mean, written as the <AddShift> vector.
+    mean = -1.0 * mean_stats / total_frame
+    # Reciprocal standard deviation, written as the <Rescale> vector.
+    # Assumes var_stats holds per-dimension sums of squares -- TODO confirm.
+    # mean * mean is unaffected by the sign flip above.
+    var = 1.0 / np.sqrt(var_stats / total_frame - mean * mean)
+    dims = mean.shape[0]
+
+    lines = [
+        "<Nnet>",
+        "<Splice> " + str(dims) + " " + str(dims),
+        "[ 0 ]",
+        "<AddShift> " + str(dims) + " " + str(dims),
+        "<LearnRateCoef> 0 " + _format_vector(mean),
+        "<Rescale> " + str(dims) + " " + str(dims),
+        "<LearnRateCoef> 0 " + _format_vector(var),
+        "</Nnet>",
+    ]
+    with open(args.am_mvn, 'w') as fout:
+        fout.write("\n".join(lines) + "\n")
+
+
+if __name__ == '__main__':
+    main()
--- a/egs/aishell/transformer/utils/compute_wer.py
+++ b/egs/aishell/transformer/utils/compute_wer.py
@ -45,8 +45,8 @@ def compute_wer(ref_file,
           if out_item['wrong'] > 0:
               rst['wrong_sentences'] += 1
           cer_detail_writer.write(hyp_key + print_cer_detail(out_item) + '\n')
-           cer_detail_writer.write("ref:" + '\t' + "".join(ref_dict[hyp_key]) + '\n')
-           cer_detail_writer.write("hyp:" + '\t' + "".join(hyp_dict[hyp_key]) + '\n')
+           cer_detail_writer.write("ref:" + '\t' + " ".join(list(map(lambda x: x.lower(), ref_dict[hyp_key]))) + '\n')
+           cer_detail_writer.write("hyp:" + '\t' + " ".join(list(map(lambda x: x.lower(), hyp_dict[hyp_key]))) + '\n')

    if rst['Wrd'] > 0:
        rst['Err'] = round(rst['wrong_words'] * 100 / rst['Wrd'], 2)
--- a/egs/librispeech/conformer/conf/decode_asr_transformer.yaml
+++ b/egs/librispeech/conformer/conf/decode_asr_transformer.yaml
@ -0,0 +1,6 @@
+# Decoding options for the librispeech/conformer recipe; run.sh forwards this
+# file to the inference launcher through "--config ${inference_config}".
+beam_size: 10
+penalty: 0.0
+maxlenratio: 0.0
+minlenratio: 0.0
+ctc_weight: 0.5
+# NOTE(review): lm_weight is non-zero, but the stage-4 inference command in
+# run.sh passes no language-model option -- confirm an LM is actually supplied.
+lm_weight: 0.7
--- a/egs/librispeech/conformer/conf/train_asr_conformer.yaml
+++ b/egs/librispeech/conformer/conf/train_asr_conformer.yaml
@ -0,0 +1,80 @@
+# Training configuration: Conformer encoder with a Transformer decoder.
+# librispeech/conformer/run.sh selects this file through its asr_config variable.
+encoder: conformer
+encoder_conf:
+    output_size: 512
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d
+    normalize_before: true
+    macaron_style: true
+    rel_pos_type: latest
+    pos_enc_layer_type: rel_pos
+    selfattention_layer_type: rel_selfattn
+    activation_type: swish
+    use_cnn_module: true
+    cnn_module_kernel: 31
+
+decoder: transformer
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1
+    length_normalized_loss: false
+
+accum_grad: 2
+max_epoch: 50
+# NOTE: YAML loads the bare word "none" as the string "none", not as null;
+# presumably the training CLI maps it to None -- verify against the consumer.
+patience: none
+init: none
+# Checkpoint ranking key: (valid, acc, max).
+best_model_criterion:
+-   - valid
+    - acc
+    - max
+# run.sh decodes with valid.acc.ave_10best.pth, which presumably relies on
+# this value staying at 10 -- keep the two in sync.
+keep_nbest_models: 10
+
+optim: adam
+optim_conf:
+    lr: 0.0025
+    weight_decay: 0.000001
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 40000
+
+specaug: specaug
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 27
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_ratio_range:
+    - 0.
+    - 0.05
+    num_time_mask: 10
+
+dataset_conf:
+    shuffle: True
+    shuffle_conf:
+        shuffle_size: 1024
+        sort_size: 500
+    batch_conf:
+        batch_type: token
+        batch_size: 10000
+    num_workers: 8
+
+log_interval: 50
+# NOTE: "None" is likewise a plain YAML string (YAML null is spelled null or ~);
+# presumably interpreted as "no normalization layer" by the consumer -- verify.
+normalize: None
--- a/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml
+++ b/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml
@ -0,0 +1,80 @@
+# Training configuration: Conformer encoder with a Transformer decoder, using
+# utterance-level normalization (see the last key). run.sh carries this file as
+# a commented-out alternative for asr_config.
+encoder: conformer
+encoder_conf:
+    output_size: 512
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d
+    normalize_before: true
+    macaron_style: true
+    rel_pos_type: latest
+    pos_enc_layer_type: rel_pos
+    selfattention_layer_type: rel_selfattn
+    activation_type: swish
+    use_cnn_module: true
+    cnn_module_kernel: 31
+
+decoder: transformer
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1
+    length_normalized_loss: false
+
+accum_grad: 2
+max_epoch: 50
+# NOTE: YAML loads the bare word "none" as the string "none", not as null;
+# presumably the training CLI maps it to None -- verify against the consumer.
+patience: none
+init: none
+# Checkpoint ranking key: (valid, acc, max).
+best_model_criterion:
+-   - valid
+    - acc
+    - max
+# run.sh decodes with valid.acc.ave_10best.pth, which presumably relies on
+# this value staying at 10 -- keep the two in sync.
+keep_nbest_models: 10
+
+optim: adam
+optim_conf:
+    lr: 0.0025
+    weight_decay: 0.000001
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 40000
+
+specaug: specaug
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 27
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_ratio_range:
+    - 0.
+    - 0.05
+    num_time_mask: 10
+
+dataset_conf:
+    shuffle: True
+    shuffle_conf:
+        shuffle_size: 1024
+        sort_size: 500
+    batch_conf:
+        batch_type: token
+        batch_size: 10000
+    num_workers: 8
+
+log_interval: 50
+# The only key whose value differs from train_asr_conformer.yaml.
+normalize: utterance_mvn
--- a/egs/librispeech/conformer/local/data_prep_librispeech.sh
+++ b/egs/librispeech/conformer/local/data_prep_librispeech.sh
@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+# Copyright 2014  Vassil Panayotov
+#           2014  Johns Hopkins University (author: Daniel Povey)
+# Apache 2.0
+
+# Prepares one LibriSpeech subset: walks <src-dir>/<reader>/<chapter>/ and
+# writes <dst-dir>/wav.scp (utterance id -> .flac path) and <dst-dir>/text
+# (the concatenated per-chapter transcripts).
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <src-dir> <dst-dir>"
+  echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean"
+  exit 1
+fi
+
+src=$1
+dst=$2
+
+# all utterances are FLAC compressed
+# (flac is only checked for presence here; this script never invokes it.)
+if ! which flac >&/dev/null; then
+   echo "Please install 'flac' on ALL worker nodes!"
+   exit 1
+fi
+
+# SPEAKERS.TXT is expected next to the subset directory; only its existence
+# is verified below, its contents are not read.
+spk_file=$src/../SPEAKERS.TXT
+
+mkdir -p $dst || exit 1
+
+[ ! -d $src ] && echo "$0: no such directory $src" && exit 1
+[ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1
+
+
+# Start from empty outputs: both files are appended to inside the loops.
+wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp
+trans=$dst/text; [[ -f "$trans" ]] && rm $trans
+
+for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do
+  reader=$(basename $reader_dir)
+  # "-eq" fails on non-numeric operands, so comparing a value with itself
+  # doubles as an "is this an integer?" test.
+  if ! [ $reader -eq $reader ]; then  # not integer.
+    echo "$0: unexpected subdirectory name $reader"
+    exit 1
+  fi
+
+  for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do
+    chapter=$(basename $chapter_dir)
+    if ! [ "$chapter" -eq "$chapter" ]; then
+      echo "$0: unexpected chapter-subdirectory name $chapter"
+      exit 1
+    fi
+
+    # One line per utterance: "<basename> <chapter_dir>/<basename>.flac "
+    # (note the trailing space emitted by the printf format).
+    find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \
+      awk -v "dir=$chapter_dir" '{printf "%s %s/%s.flac \n", $0, dir, $0}' >>$wav_scp|| exit 1
+
+    # Each chapter must ship <reader>-<chapter>.trans.txt; it is appended verbatim.
+    chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt
+    [ ! -f  $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1
+    cat $chapter_trans >>$trans
+  done
+done
+
+echo "$0: successfully prepared data in $dst"
+
+exit 0
--- a/egs/librispeech/conformer/path.sh
+++ b/egs/librispeech/conformer/path.sh
@ -0,0 +1,5 @@
+# Environment setup sourced by run.sh.
+# FUNASR_DIR is three levels above the current working directory, so this
+# file presumably has to be sourced from the recipe directory itself.
+export FUNASR_DIR=$PWD/../../..
+
+# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+# Prepend funasr/bin so its entry points can be invoked by bare name.
+export PATH=$FUNASR_DIR/funasr/bin:$PATH
--- a/egs/librispeech/conformer/run.sh
+++ b/egs/librispeech/conformer/run.sh
@ -0,0 +1,262 @@
+#!/usr/bin/env bash
+
+# LibriSpeech Conformer recipe: data preparation (stage 0), fbank/CMVN feature
+# generation (1), BPE dictionary (2), distributed training (3), inference and
+# scoring (4). Stages are selected with stage/stop_stage below.
+
+. ./path.sh || exit 1;
+
+# machines configuration
+# Not exported: used below only as a comma-separated list from which the
+# per-rank --gpu_id and the decoding GPU count are derived.
+CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+gpu_num=8
+count=1
+gpu_inference=true  # Whether to perform gpu decoding, set false for cpu decoding
+# for gpu decoding, inference_nj=ngpu*njob; for cpu decoding, inference_nj=njob
+njob=5
+train_cmd=utils/run.pl
+infer_cmd=utils/run.pl
+
+# general configuration
+feats_dir="../DATA" #feature output directory
+exp_dir="."
+lang=en
+dumpdir=dump/fbank
+feats_type=fbank
+token_type=bpe
+dataset_type=large
+scp=feats.scp
+type=kaldi_ark
+# Defaults cover training and inference only; stages 0-2 run only when
+# stage is lowered (presumably via utils/parse_options.sh -- verify).
+stage=3
+stop_stage=4
+
+# feature configuration
+feats_dim=80
+sample_frequency=16000
+nj=100
+speed_perturb="0.9,1.0,1.1"
+
+# data
+# Root directory that contains LibriSpeech/; empty by default and must be
+# set before running stage 0.
+data_librispeech=
+
+# bpe model
+nbpe=5000
+bpemode=unigram
+
+# exp tag
+tag=""
+
+. utils/parse_options.sh || exit 1;
+
+# Strict mode, the script will exit on:
+# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline'.
+# (-x 'print commands' is not enabled here.)
+set -e
+set -u
+set -o pipefail
+
+train_set=train_960
+valid_set=dev
+test_sets="test_clean test_other dev_clean dev_other"
+
+asr_config=conf/train_asr_conformer.yaml
+#asr_config=conf/train_asr_conformer_uttnorm.yaml
+# The name ends in "_${tag}", so the default empty tag leaves a trailing underscore.
+model_dir="baseline_$(basename "${asr_config}" .yaml)_${feats_type}_${lang}_${token_type}_${tag}"
+
+inference_config=conf/decode_asr_transformer.yaml
+#inference_config=conf/decode_asr_transformer_beam60_ctc0.3.yaml
+inference_asr_model=valid.acc.ave_10best.pth
+
+# you can set gpu num for decoding here
+gpuid_list=$CUDA_VISIBLE_DEVICES  # set gpus for decoding, the same as training stage by default
+ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
+
+# $[ ... ] is the legacy spelling of arithmetic expansion $(( ... )).
+# _ngpu is the per-job GPU count handed to the inference command (1 or 0).
+if ${gpu_inference}; then
+    inference_nj=$[${ngpu}*${njob}]
+    _ngpu=1
+else
+    inference_nj=$njob
+    _ngpu=0
+fi
+
+# Stage 0: build wav.scp/text for every LibriSpeech subset. The output
+# directory is the subset name with dashes replaced by underscores
+# (${x//-/_}), e.g. train-clean-100 -> train_clean_100.
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    echo "stage 0: Data preparation"
+    # Data preparation
+    for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
+        local/data_prep_librispeech.sh ${data_librispeech}/LibriSpeech/${x} ${feats_dir}/data/${x//-/_}
+    done
+fi
+
+# Dump directories for the CMVN-normalized features. They are created
+# unconditionally because later stages reference them even when stage 1 is
+# skipped.
+feat_train_dir=${feats_dir}/${dumpdir}/$train_set; mkdir -p ${feat_train_dir}
+feat_dev_clean_dir=${feats_dir}/${dumpdir}/dev_clean; mkdir -p ${feat_dev_clean_dir}
+feat_dev_other_dir=${feats_dir}/${dumpdir}/dev_other; mkdir -p ${feat_dev_other_dir}
+feat_test_clean_dir=${feats_dir}/${dumpdir}/test_clean; mkdir -p ${feat_test_clean_dir}
+feat_test_other_dir=${feats_dir}/${dumpdir}/test_other; mkdir -p ${feat_test_other_dir}
+feat_dev_dir=${feats_dir}/${dumpdir}/$valid_set; mkdir -p ${feat_dev_dir}
+# Stage 1: fbank extraction, global CMVN estimated on the training set and
+# applied to every set, then assembly of the merged "dev" validation set.
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    echo "stage 1: Feature Generation"
+    # compute fbank features
+    fbankdir=${feats_dir}/fbank
+    for x in dev_clean dev_other test_clean test_other; do
+        utils/compute_fbank.sh --cmd "$train_cmd" --nj 1 --max_lengths 3000 --feats_dim ${feats_dim} --sample_frequency ${sample_frequency} \
+            ${feats_dir}/data/${x} ${exp_dir}/exp/make_fbank/${x} ${fbankdir}/${x}
+        utils/fix_data_feat.sh ${fbankdir}/${x}
+    done
+
+    # -p: with "set -e" in effect a plain mkdir aborts the script when this
+    # stage is re-run and the directory already exists.
+    mkdir -p ${feats_dir}/data/$train_set
+    # Merge the three training subsets into train_960, sorted by utterance id.
+    train_sets="train_clean_100 train_clean_360 train_other_500"
+    for file in wav.scp text; do
+        ( for f in $train_sets; do cat $feats_dir/data/$f/$file; done ) | sort -k1 > $feats_dir/data/$train_set/$file || exit 1;
+    done
+    utils/compute_fbank.sh --cmd "$train_cmd" --nj $nj --max_lengths 3000 --feats_dim ${feats_dim} --sample_frequency ${sample_frequency} --speed_perturb ${speed_perturb} \
+    ${feats_dir}/data/$train_set ${exp_dir}/exp/make_fbank/$train_set ${fbankdir}/$train_set
+    utils/fix_data_feat.sh ${fbankdir}/$train_set
+
+    # compute global cmvn
+    utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} \
+        ${fbankdir}/$train_set ${exp_dir}/exp/make_fbank/$train_set
+
+    # apply cmvn (every set is normalized with the training-set statistics)
+    utils/apply_cmvn.sh --cmd "$train_cmd" --nj $nj \
+        ${fbankdir}/$train_set ${fbankdir}/$train_set/cmvn.json ${exp_dir}/exp/make_fbank/$train_set ${feat_train_dir}
+    utils/apply_cmvn.sh --cmd "$train_cmd" --nj 1 \
+        ${fbankdir}/dev_clean ${fbankdir}/$train_set/cmvn.json ${exp_dir}/exp/make_fbank/dev_clean ${feat_dev_clean_dir}
+    utils/apply_cmvn.sh --cmd "$train_cmd" --nj 1 \
+        ${fbankdir}/dev_other ${fbankdir}/$train_set/cmvn.json ${exp_dir}/exp/make_fbank/dev_other ${feat_dev_other_dir}
+    utils/apply_cmvn.sh --cmd "$train_cmd" --nj 1 \
+        ${fbankdir}/test_clean ${fbankdir}/$train_set/cmvn.json ${exp_dir}/exp/make_fbank/test_clean ${feat_test_clean_dir}
+    utils/apply_cmvn.sh --cmd "$train_cmd" --nj 1 \
+        ${fbankdir}/test_other ${fbankdir}/$train_set/cmvn.json ${exp_dir}/exp/make_fbank/test_other ${feat_test_other_dir}
+
+    cp ${fbankdir}/$train_set/text ${fbankdir}/$train_set/speech_shape ${fbankdir}/$train_set/text_shape ${feat_train_dir}
+    cp ${fbankdir}/dev_clean/text ${fbankdir}/dev_clean/speech_shape ${fbankdir}/dev_clean/text_shape ${feat_dev_clean_dir}
+    cp ${fbankdir}/dev_other/text ${fbankdir}/dev_other/speech_shape ${fbankdir}/dev_other/text_shape ${feat_dev_other_dir}
+    cp ${fbankdir}/test_clean/text ${fbankdir}/test_clean/speech_shape ${fbankdir}/test_clean/text_shape ${feat_test_clean_dir}
+    cp ${fbankdir}/test_other/text ${fbankdir}/test_other/speech_shape ${fbankdir}/test_other/text_shape ${feat_test_other_dir}
+
+    # The validation set "dev" is the sorted union of dev_clean and dev_other.
+    dev_sets="dev_clean dev_other"
+    for file in feats.scp text speech_shape text_shape; do
+        ( for f in $dev_sets; do cat $feats_dir/${dumpdir}/$f/$file; done ) | sort -k1 > $feat_dev_dir/$file || exit 1;
+    done
+
+    #generate ark list
+    utils/gen_ark_list.sh --cmd "$train_cmd" --nj $nj ${feat_train_dir} ${fbankdir}/${train_set} ${feat_train_dir}
+    # NOTE(review): ${fbankdir}/${valid_set} is never populated in this stage
+    # (fbank is computed for dev_clean/dev_other only) -- confirm that
+    # gen_ark_list.sh does not read from its second argument.
+    utils/gen_ark_list.sh --cmd "$train_cmd" --nj $nj ${feat_dev_dir} ${fbankdir}/${valid_set} ${feat_dev_dir}
+fi
+
+# Token list and SentencePiece model prefix; both are also used by stage 3.
+dict=${feats_dir}/data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
+bpemodel=${feats_dir}/data/lang_char/${train_set}_${bpemode}${nbpe}
+echo "dictionary: ${dict}"
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    ### Task dependent. You have to check non-linguistic symbols used in the corpus.
+    echo "stage 2: Dictionary and Json Data Preparation"
+    mkdir -p ${feats_dir}/data/lang_char/
+    # Dictionary layout: <blank>, <s>, </s>, the sorted unique BPE pieces, <unk>.
+    echo "<blank>" > ${dict}
+    echo "<s>" >> ${dict}
+    echo "</s>" >> ${dict}
+    # Drop the utterance id (first space-separated field) to get raw training text.
+    cut -f 2- -d" " ${feats_dir}/data/${train_set}/text > ${feats_dir}/data/lang_char/input.txt
+    spm_train --input=${feats_dir}/data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000
+    # The trailing awk '{print $0}' passes every line through unchanged.
+    spm_encode --model=${bpemodel}.model --output_format=piece < ${feats_dir}/data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0}' >> ${dict}
+    echo "<unk>" >> ${dict}
+    wc -l ${dict}
+
+    # text_shape.char is text_shape with ",<vocab_size>" appended to every line.
+    vocab_size=$(cat ${dict} | wc -l)
+    awk -v v=,${vocab_size} '{print $0v}' ${feat_train_dir}/text_shape > ${feat_train_dir}/text_shape.char
+    awk -v v=,${vocab_size} '{print $0v}' ${feat_dev_dir}/text_shape > ${feat_dev_dir}/text_shape.char
+    # NOTE(review): the directory is named "..._zh_char" although this recipe
+    # sets lang=en and token_type=bpe, and nothing else in this script reads
+    # it -- confirm which consumer expects this path.
+    mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/$train_set
+    mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/$valid_set
+    cp ${feat_train_dir}/speech_shape ${feat_train_dir}/text_shape ${feat_train_dir}/text_shape.char ${feats_dir}/asr_stats_fbank_zh_char/$train_set
+    cp ${feat_dev_dir}/speech_shape ${feat_dev_dir}/text_shape ${feat_dev_dir}/text_shape.char ${feats_dir}/asr_stats_fbank_zh_char/$valid_set
+fi
+
+
+# Training Stage
+# Launches one background asr_train.py process per GPU (rank i uses the
+# (i+1)-th entry of CUDA_VISIBLE_DEVICES); the ranks rendezvous through a
+# shared file:// init method.
+world_size=$gpu_num  # run on one machine
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    echo "stage 3: Training"
+    mkdir -p ${exp_dir}/exp/${model_dir}
+    mkdir -p ${exp_dir}/exp/${model_dir}/log
+    # Remove any stale rendezvous file left over from a previous run.
+    INIT_FILE=${exp_dir}/exp/${model_dir}/ddp_init
+    if [ -f $INIT_FILE ];then
+        rm -f $INIT_FILE
+    fi
+    init_method=file://$(readlink -f $INIT_FILE)
+    echo "$0: init method is $init_method"
+    for ((i = 0; i < $gpu_num; ++i)); do
+        {
+            rank=$i
+            local_rank=$i
+            gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1])
+            # stdout and stderr of each rank go to log/train.log.<rank>.
+            asr_train.py \
+                --gpu_id $gpu_id \
+                --use_preprocessor true \
+                --split_with_space false \
+                --bpemodel ${bpemodel}.model \
+                --token_type $token_type \
+                --dataset_type $dataset_type \
+                --token_list $dict \
+                --train_data_file $feats_dir/$dumpdir/${train_set}/ark_txt.scp \
+                --valid_data_file $feats_dir/$dumpdir/${valid_set}/ark_txt.scp \
+                --resume true \
+                --output_dir ${exp_dir}/exp/${model_dir} \
+                --config $asr_config \
+                --input_size $feats_dim \
+                --ngpu $gpu_num \
+                --num_worker_count $count \
+                --multiprocessing_distributed true \
+                --dist_init_method $init_method \
+                --dist_world_size $world_size \
+                --dist_rank $rank \
+                --local_rank $local_rank 1> ${exp_dir}/exp/${model_dir}/log/train.log.$i 2>&1
+        } &
+        done
+        # NOTE(review): a bare "wait" returns 0 regardless of the children's
+        # exit codes, so a failed rank does not stop the script here even
+        # with "set -e"; stage 4 would then run without a trained model.
+        wait
+fi
+
+# Testing Stage
+# For each test set: split the key file into _nj shards, decode the shards in
+# parallel, merge the per-shard outputs and score them with compute_wer.py.
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    echo "stage 4: Inference"
+    for dset in ${test_sets}; do
+        asr_exp=${exp_dir}/exp/${model_dir}
+        inference_tag="$(basename "${inference_config}" .yaml)"
+        _dir="${asr_exp}/${inference_tag}/${inference_asr_model}/${dset}"
+        _logdir="${_dir}/logdir"
+        # NOTE: "exit 0" ends the whole script, so the remaining test sets are
+        # not decoded once one result directory already exists.
+        if [ -d ${_dir} ]; then
+            echo "${_dir} is already exists. if you want to decode again, please delete this dir first."
+            exit 0
+        fi
+        mkdir -p "${_logdir}"
+        _data="${feats_dir}/${dumpdir}/${dset}"
+        key_file=${_data}/${scp}
+        num_scp_file="$(<${key_file} wc -l)"
+        # Never start more jobs than there are utterances: _nj = min(inference_nj, #lines).
+        _nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file")
+        split_scps=
+        for n in $(seq "${_nj}"); do
+            split_scps+=" ${_logdir}/keys.${n}.scp"
+        done
+        # shellcheck disable=SC2086
+        utils/split_scp.pl "${key_file}" ${split_scps}
+        _opts=
+        if [ -n "${inference_config}" ]; then
+            _opts+="--config ${inference_config} "
+        fi
+        # JOB is the per-shard placeholder in the log, key-file and output paths.
+        ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
+            python -m funasr.bin.asr_inference_launch \
+                --batch_size 1 \
+                --ngpu "${_ngpu}" \
+                --njob ${njob} \
+                --gpuid_list ${gpuid_list} \
+                --data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
+                --key_file "${_logdir}"/keys.JOB.scp \
+                --asr_train_config "${asr_exp}"/config.yaml \
+                --asr_model_file "${asr_exp}"/"${inference_asr_model}" \
+                --output_dir "${_logdir}"/output.JOB \
+                --mode asr \
+                ${_opts}
+
+        # Concatenate the per-shard result files and sort them by utterance id.
+        # A file type is merged only if shard 1 produced it.
+        for f in token token_int score text; do
+            if [ -f "${_logdir}/output.1/1best_recog/${f}" ]; then
+                for i in $(seq "${_nj}"); do
+                    cat "${_logdir}/output.${i}/1best_recog/${f}"
+                done | sort -k1 >"${_dir}/${f}"
+            fi
+        done
+        # Score against the reference text; the last three lines of text.cer
+        # are saved to text.cer.txt and printed.
+        python utils/compute_wer.py ${_data}/text ${_dir}/text ${_dir}/text.cer
+        tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
+        cat ${_dir}/text.cer.txt
+    done
+fi
--- a/egs/librispeech/conformer/utils
+++ b/egs/librispeech/conformer/utils
@ -0,0 +1 @@
+../../aishell/transformer/utils
--- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer.py
+++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer.py
@ -74,7 +74,7 @@ def modelscope_infer(params):
    # If text exists, compute CER
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
+        text_proc_file = os.path.join(best_recog_path, "text")
        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))


--- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer_after_finetune.py
+++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k/infer_after_finetune.py
@ -38,7 +38,7 @@ def modelscope_infer_after_finetune(params):
    # computer CER if GT text is set
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
+        text_proc_file = os.path.join(decoding_path, "1best_recog/text")
        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))


--- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer.py
+++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer.py
@ -74,7 +74,7 @@ def modelscope_infer(params):
    # If text exists, compute CER
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
+        text_proc_file = os.path.join(best_recog_path, "text")
        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))


--- a/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer_after_finetune.py
+++ b/egs_modelscope/asr/data2vec/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch/infer_after_finetune.py
@ -38,7 +38,7 @@ def modelscope_infer_after_finetune(params):
    # computer CER if GT text is set
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
+        text_proc_file = os.path.join(decoding_path, "1best_recog/text")
        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))


--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.py
@ -17,7 +17,7 @@ def modelscope_infer(args):
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch")
-    parser.add_argument('--audio_in', type=str, default="./data/test")
+    parser.add_argument('--audio_in', type=str, default="./data/test/wav.scp")
    parser.add_argument('--output_dir', type=str, default="./results/")
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--gpuid', type=str, default="0")
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer.sh
@ -63,8 +63,8 @@ fi

 if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then
    echo "Computing WER ..."
-    python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
-    python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref
+    cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
+    cp ${data_dir}/text ${output_dir}/1best_recog/text.ref
    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
    tail -n 3 ${output_dir}/1best_recog/text.cer
 fi
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer_after_finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/infer_after_finetune.py
@ -34,7 +34,7 @@ def modelscope_infer_after_finetune(params):
    # computer CER if GT text is set
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
+        text_proc_file = os.path.join(decoding_path, "1best_recog/text")
        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))


--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.py
@ -17,7 +17,7 @@ def modelscope_infer(args):
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default="damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1")
-    parser.add_argument('--audio_in', type=str, default="./data/test")
+    parser.add_argument('--audio_in', type=str, default="./data/test/wav.scp")
    parser.add_argument('--output_dir', type=str, default="./results/")
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--gpuid', type=str, default="0")
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer.sh
@ -63,8 +63,8 @@ fi

 if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then
    echo "Computing WER ..."
-    python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
-    python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref
+    cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc
+    cp ${data_dir}/text ${output_dir}/1best_recog/text.ref
    python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer
    tail -n 3 ${output_dir}/1best_recog/text.cer
 fi
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer_after_finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/infer_after_finetune.py
@ -34,7 +34,7 @@ def modelscope_infer_after_finetune(params):
    # computer CER if GT text is set
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
+        text_proc_file = os.path.join(decoding_path, "1best_recog/text")
        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))


--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py
@ -75,7 +75,7 @@ def modelscope_infer(params):
    # If text exists, compute CER
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
+        text_proc_file = os.path.join(best_recog_path, "text")
        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))


--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py
@ -39,7 +39,7 @@ def modelscope_infer_after_finetune(params):
    # computer CER if GT text is set
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
+        text_proc_file = os.path.join(decoding_path, "1best_recog/text")
        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))


--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer.py
@ -75,7 +75,7 @@ def modelscope_infer(params):
    # If text exists, compute CER
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(best_recog_path, "token")
+        text_proc_file = os.path.join(best_recog_path, "text")
        compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))


--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer_after_finetune.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/infer_after_finetune.py
@ -39,7 +39,7 @@ def modelscope_infer_after_finetune(params):
    # computer CER if GT text is set
    text_in = os.path.join(params["data_dir"], "text")
    if os.path.exists(text_in):
-        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
+        text_proc_file = os.path.join(decoding_path, "1best_recog/text")
        compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))


--- a/funasr/bin/asr_inference_paraformer.py
+++ b/funasr/bin/asr_inference_paraformer.py
@ -797,7 +797,7 @@ def inference_modelscope(
                        finish_count += 1
                        # asr_utils.print_progress(finish_count / file_count)
                        if writer is not None:
-                            ibest_writer["text"][key] = text_postprocessed
+                            ibest_writer["text"][key] = " ".join(word_lists)

                    logging.info("decoding, utt: {}, predictions: {}".format(key, text))
        rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor))
--- a/funasr/bin/asr_inference_paraformer_streaming.py
+++ b/funasr/bin/asr_inference_paraformer_streaming.py
@ -42,6 +42,7 @@ from funasr.utils import asr_utils, wav_utils, postprocess_utils
 from funasr.models.frontend.wav_frontend import WavFrontend
 from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer
 from funasr.export.models.e2e_asr_paraformer import Paraformer as Paraformer_export
+np.set_printoptions(threshold=np.inf)

 class Speech2Text:
    """Speech2Text class
@ -203,7 +204,6 @@ class Speech2Text:
        # Input as audio signal
        if isinstance(speech, np.ndarray):
            speech = torch.tensor(speech)
-
        if self.frontend is not None:
            feats, feats_len = self.frontend.forward(speech, speech_lengths)
            feats = to_device(feats, device=self.device)
@ -213,13 +213,16 @@ class Speech2Text:
            feats = speech
            feats_len = speech_lengths
        lfr_factor = max(1, (feats.size()[-1] // 80) - 1)
+        feats_len = cache["encoder"]["stride"] + cache["encoder"]["pad_left"] + cache["encoder"]["pad_right"]
+        feats = feats[:,cache["encoder"]["start_idx"]:cache["encoder"]["start_idx"]+feats_len,:]
+        feats_len = torch.tensor([feats_len])
        batch = {"speech": feats, "speech_lengths": feats_len, "cache": cache}

        # a. To device
        batch = to_device(batch, device=self.device)

        # b. Forward Encoder
-        enc, enc_len = self.asr_model.encode_chunk(**batch)
+        enc, enc_len = self.asr_model.encode_chunk(feats, feats_len, cache)
        if isinstance(enc, tuple):
            enc = enc[0]
        # assert len(enc) == 1, len(enc)
@ -578,7 +581,22 @@ def inference_modelscope(
        speech2text = Speech2TextExport(**speech2text_kwargs)
    else:
        speech2text = Speech2Text(**speech2text_kwargs)
+        
+    def _load_bytes(input):
+        """Decode raw 16-bit PCM bytes into float32 samples in [-1.0, 1.0).
+
+        Args:
+            input: bytes-like buffer of native-endian int16 samples.
+
+        Returns:
+            1-D float32 ``np.ndarray`` holding one value per input sample.
+        """

+        # The sample type is fixed to int16, so the normalisation constant is
+        # always 2 ** 15 and no runtime dtype validation is needed.
+        pcm = np.frombuffer(input, dtype=np.int16)
+        return pcm.astype(np.float32) / 32768.0
+    
    def _forward(
            data_path_and_name_and_type,
            raw_inputs: Union[np.ndarray, torch.Tensor] = None,
@ -589,10 +607,12 @@ def inference_modelscope(
    ):

        # 3. Build data-iterator
+        if data_path_and_name_and_type is not None and data_path_and_name_and_type[2] == "bytes":
+            raw_inputs = _load_bytes(data_path_and_name_and_type[0])
+            raw_inputs = torch.tensor(raw_inputs)
        if data_path_and_name_and_type is None and raw_inputs is not None:
            if isinstance(raw_inputs, np.ndarray):
                raw_inputs = torch.tensor(raw_inputs)
-
        is_final = False
        if param_dict is not None and "cache" in param_dict:
            cache = param_dict["cache"]
@ -605,62 +625,87 @@ def inference_modelscope(
        asr_result = ""
        wait = True
        if len(cache) == 0:
-            cache["encoder"] = {"start_idx": 0, "pad_left": 0, "stride": 10, "pad_right": 5, "cif_hidden": None, "cif_alphas": None}
+            cache["encoder"] = {"start_idx": 0, "pad_left": 0, "stride": 10, "pad_right": 5, "cif_hidden": None, "cif_alphas": None, "is_final": is_final, "left": 0, "right": 0}
            cache_de = {"decode_fsmn": None}
            cache["decoder"] = cache_de
            cache["first_chunk"] = True
            cache["speech"] = []
-            cache["chunk_index"] = 0
-            cache["speech_chunk"] = []
+            cache["accum_speech"] = 0

        if raw_inputs is not None:
            if len(cache["speech"]) == 0:
                cache["speech"] = raw_inputs
            else:
                cache["speech"] = torch.cat([cache["speech"], raw_inputs], dim=0)
-            if len(cache["speech_chunk"]) == 0:
-                cache["speech_chunk"] = raw_inputs
-            else:
-                cache["speech_chunk"] = torch.cat([cache["speech_chunk"], raw_inputs], dim=0)
-            while len(cache["speech_chunk"]) >= 960:
+            cache["accum_speech"] += len(raw_inputs)
+            while cache["accum_speech"] >= 960:
                if cache["first_chunk"]:
-                    if len(cache["speech_chunk"]) >= 14400:
-                        speech = torch.unsqueeze(cache["speech_chunk"][0:14400], axis=0)
-                        speech_length = torch.tensor([14400])
+                    if cache["accum_speech"] >= 14400:
+                        speech = torch.unsqueeze(cache["speech"], axis=0)
+                        speech_length = torch.tensor([len(cache["speech"])])
+                        cache["encoder"]["pad_left"] = 5 
+                        cache["encoder"]["pad_right"] = 5 
+                        cache["encoder"]["stride"] = 10
+                        cache["encoder"]["left"] = 5
+                        cache["encoder"]["right"] = 0
                        results = speech2text(cache, speech, speech_length)
-                        cache["speech_chunk"]= cache["speech_chunk"][4800:]
+                        cache["accum_speech"] -= 4800
                        cache["first_chunk"] = False
                        cache["encoder"]["start_idx"] = -5
+                        cache["encoder"]["is_final"] = False
                        wait = False
                    else:
                        if is_final:
-                            cache["encoder"]["stride"] = len(cache["speech_chunk"]) // 960
+                            cache["encoder"]["stride"] = len(cache["speech"]) // 960
+                            cache["encoder"]["pad_left"] = 0
                            cache["encoder"]["pad_right"] = 0
-                            speech = torch.unsqueeze(cache["speech_chunk"], axis=0)
-                            speech_length = torch.tensor([len(cache["speech_chunk"])])
+                            speech = torch.unsqueeze(cache["speech"], axis=0)
+                            speech_length = torch.tensor([len(cache["speech"])])
                            results = speech2text(cache, speech, speech_length)
-                            cache["speech_chunk"] = []
+                            cache["accum_speech"] = 0
                            wait = False
                        else:
                            break
                else:
-                    if len(cache["speech_chunk"]) >= 19200:
+                    if cache["accum_speech"] >= 19200:
                        cache["encoder"]["start_idx"] += 10
+                        cache["encoder"]["stride"] = 10
                        cache["encoder"]["pad_left"] = 5
-                        speech = torch.unsqueeze(cache["speech_chunk"][:19200], axis=0)
-                        speech_length = torch.tensor([19200])
+                        cache["encoder"]["pad_right"] = 5
+                        cache["encoder"]["left"] = 0
+                        cache["encoder"]["right"] = 0
+                        speech = torch.unsqueeze(cache["speech"], axis=0)
+                        speech_length = torch.tensor([len(cache["speech"])])
                        results = speech2text(cache, speech, speech_length)
-                        cache["speech_chunk"] = cache["speech_chunk"][9600:]
+                        cache["accum_speech"] -= 9600
                        wait = False
                    else:
                        if is_final:
-                            cache["encoder"]["stride"] = len(cache["speech_chunk"]) // 960
-                            cache["encoder"]["pad_right"] = 0
-                            speech = torch.unsqueeze(cache["speech_chunk"], axis=0)
-                            speech_length = torch.tensor([len(cache["speech_chunk"])])
-                            results = speech2text(cache, speech, speech_length)
-                            cache["speech_chunk"] = []
-                            wait = False
+                            cache["encoder"]["is_final"] = True
+                            if cache["accum_speech"] >= 14400:
+                                cache["encoder"]["start_idx"] += 10
+                                cache["encoder"]["stride"] = 10
+                                cache["encoder"]["pad_left"] = 5
+                                cache["encoder"]["pad_right"] = 5
+                                cache["encoder"]["left"] = 0
+                                cache["encoder"]["right"] = cache["accum_speech"] // 960 - 15
+                                speech = torch.unsqueeze(cache["speech"], axis=0)
+                                speech_length = torch.tensor([len(cache["speech"])])
+                                results = speech2text(cache, speech, speech_length)
+                                cache["accum_speech"] -= 9600
+                                wait = False
+                            else:
+                                cache["encoder"]["start_idx"] += 10
+                                cache["encoder"]["stride"] = cache["accum_speech"] // 960 - 5
+                                cache["encoder"]["pad_left"] = 5
+                                cache["encoder"]["pad_right"] = 0
+                                cache["encoder"]["left"] = 0
+                                cache["encoder"]["right"] = 0
+                                speech = torch.unsqueeze(cache["speech"], axis=0)
+                                speech_length = torch.tensor([len(cache["speech"])])
+                                results = speech2text(cache, speech, speech_length)
+                                cache["accum_speech"] = 0
+                                wait = False
                        else:
                            break
                
--- a/funasr/bin/asr_inference_paraformer_vad.py
+++ b/funasr/bin/asr_inference_paraformer_vad.py
@ -338,7 +338,7 @@ def inference_modelscope(
                    ibest_writer["token"][key] = " ".join(token)
                    ibest_writer["token_int"][key] = " ".join(map(str, token_int))
                    ibest_writer["vad"][key] = "{}".format(vadsegments)
-                    ibest_writer["text"][key] = text_postprocessed
+                    ibest_writer["text"][key] = " ".join(word_lists)
                    ibest_writer["text_with_punc"][key] = text_postprocessed_punc
                    if time_stamp_postprocessed is not None:
                        ibest_writer["time_stamp"][key] = "{}".format(time_stamp_postprocessed)
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@ -670,7 +670,7 @@ def inference_modelscope(
                    ibest_writer["token"][key] = " ".join(token)
                    ibest_writer["token_int"][key] = " ".join(map(str, token_int))
                    ibest_writer["vad"][key] = "{}".format(vadsegments)
-                    ibest_writer["text"][key] = text_postprocessed
+                    ibest_writer["text"][key] = " ".join(word_lists)
                    ibest_writer["text_with_punc"][key] = text_postprocessed_punc
                    if time_stamp_postprocessed is not None:
                        ibest_writer["time_stamp"][key] = "{}".format(time_stamp_postprocessed)
--- a/funasr/bin/asr_inference_rnnt.py
+++ b/funasr/bin/asr_inference_rnnt.py
@ -738,13 +738,13 @@ def inference_modelscope(
                        ibest_writer["rtf"][key] = rtf_cur

                    if text is not None:
-                        text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
+                        text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token)
                        item = {'key': key, 'value': text_postprocessed}
                        asr_result_list.append(item)
                        finish_count += 1
                        # asr_utils.print_progress(finish_count / file_count)
                        if writer is not None:
-                            ibest_writer["text"][key] = text_postprocessed
+                            ibest_writer["text"][key] = " ".join(word_lists)

                    logging.info("decoding, utt: {}, predictions: {}".format(key, text))
        rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor))
--- a/funasr/bin/asr_inference_uniasr.py
+++ b/funasr/bin/asr_inference_uniasr.py
@ -507,13 +507,13 @@ def inference_modelscope(
                    ibest_writer["score"][key] = str(hyp.score)
    
                if text is not None:
-                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1
                    asr_utils.print_progress(finish_count / file_count)
                    if writer is not None:
-                        ibest_writer["text"][key] = text_postprocessed
+                        ibest_writer["text"][key] = " ".join(word_lists)
        return asr_result_list
    
    return _forward
--- a/funasr/bin/asr_inference_uniasr_vad.py
+++ b/funasr/bin/asr_inference_uniasr_vad.py
@ -507,13 +507,13 @@ def inference_modelscope(
                    ibest_writer["score"][key] = str(hyp.score)
    
                if text is not None:
-                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1
                    asr_utils.print_progress(finish_count / file_count)
                    if writer is not None:
-                        ibest_writer["text"][key] = text_postprocessed
+                        ibest_writer["text"][key] = " ".join(word_lists)
        return asr_result_list
    
    return _forward
--- a/funasr/datasets/large_datasets/utils/tokenize.py
+++ b/funasr/datasets/large_datasets/utils/tokenize.py
@ -37,7 +37,7 @@ def tokenize(data,
    vad = -2

    if bpe_tokenizer is not None:
-        text = bpe_tokenizer.text2tokens(text)
+        text = bpe_tokenizer.text2tokens("".join(text))

    if seg_dict is not None:
        assert isinstance(seg_dict, dict)
--- a/funasr/export/export_model.py
+++ b/funasr/export/export_model.py
@ -19,6 +19,7 @@ class ModelExport:
        self,
        cache_dir: Union[Path, str] = None,
        onnx: bool = True,
+        device: str = "cpu",
        quant: bool = True,
        fallback_num: int = 0,
        audio_in: str = None,
@ -36,6 +37,7 @@ class ModelExport:
        )
        print("output dir: {}".format(self.cache_dir))
        self.onnx = onnx
+        self.device = device
        self.quant = quant
        self.fallback_num = fallback_num
        self.frontend = None
@ -112,6 +114,10 @@ class ModelExport:
        else:
            dummy_input = model.get_dummy_inputs()

+        if self.device == 'cuda':
+            model = model.cuda()
+            dummy_input = tuple([i.cuda() for i in dummy_input])
+
        # model_script = torch.jit.script(model)
        model_script = torch.jit.trace(model, dummy_input)
        model_script.save(os.path.join(path, f'{model.model_name}.torchscripts'))
@ -234,6 +240,7 @@ if __name__ == '__main__':
    parser.add_argument('--model-name', type=str, required=True)
    parser.add_argument('--export-dir', type=str, required=True)
    parser.add_argument('--type', type=str, default='onnx', help='["onnx", "torch"]')
+    parser.add_argument('--device', type=str, default='cpu', help='["cpu", "cuda"]')
    parser.add_argument('--quantize', type=str2bool, default=False, help='export quantized model')
    parser.add_argument('--fallback-num', type=int, default=0, help='amp fallback number')
    parser.add_argument('--audio_in', type=str, default=None, help='["wav", "wav.scp"]')
@ -243,6 +250,7 @@ if __name__ == '__main__':
    export_model = ModelExport(
        cache_dir=args.export_dir,
        onnx=args.type == 'onnx',
+        device=args.device,
        quant=args.quantize,
        fallback_num=args.fallback_num,
        audio_in=args.audio_in,
--- a/funasr/export/models/modules/multihead_att.py
+++ b/funasr/export/models/modules/multihead_att.py
@ -75,8 +75,8 @@ def preprocess_for_attn(x, mask, cache, pad_fn):
    return x, cache


-torch_version = float(".".join(torch.__version__.split(".")[:2]))
-if torch_version >= 1.8:
+torch_version = tuple([int(i) for i in torch.__version__.split(".")[:2]])
+if torch_version >= (1, 8):
    import torch.fx
    torch.fx.wrap('preprocess_for_attn')

--- a/funasr/models/decoder/contextual_decoder.py
+++ b/funasr/models/decoder/contextual_decoder.py
@ -74,7 +74,7 @@ class ContextualDecoderLayer(nn.Module):
        return x, tgt_mask, x_self_attn, x_src_attn


-class ContexutalBiasDecoder(nn.Module):
+class ContextualBiasDecoder(nn.Module):
    def __init__(
        self,
        size,
@ -83,7 +83,7 @@ class ContexutalBiasDecoder(nn.Module):
        normalize_before=True,
    ):
        """Construct an DecoderLayer object."""
-        super(ContexutalBiasDecoder, self).__init__()
+        super(ContextualBiasDecoder, self).__init__()
        self.size = size
        self.src_attn = src_attn
        if src_attn is not None:
@ -186,7 +186,7 @@ class ContextualParaformerDecoder(ParaformerSANMDecoder):
            ),
        )
        self.dropout = nn.Dropout(dropout_rate)
-        self.bias_decoder = ContexutalBiasDecoder(
+        self.bias_decoder = ContextualBiasDecoder(
            size=attention_dim,
            src_attn=MultiHeadedAttentionCrossAtt(
                attention_heads, attention_dim, src_attention_dropout_rate
--- a/funasr/models/decoder/sanm_decoder.py
+++ b/funasr/models/decoder/sanm_decoder.py
@ -104,7 +104,6 @@ class DecoderLayerSANM(nn.Module):

            x = residual + self.dropout(self.src_attn(x, memory, memory_mask))

-
        return x, tgt_mask, memory, memory_mask, cache

    def forward_chunk(self, tgt, tgt_mask, memory, memory_mask=None, cache=None):
@ -400,7 +399,7 @@ class FsmnDecoderSCAMAOpt(BaseTransformerDecoder):
        for i in range(self.att_layer_num):
            decoder = self.decoders[i]
            c = cache[i]
-            x, tgt_mask, memory, memory_mask, c_ret = decoder(
+            x, tgt_mask, memory, memory_mask, c_ret = decoder.forward_chunk(
                x, tgt_mask, memory, memory_mask, cache=c
            )
            new_cache.append(c_ret)
@ -410,13 +409,13 @@ class FsmnDecoderSCAMAOpt(BaseTransformerDecoder):
                j = i + self.att_layer_num
                decoder = self.decoders2[i]
                c = cache[j]
-                x, tgt_mask, memory, memory_mask, c_ret = decoder(
+                x, tgt_mask, memory, memory_mask, c_ret = decoder.forward_chunk(
                    x, tgt_mask, memory, memory_mask, cache=c
                )
                new_cache.append(c_ret)

        for decoder in self.decoders3:
-            x, tgt_mask, memory, memory_mask, _ = decoder(
+            x, tgt_mask, memory, memory_mask, _ = decoder.forward_chunk(
                x, tgt_mask, memory, None, cache=None
            )

@ -1077,7 +1076,7 @@ class ParaformerSANMDecoder(BaseTransformerDecoder):
        for i in range(self.att_layer_num):
            decoder = self.decoders[i]
            c = cache[i]
-            x, tgt_mask, memory, memory_mask, c_ret = decoder(
+            x, tgt_mask, memory, memory_mask, c_ret = decoder.forward_chunk(
                x, tgt_mask, memory, None, cache=c
            )
            new_cache.append(c_ret)
@ -1087,14 +1086,14 @@ class ParaformerSANMDecoder(BaseTransformerDecoder):
                j = i + self.att_layer_num
                decoder = self.decoders2[i]
                c = cache[j]
-                x, tgt_mask, memory, memory_mask, c_ret = decoder(
+                x, tgt_mask, memory, memory_mask, c_ret = decoder.forward_chunk(
                    x, tgt_mask, memory, None, cache=c
                )
                new_cache.append(c_ret)

        for decoder in self.decoders3:

-            x, tgt_mask, memory, memory_mask, _ = decoder(
+            x, tgt_mask, memory, memory_mask, _ = decoder.forward_chunk(
                x, tgt_mask, memory, None, cache=None
            )

--- a/funasr/models/e2e_asr_paraformer.py
+++ b/funasr/models/e2e_asr_paraformer.py
@ -370,19 +370,10 @@ class Paraformer(AbsESPnetModel):
                encoder_out, encoder_out_lens
            )

-        assert encoder_out.size(0) == speech.size(0), (
-            encoder_out.size(),
-            speech.size(0),
-        )
-        assert encoder_out.size(1) <= encoder_out_lens.max(), (
-            encoder_out.size(),
-            encoder_out_lens.max(),
-        )
-
        if intermediate_outs is not None:
            return (encoder_out, intermediate_outs), encoder_out_lens

-        return encoder_out, encoder_out_lens
+        return encoder_out, torch.tensor([encoder_out.size(1)])

    def calc_predictor(self, encoder_out, encoder_out_lens):

@ -1034,16 +1025,76 @@ class BiCifParaformer(Paraformer):

        # 1. Encoder
        encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
+        intermediate_outs = None
+        if isinstance(encoder_out, tuple):
+            intermediate_outs = encoder_out[1]
+            encoder_out = encoder_out[0]

+        loss_att, acc_att, cer_att, wer_att = None, None, None, None
+        loss_ctc, cer_ctc = None, None
+        loss_pre = None
        stats = dict()

+        # 1. CTC branch
+        if self.ctc_weight != 0.0:
+            loss_ctc, cer_ctc = self._calc_ctc_loss(
+                encoder_out, encoder_out_lens, text, text_lengths
+            )
+
+            # Collect CTC branch stats
+            stats["loss_ctc"] = loss_ctc.detach() if loss_ctc is not None else None
+            stats["cer_ctc"] = cer_ctc
+
+        # Intermediate CTC (optional)
+        loss_interctc = 0.0
+        if self.interctc_weight != 0.0 and intermediate_outs is not None:
+            for layer_idx, intermediate_out in intermediate_outs:
+                # we assume intermediate_out has the same length & padding
+                # as those of encoder_out
+                loss_ic, cer_ic = self._calc_ctc_loss(
+                    intermediate_out, encoder_out_lens, text, text_lengths
+                )
+                loss_interctc = loss_interctc + loss_ic
+
+                # Collect Intermediate CTC stats
+                stats["loss_interctc_layer{}".format(layer_idx)] = (
+                    loss_ic.detach() if loss_ic is not None else None
+                )
+                stats["cer_interctc_layer{}".format(layer_idx)] = cer_ic
+
+            loss_interctc = loss_interctc / len(intermediate_outs)
+
+            # calculate whole encoder loss
+            loss_ctc = (
+                               1 - self.interctc_weight
+                       ) * loss_ctc + self.interctc_weight * loss_interctc
+
+        # 2b. Attention decoder branch
+        if self.ctc_weight != 1.0:
+            loss_att, acc_att, cer_att, wer_att, loss_pre = self._calc_att_loss(
+                encoder_out, encoder_out_lens, text, text_lengths
+            )
+
        loss_pre2 = self._calc_pre2_loss(
            encoder_out, encoder_out_lens, text, text_lengths
        )

-        loss = loss_pre2
+        # 3. CTC-Att loss definition
+        if self.ctc_weight == 0.0:
+            loss = loss_att + loss_pre * self.predictor_weight + loss_pre2 * self.predictor_weight * 0.5
+        elif self.ctc_weight == 1.0:
+            loss = loss_ctc
+        else:
+            loss = self.ctc_weight * loss_ctc + (1 - self.ctc_weight) * loss_att + loss_pre * self.predictor_weight + loss_pre2 * self.predictor_weight * 0.5

+        # Collect Attn branch stats
+        stats["loss_att"] = loss_att.detach() if loss_att is not None else None
+        stats["acc"] = acc_att
+        stats["cer"] = cer_att
+        stats["wer"] = wer_att
+        stats["loss_pre"] = loss_pre.detach().cpu() if loss_pre is not None else None
        stats["loss_pre2"] = loss_pre2.detach().cpu()
+
        stats["loss"] = torch.clone(loss.detach())

        # force_gatherable: to-device and to-tensor if scalar for DataParallel
@ -1094,6 +1145,7 @@ class ContextualParaformer(Paraformer):
            inner_dim: int = 256,
            bias_encoder_type: str = 'lstm',
            label_bracket: bool = False,
+            use_decoder_embedding: bool = False,
    ):
        assert check_argument_types()
        assert 0.0 <= ctc_weight <= 1.0, ctc_weight
@ -1147,6 +1199,7 @@ class ContextualParaformer(Paraformer):
            self.hotword_buffer = None
            self.length_record = []
            self.current_buffer_length = 0
+        self.use_decoder_embedding = use_decoder_embedding

    def forward(
            self,
@ -1288,7 +1341,10 @@ class ContextualParaformer(Paraformer):
                    hw_list.append(hw_tokens)
        # padding
        hw_list_pad = pad_list(hw_list, 0)
-        hw_embed = self.decoder.embed(hw_list_pad)
+        if self.use_decoder_embedding:
+            hw_embed = self.decoder.embed(hw_list_pad)
+        else:
+            hw_embed = self.bias_embed(hw_list_pad)
        hw_embed, (_, _) = self.bias_encoder(hw_embed)
        _ind = np.arange(0, len(hw_list)).tolist()
        # update self.hotword_buffer, throw a part if oversize
@ -1404,13 +1460,19 @@ class ContextualParaformer(Paraformer):
            # default hotword list
            hw_list = [torch.Tensor([self.sos]).long().to(encoder_out.device)]  # empty hotword list
            hw_list_pad = pad_list(hw_list, 0)
-            hw_embed = self.bias_embed(hw_list_pad)
+            if self.use_decoder_embedding:
+                hw_embed = self.decoder.embed(hw_list_pad)
+            else:
+                hw_embed = self.bias_embed(hw_list_pad)
            _, (h_n, _) = self.bias_encoder(hw_embed)
            contextual_info = h_n.squeeze(0).repeat(encoder_out.shape[0], 1, 1)
        else:
            hw_lengths = [len(i) for i in hw_list]
            hw_list_pad = pad_list([torch.Tensor(i).long() for i in hw_list], 0).to(encoder_out.device)
-            hw_embed = self.bias_embed(hw_list_pad)
+            if self.use_decoder_embedding:
+                hw_embed = self.decoder.embed(hw_list_pad)
+            else:
+                hw_embed = self.bias_embed(hw_list_pad)
            hw_embed = torch.nn.utils.rnn.pack_padded_sequence(hw_embed, hw_lengths, batch_first=True,
                                                               enforce_sorted=False)
            _, (h_n, _) = self.bias_encoder(hw_embed)
--- a/funasr/models/predictor/cif.py
+++ b/funasr/models/predictor/cif.py
@ -200,6 +200,7 @@ class CifPredictorV2(nn.Module):
        return acoustic_embeds, token_num, alphas, cif_peak

    def forward_chunk(self, hidden, cache=None):
+        b, t, d = hidden.size()
        h = hidden
        context = h.transpose(1, 2)
        queries = self.pad(context)
@ -220,6 +221,8 @@ class CifPredictorV2(nn.Module):
            alphas = alphas * mask_chunk_predictor
      
        if cache is not None:
+            if cache["is_final"]:
+                alphas[:, cache["stride"] + cache["pad_left"] - 1] += 0.45
            if cache["cif_hidden"] is not None:
                hidden = torch.cat((cache["cif_hidden"], hidden), 1)
            if cache["cif_alphas"] is not None:
@ -241,7 +244,6 @@ class CifPredictorV2(nn.Module):
                mask_chunk_peak_predictor[:, :pre_alphas_length] = 1.0
            mask_chunk_peak_predictor[:, pre_alphas_length + cache["pad_left"]:pre_alphas_length + cache["stride"] + cache["pad_left"]] = 1.0
            
-
        if mask_chunk_peak_predictor is not None:
            cif_peak = cif_peak * mask_chunk_peak_predictor.squeeze(-1)
        
--- a/funasr/modules/embedding.py
+++ b/funasr/modules/embedding.py
@ -8,7 +8,7 @@

 import math
 import torch
-
+import torch.nn.functional as F

 def _pre_hook(
    state_dict,
@ -409,9 +409,18 @@ class SinusoidalPositionEncoder(torch.nn.Module):

    def forward_chunk(self, x, cache=None):
        start_idx = 0
+        pad_left = 0
+        pad_right = 0
        batch_size, timesteps, input_dim = x.size()
        if cache is not None:
            start_idx = cache["start_idx"]
+            pad_left = cache["left"]
+            pad_right = cache["right"]
        positions = torch.arange(1, timesteps+start_idx+1)[None, :]
        position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)
-        return x + position_encoding[:, start_idx: start_idx + timesteps]
+        outputs = x + position_encoding[:, start_idx: start_idx + timesteps]
+        outputs = outputs.transpose(1,2)
+        outputs = F.pad(outputs, (pad_left, pad_right))
+        outputs = outputs.transpose(1,2)
+        return outputs
+       
--- a/funasr/runtime/grpc/CMakeLists.txt
+++ b/funasr/runtime/grpc/CMakeLists.txt
@ -48,7 +48,7 @@ include_directories("${CMAKE_CURRENT_BINARY_DIR}")

 include_directories(../onnxruntime/include/)
 link_directories(../onnxruntime/build/src/)
-link_directories(../onnxruntime/build/third_party/webrtc/)
+link_directories(../onnxruntime/build/third_party/yaml-cpp/)

 link_directories(${ONNXRUNTIME_DIR}/lib)
 add_subdirectory("../onnxruntime/src" onnx_src)
@ -75,7 +75,6 @@ foreach(_target
  target_link_libraries(${_target}
    rg_grpc_proto
    rapidasr
-    webrtcvad
    ${EXTRA_LIBS}
    ${_REFLECTION}
    ${_GRPC_GRPCPP}
--- a/funasr/runtime/grpc/Readme.md
+++ b/funasr/runtime/grpc/Readme.md
@ -1,14 +1,13 @@
 ## paraformer grpc onnx server in c++

-
 #### Step 1. Build ../onnxruntime as it's document
 ```
-#put onnx-lib & onnx-asr-model & vocab.txt into /path/to/asrmodel(eg: /data/asrmodel)
+#put onnx-lib & onnx-asr-model into /path/to/asrmodel(eg: /data/asrmodel)
 ls /data/asrmodel/
 onnxruntime-linux-x64-1.14.0  speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch

-file /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/vocab.txt
-UTF-8 Unicode text
+#make sure you have config.yaml, am.mvn, model.onnx(or model_quant.onnx) under speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+
 ```

 #### Step 2. Compile and install grpc v1.52.0 in case of grpc bugs
@ -48,13 +47,12 @@ Usage: ./cmake/build/paraformer_server port thread_num /path/to/model_file quant
 ./cmake/build/paraformer_server 10108 4 /data/asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch false
 ```

-
-
 #### Step 5. Start grpc python paraformer client  on PC with MIC
 ```
 cd ../python/grpc
 python grpc_main_client_mic.py  --host $server_ip --port 10108
 ```
+
 The `grpc_main_client_mic.py` follows the [original design] (https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/grpc#workflow-in-desgin) by sending audio_data with chunks. If you want to send audio_data in one request, here is an example:

 ```
@ -115,3 +113,8 @@ if __name__ == '__main__':
    asyncio.run(test())

 ```
+
+
+## Acknowledgements
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We acknowledge [DeepScience](https://www.deepscience.cn) for contributing the grpc service.
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@ -1,7 +1,6 @@
 cmake_minimum_required(VERSION 3.10)

-#-DONNXRUNTIME_DIR=D:\thirdpart\onnxruntime
-project(FastASR)
+project(FunASRonnx)

 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@ -23,8 +22,6 @@ link_directories(${ONNXRUNTIME_DIR}/lib)

 endif()

-#option(FASTASR_BUILD_PYTHON_MODULE "build python module, using FastASR in Python" OFF)
-
-add_subdirectory("./third_party/webrtc")
+add_subdirectory("./third_party/yaml-cpp")
 add_subdirectory(src)
 add_subdirectory(tester)
--- a/funasr/runtime/onnxruntime/include/webrtc_vad.h
+++ b/funasr/runtime/onnxruntime/include/webrtc_vad.h
@ -1,87 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/*
- * This header file includes the VAD API calls. Specific function calls are
- * given below.
- */
-
-#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
-#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef struct WebRtcVadInst VadInst;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Creates an instance to the VAD structure.
-VadInst* WebRtcVad_Create(void);
-
-// Frees the dynamic memory of a specified VAD instance.
-//
-// - handle [i] : Pointer to VAD instance that should be freed.
-void WebRtcVad_Free(VadInst* handle);
-
-// Initializes a VAD instance.
-//
-// - handle [i/o] : Instance that should be initialized.
-//
-// returns        : 0 - (OK),
-//                 -1 - (null pointer or Default mode could not be set).
-int WebRtcVad_Init(VadInst* handle);
-
-// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
-// restrictive in reporting speech. Put in other words the probability of being
-// speech when the VAD returns 1 is increased with increasing mode. As a
-// consequence also the missed detection rate goes up.
-//
-// - handle [i/o] : VAD instance.
-// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
-//
-// returns        : 0 - (OK),
-//                 -1 - (null pointer, mode could not be set or the VAD instance
-//                       has not been initialized).
-int WebRtcVad_set_mode(VadInst* handle, int mode);
-
-// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
-// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
-//
-// - handle       [i/o] : VAD Instance. Needs to be initialized by
-//                        WebRtcVad_Init() before call.
-// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
-// - audio_frame  [i]   : Audio frame buffer.
-// - frame_length [i]   : Length of audio frame buffer in number of samples.
-//
-// returns              : 1 - (Active Voice),
-//                        0 - (Non-active Voice),
-//                       -1 - (Error)
-int WebRtcVad_Process(VadInst* handle,
-                      int fs,
-                      const int16_t* audio_frame,
-                      size_t frame_length);
-
-// Checks for valid combinations of |rate| and |frame_length|. We support 10,
-// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
-//
-// - rate         [i] : Sampling frequency (Hz).
-// - frame_length [i] : Speech frame buffer length in number of samples.
-//
-// returns            : 0 - (valid combination), -1 - (invalid combination)
-int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/anchor.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/anchor.h
@ -0,0 +1,17 @@
+#ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+
+namespace YAML {
+typedef std::size_t anchor_t;  // unsigned integer id used to refer to a YAML anchor
+const anchor_t NullAnchor = 0;  // id 0 is reserved; presumably means "no anchor" -- confirm in the parser
+}
+
+#endif  // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/binary.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/binary.h
@ -0,0 +1,67 @@
+#ifndef BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+YAML_CPP_API std::string EncodeBase64(const unsigned char *data,
+                                      std::size_t size);  // declaration only (defined outside this header); presumably encodes `size` bytes at `data` -- confirm
+YAML_CPP_API std::vector<unsigned char> DecodeBase64(const std::string &input);  // declaration only; presumably the inverse of EncodeBase64 -- confirm
+
+class YAML_CPP_API Binary {  // byte buffer that either owns a vector (m_data) or views caller-owned memory
+ public:
+  Binary() : m_unownedData(0), m_unownedSize(0) {}  // empty buffer in the owned state
+  Binary(const unsigned char *data_, std::size_t size_)
+      : m_unownedData(data_), m_unownedSize(size_) {}  // stores the pointer only, no copy: caller must keep the bytes alive
+
+  bool owned() const { return !m_unownedData; }  // owned <=> no external pointer is set
+  std::size_t size() const { return owned() ? m_data.size() : m_unownedSize; }
+  const unsigned char *data() const {
+    return owned() ? &m_data[0] : m_unownedData;  // NOTE(review): m_data[0] on an empty owned buffer is out of range
+  }
+
+  void swap(std::vector<unsigned char> &rhs) {  // exchanges contents with rhs; this object is in the owned state afterwards
+    if (m_unownedData) {
+      m_data.swap(rhs);  // rhs's bytes become the owned storage
+      rhs.clear();
+      rhs.resize(m_unownedSize);
+      std::copy(m_unownedData, m_unownedData + m_unownedSize, rhs.begin());  // rhs receives a copy of the viewed bytes; NOTE(review): <algorithm> is not included by this header
+      m_unownedData = 0;
+      m_unownedSize = 0;
+    } else {
+      m_data.swap(rhs);
+    }
+  }
+
+  bool operator==(const Binary &rhs) const {  // equal when sizes match and every byte matches
+    const std::size_t s = size();
+    if (s != rhs.size())
+      return false;
+    const unsigned char *d1 = data();
+    const unsigned char *d2 = rhs.data();
+    for (std::size_t i = 0; i < s; i++) {
+      if (*d1++ != *d2++)
+        return false;
+    }
+    return true;
+  }
+
+  bool operator!=(const Binary &rhs) const { return !(*this == rhs); }
+
+ private:
+  std::vector<unsigned char> m_data;  // storage used in the owned state
+  const unsigned char *m_unownedData;  // non-null only while viewing external memory
+  std::size_t m_unownedSize;  // byte count of the external view
+};
+}
+
+#endif  // BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/anchordict.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/anchordict.h
@ -0,0 +1,39 @@
+#ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+
+#include "../anchor.h"
+
+namespace YAML {
+/**
+ * An object that stores and retrieves values correlating to {@link anchor_t}
+ * values.
+ *
+ * <p>Efficient implementation that can make assumptions about how
+ * {@code anchor_t} values are assigned by the {@link Parser} class.
+ */
+template <class T>
+class AnchorDict {
+ public:
+  void Register(anchor_t anchor, T value) {  // stores value in slot anchor - 1, growing the vector when needed
+    if (anchor > m_data.size()) {
+      m_data.resize(anchor);
+    }
+    m_data[anchor - 1] = value;  // NOTE(review): anchor == 0 (NullAnchor) wraps to an out-of-range index
+  }
+
+  T Get(anchor_t anchor) const { return m_data[anchor - 1]; }  // no bounds check: anchor must be in 1..m_data.size()
+
+ private:
+  std::vector<T> m_data;  // slot i holds the value registered for anchor i + 1
+};
+}
+
+#endif  // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/graphbuilder.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/contrib/graphbuilder.h
@ -0,0 +1,149 @@
+#ifndef GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include <string>
+
+namespace YAML {
+class Parser;
+
+// GraphBuilderInterface
+// . Abstraction of node creation; nodes are passed around as opaque void*
+// . pParentNode is always NULL or the return value of one of the NewXXX()
+//   functions.
+class GraphBuilderInterface {
+ public:
+  virtual ~GraphBuilderInterface() = 0;  // pure virtual: keeps the class abstract; its definition is not in this header
+
+  // Create and return a new node with a null value.
+  virtual void *NewNull(const Mark &mark, void *pParentNode) = 0;
+
+  // Create and return a new node with the given tag and value.
+  virtual void *NewScalar(const Mark &mark, const std::string &tag,
+                          void *pParentNode, const std::string &value) = 0;
+
+  // Create and return a new sequence node
+  virtual void *NewSequence(const Mark &mark, const std::string &tag,
+                            void *pParentNode) = 0;
+
+  // Add pNode to pSequence.  pNode was created with one of the NewXxx()
+  // functions and pSequence with NewSequence().
+  virtual void AppendToSequence(void *pSequence, void *pNode) = 0;
+
+  // Note that no more entries will be added to pSequence
+  virtual void SequenceComplete(void *pSequence) { (void)pSequence; }  // default: no-op
+
+  // Create and return a new map node
+  virtual void *NewMap(const Mark &mark, const std::string &tag,
+                       void *pParentNode) = 0;
+
+  // Add the pKeyNode => pValueNode mapping to pMap.  pKeyNode and pValueNode
+  // were created with one of the NewXxx() methods and pMap with NewMap().
+  virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) = 0;
+
+  // Note that no more assignments will be made in pMap
+  virtual void MapComplete(void *pMap) { (void)pMap; }  // default: no-op
+
+  // Return the node that should be used in place of an alias referencing
+  // pNode (pNode by default)
+  virtual void *AnchorReference(const Mark &mark, void *pNode) {
+    (void)mark;  // unused in the default implementation
+    return pNode;
+  }
+};
+
+// Typesafe wrapper for GraphBuilderInterface.  Assumes that Impl defines
+// Node, Sequence, and Map types.  Sequence and Map must derive from Node
+// (unless Node is defined as void).  Impl must also implement function with
+// all of the same names as the virtual functions in GraphBuilderInterface
+// -- including the ones with default implementations -- but with the
+// prototypes changed to accept an explicit Node*, Sequence*, or Map* where
+// appropriate.
+template <class Impl>
+class GraphBuilder : public GraphBuilderInterface {
+ public:
+  typedef typename Impl::Node Node;
+  typedef typename Impl::Sequence Sequence;
+  typedef typename Impl::Map Map;
+
+  GraphBuilder(Impl &impl) : m_impl(impl) {  // impl is held by reference and must outlive this wrapper
+    Map *pMap = NULL;
+    Sequence *pSeq = NULL;
+    Node *pNode = NULL;
+
+    // Type consistency checks: these assignments only compile when Map* and
+    // Sequence* convert implicitly to Node*; they have no runtime purpose.
+    pNode = pMap;
+    pNode = pSeq;
+  }
+
+  GraphBuilderInterface &AsBuilderInterface() { return *this; }
+
+  virtual void *NewNull(const Mark &mark, void *pParentNode) {  // each override casts the void* arguments and forwards to m_impl
+    return CheckType<Node>(m_impl.NewNull(mark, AsNode(pParentNode)));
+  }
+
+  virtual void *NewScalar(const Mark &mark, const std::string &tag,
+                          void *pParentNode, const std::string &value) {
+    return CheckType<Node>(
+        m_impl.NewScalar(mark, tag, AsNode(pParentNode), value));
+  }
+
+  virtual void *NewSequence(const Mark &mark, const std::string &tag,
+                            void *pParentNode) {
+    return CheckType<Sequence>(
+        m_impl.NewSequence(mark, tag, AsNode(pParentNode)));
+  }
+  virtual void AppendToSequence(void *pSequence, void *pNode) {
+    m_impl.AppendToSequence(AsSequence(pSequence), AsNode(pNode));
+  }
+  virtual void SequenceComplete(void *pSequence) {
+    m_impl.SequenceComplete(AsSequence(pSequence));
+  }
+
+  virtual void *NewMap(const Mark &mark, const std::string &tag,
+                       void *pParentNode) {
+    return CheckType<Map>(m_impl.NewMap(mark, tag, AsNode(pParentNode)));
+  }
+  virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) {
+    m_impl.AssignInMap(AsMap(pMap), AsNode(pKeyNode), AsNode(pValueNode));
+  }
+  virtual void MapComplete(void *pMap) { m_impl.MapComplete(AsMap(pMap)); }
+
+  virtual void *AnchorReference(const Mark &mark, void *pNode) {
+    return CheckType<Node>(m_impl.AnchorReference(mark, AsNode(pNode)));
+  }
+
+ private:
+  Impl &m_impl;
+
+  // Static check for pointer to T: compiles only when U* converts implicitly to T*
+  template <class T, class U>
+  static T *CheckType(U *p) {
+    return p;
+  }
+
+  static Node *AsNode(void *pNode) { return static_cast<Node *>(pNode); }  // unchecked cast back from the opaque pointer
+  static Sequence *AsSequence(void *pSeq) {
+    return static_cast<Sequence *>(pSeq);
+  }
+  static Map *AsMap(void *pMap) { return static_cast<Map *>(pMap); }
+};
+
+void *BuildGraphOfNextDocument(Parser &parser,
+                               GraphBuilderInterface &graphBuilder);  // untyped entry point; declared here, defined outside this header
+
+template <class Impl>
+typename Impl::Node *BuildGraphOfNextDocument(Parser &parser, Impl &impl) {  // typed convenience overload
+  GraphBuilder<Impl> graphBuilder(impl);  // adapt impl to the void*-based interface
+  return static_cast<typename Impl::Node *>(
+      BuildGraphOfNextDocument(parser, graphBuilder));  // cast the opaque result back to Impl::Node*
+}
+}
+
+#endif  // GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/dll.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/dll.h
@ -0,0 +1,33 @@
+#ifndef DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+// The following ifdef block is the standard way of creating macros which make
+// exporting from a DLL simpler. All files within this DLL are compiled with the
+// yaml_cpp_EXPORTS symbol defined on the command line. This symbol should not
+// be defined on any project that uses this DLL. This way any other project
+// whose source files include this file see YAML_CPP_API functions as being
+// imported from a DLL, whereas this DLL sees symbols defined with this macro as
+// being exported.
+#undef YAML_CPP_API  // drop any earlier definition before it is redefined below
+
+#ifdef YAML_CPP_DLL      // Using or Building YAML-CPP DLL (definition defined
+                         // manually)
+#ifdef yaml_cpp_EXPORTS  // Building YAML-CPP DLL (definition created by CMake
+                         // or defined manually)
+//	#pragma message( "Defining YAML_CPP_API for DLL export" )
+#define YAML_CPP_API __declspec(dllexport)
+#else  // yaml_cpp_EXPORTS
+//	#pragma message( "Defining YAML_CPP_API for DLL import" )
+#define YAML_CPP_API __declspec(dllimport)
+#endif  // yaml_cpp_EXPORTS
+#else   // YAML_CPP_DLL
+#define YAML_CPP_API  // YAML_CPP_DLL not defined: the macro expands to nothing
+#endif  // YAML_CPP_DLL
+
+#endif  // DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitfromevents.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitfromevents.h
@ -0,0 +1,57 @@
+#ifndef EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stack>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/eventhandler.h"
+
+namespace YAML {
+struct Mark;
+}  // namespace YAML
+
+namespace YAML {
+class Emitter;
+
+class EmitFromEvents : public EventHandler {  // EventHandler implementation that holds a reference to an Emitter
+ public:
+  EmitFromEvents(Emitter& emitter);  // presumably bound to m_emitter, so the emitter must outlive this object -- confirm in the .cpp
+
+  virtual void OnDocumentStart(const Mark& mark);  // the On* methods below match EventHandler's virtual callbacks
+  virtual void OnDocumentEnd();
+
+  virtual void OnNull(const Mark& mark, anchor_t anchor);
+  virtual void OnAlias(const Mark& mark, anchor_t anchor);
+  virtual void OnScalar(const Mark& mark, const std::string& tag,
+                        anchor_t anchor, const std::string& value);
+
+  virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+                               anchor_t anchor, EmitterStyle::value style);
+  virtual void OnSequenceEnd();
+
+  virtual void OnMapStart(const Mark& mark, const std::string& tag,
+                          anchor_t anchor, EmitterStyle::value style);
+  virtual void OnMapEnd();
+
+ private:
+  void BeginNode();  // private helpers; declared here, defined outside this header
+  void EmitProps(const std::string& tag, anchor_t anchor);
+
+ private:
+  Emitter& m_emitter;  // non-owning reference
+
+  struct State {  // struct used only as a scope for the enum (State::WaitingForKey, ...)
+    enum value { WaitingForSequenceEntry, WaitingForKey, WaitingForValue };
+  };
+  std::stack<State::value> m_stateStack;  // presumably one entry per open sequence/map -- confirm in the .cpp
+};
+}
+
+#endif  // EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitter.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitter.h
@ -0,0 +1,254 @@
+#ifndef EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "yaml-cpp/binary.h"
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/emitterdef.h"
+#include "yaml-cpp/emittermanip.h"
+#include "yaml-cpp/noncopyable.h"
+#include "yaml-cpp/null.h"
+#include "yaml-cpp/ostream_wrapper.h"
+
+namespace YAML {
+class Binary;
+struct _Null;
+}  // namespace YAML
+
+namespace YAML {
+class EmitterState;
+
+class YAML_CPP_API Emitter : private noncopyable {  // YAML output writer; all members are declared here and defined outside this header unless shown inline
+ public:
+  Emitter();
+  explicit Emitter(std::ostream& stream);  // presumably writes to the given stream instead of an internal buffer -- confirm
+  ~Emitter();
+
+  // output accessors
+  const char* c_str() const;
+  std::size_t size() const;
+
+  // state checking
+  bool good() const;  // the Write* templates below return early when this is false
+  const std::string GetLastError() const;  // returns the message by value
+
+  // global setters (return bool; presumably false when the value is rejected -- confirm)
+  bool SetOutputCharset(EMITTER_MANIP value);
+  bool SetStringFormat(EMITTER_MANIP value);
+  bool SetBoolFormat(EMITTER_MANIP value);
+  bool SetIntBase(EMITTER_MANIP value);
+  bool SetSeqFormat(EMITTER_MANIP value);
+  bool SetMapFormat(EMITTER_MANIP value);
+  bool SetIndent(std::size_t n);
+  bool SetPreCommentIndent(std::size_t n);
+  bool SetPostCommentIndent(std::size_t n);
+  bool SetFloatPrecision(std::size_t n);
+  bool SetDoublePrecision(std::size_t n);
+
+  // local setters (return Emitter&; used by the manipulator operator<< overloads)
+  Emitter& SetLocalValue(EMITTER_MANIP value);
+  Emitter& SetLocalIndent(const _Indent& indent);
+  Emitter& SetLocalPrecision(const _Precision& precision);
+
+  // overloads of write (return Emitter&; used by the operator<< overloads)
+  Emitter& Write(const std::string& str);
+  Emitter& Write(bool b);
+  Emitter& Write(char ch);
+  Emitter& Write(const _Alias& alias);
+  Emitter& Write(const _Anchor& anchor);
+  Emitter& Write(const _Tag& tag);
+  Emitter& Write(const _Comment& comment);
+  Emitter& Write(const _Null& n);
+  Emitter& Write(const Binary& binary);
+
+  template <typename T>
+  Emitter& WriteIntegralType(T value);  // defined inline after the class
+
+  template <typename T>
+  Emitter& WriteStreamable(T value);  // defined inline after the class
+
+ private:
+  template <typename T>
+  void SetStreamablePrecision(std::stringstream&) {}  // generic case is a no-op; float and double are specialized after the class
+  std::size_t GetFloatPrecision() const;
+  std::size_t GetDoublePrecision() const;
+
+  void PrepareIntegralStream(std::stringstream& stream) const;
+  void StartedScalar();
+
+ private:
+  void EmitBeginDoc();
+  void EmitEndDoc();
+  void EmitBeginSeq();
+  void EmitEndSeq();
+  void EmitBeginMap();
+  void EmitEndMap();
+  void EmitNewline();
+  void EmitKindTag();
+  void EmitTag(bool verbatim, const _Tag& tag);
+
+  void PrepareNode(EmitterNodeType::value child);
+  void PrepareTopNode(EmitterNodeType::value child);
+  void FlowSeqPrepareNode(EmitterNodeType::value child);
+  void BlockSeqPrepareNode(EmitterNodeType::value child);
+
+  void FlowMapPrepareNode(EmitterNodeType::value child);
+
+  void FlowMapPrepareLongKey(EmitterNodeType::value child);
+  void FlowMapPrepareLongKeyValue(EmitterNodeType::value child);
+  void FlowMapPrepareSimpleKey(EmitterNodeType::value child);
+  void FlowMapPrepareSimpleKeyValue(EmitterNodeType::value child);
+
+  void BlockMapPrepareNode(EmitterNodeType::value child);
+
+  void BlockMapPrepareLongKey(EmitterNodeType::value child);
+  void BlockMapPrepareLongKeyValue(EmitterNodeType::value child);
+  void BlockMapPrepareSimpleKey(EmitterNodeType::value child);
+  void BlockMapPrepareSimpleKeyValue(EmitterNodeType::value child);
+
+  void SpaceOrIndentTo(bool requireSpace, std::size_t indent);
+
+  const char* ComputeFullBoolName(bool b) const;
+  bool CanEmitNewline() const;
+
+ private:
+  std::unique_ptr<EmitterState> m_pState;  // EmitterState is only forward-declared in this header
+  ostream_wrapper m_stream;  // sink that the Write* templates append formatted text to
+};
+
+template <typename T>
+inline Emitter& Emitter::WriteIntegralType(T value) {  // emits an integer as a scalar node; returns *this
+  if (!good())
+    return *this;  // emitter not good(): write nothing
+
+  PrepareNode(EmitterNodeType::Scalar);
+
+  std::stringstream stream;  // format into a temporary so the stream setup stays local to this call
+  PrepareIntegralStream(stream);  // presumably applies the configured int base -- confirm in the .cpp
+  stream << value;
+  m_stream << stream.str();
+
+  StartedScalar();
+
+  return *this;
+}
+
+template <typename T>
+inline Emitter& Emitter::WriteStreamable(T value) {  // emits any operator<<-streamable value as a scalar node; returns *this
+  if (!good())
+    return *this;  // emitter not good(): write nothing
+
+  PrepareNode(EmitterNodeType::Scalar);
+
+  std::stringstream stream;  // format into a temporary so the precision setting stays local to this call
+  SetStreamablePrecision<T>(stream);  // no-op unless T is float or double
+  stream << value;
+  m_stream << stream.str();
+
+  StartedScalar();
+
+  return *this;
+}
+
+template <>
+inline void Emitter::SetStreamablePrecision<float>(std::stringstream& stream) {  // float: use the emitter's float precision
+  stream.precision(static_cast<std::streamsize>(GetFloatPrecision()));
+}
+
+template <>
+inline void Emitter::SetStreamablePrecision<double>(std::stringstream& stream) {  // double: use the emitter's double precision
+  stream.precision(static_cast<std::streamsize>(GetDoublePrecision()));
+}
+
+// overloads of insertion: each forwards to an Emitter member and returns the emitter
+inline Emitter& operator<<(Emitter& emitter, const std::string& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, bool v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, char v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned char v) {
+  return emitter.Write(static_cast<char>(v));  // written through the char overload, not as a number
+}
+inline Emitter& operator<<(Emitter& emitter, const _Alias& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Anchor& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Tag& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Comment& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Null& v) {
+  return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const Binary& b) {
+  return emitter.Write(b);
+}
+
+inline Emitter& operator<<(Emitter& emitter, const char* v) {
+  return emitter.Write(std::string(v));  // NOTE(review): v must be non-null; std::string(nullptr) is undefined behavior
+}
+
+inline Emitter& operator<<(Emitter& emitter, int v) {  // integer types go through WriteIntegralType
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned int v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, short v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned short v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, long v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned long v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, long long v) {
+  return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned long long v) {
+  return emitter.WriteIntegralType(v);
+}
+
+inline Emitter& operator<<(Emitter& emitter, float v) {  // floating-point types go through WriteStreamable
+  return emitter.WriteStreamable(v);
+}
+inline Emitter& operator<<(Emitter& emitter, double v) {
+  return emitter.WriteStreamable(v);
+}
+
+inline Emitter& operator<<(Emitter& emitter, EMITTER_MANIP value) {  // manipulators change settings via the SetLocal* members
+  return emitter.SetLocalValue(value);
+}
+
+inline Emitter& operator<<(Emitter& emitter, _Indent indent) {
+  return emitter.SetLocalIndent(indent);
+}
+
+inline Emitter& operator<<(Emitter& emitter, _Precision precision) {
+  return emitter.SetLocalPrecision(precision);
+}
+}
+
+#endif  // EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitterdef.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitterdef.h
@ -0,0 +1,16 @@
+#ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+struct EmitterNodeType {  // struct used only as a scope, so values are spelled EmitterNodeType::Scalar etc.
+  enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap };
+};
+}
+
+#endif  // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emittermanip.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emittermanip.h
@ -0,0 +1,137 @@
+#ifndef EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+namespace YAML {
+enum EMITTER_MANIP {  // single flat enum holding every manipulator value, grouped by the comments below
+  // general manipulators
+  Auto,
+  TagByKind,
+  Newline,
+
+  // output character set
+  EmitNonAscii,
+  EscapeNonAscii,
+
+  // string manipulators
+  // Auto, // duplicate (also applies to this group; declared once above)
+  SingleQuoted,
+  DoubleQuoted,
+  Literal,
+
+  // bool manipulators
+  YesNoBool,      // yes, no
+  TrueFalseBool,  // true, false
+  OnOffBool,      // on, off
+  UpperCase,      // TRUE, N
+  LowerCase,      // f, yes
+  CamelCase,      // No, Off
+  LongBool,       // yes, On
+  ShortBool,      // y, t
+
+  // int manipulators
+  Dec,
+  Hex,
+  Oct,
+
+  // document manipulators
+  BeginDoc,
+  EndDoc,
+
+  // sequence manipulators
+  BeginSeq,
+  EndSeq,
+  Flow,
+  Block,
+
+  // map manipulators
+  BeginMap,
+  EndMap,
+  Key,
+  Value,
+  // Flow, // duplicate (declared under sequence manipulators)
+  // Block, // duplicate (declared under sequence manipulators)
+  // Auto, // duplicate (declared under general manipulators)
+  LongKey
+};
+
+struct _Indent {  // wrapper type so an indent value can be told apart from a plain int
+  _Indent(int value_) : value(value_) {}  // not explicit: an int converts implicitly
+  int value;
+};
+
+inline _Indent Indent(int value) { return _Indent(value); }  // factory for use in stream expressions
+
+struct _Alias {  // wrapper type marking a string as an alias name
+  _Alias(const std::string& content_) : content(content_) {}
+  std::string content;
+};
+
+inline _Alias Alias(const std::string content) { return _Alias(content); }  // NOTE(review): parameter is taken by value, costing an extra copy
+
+struct _Anchor {  // wrapper type marking a string as an anchor name
+  _Anchor(const std::string& content_) : content(content_) {}
+  std::string content;
+};
+
+inline _Anchor Anchor(const std::string content) { return _Anchor(content); }  // NOTE(review): parameter is taken by value, costing an extra copy
+
+struct _Tag {  // tag description: prefix, content, and the kind of tag
+  struct Type {  // struct used only as a scope for the enum
+    enum value { Verbatim, PrimaryHandle, NamedHandle };
+  };
+
+  explicit _Tag(const std::string& prefix_, const std::string& content_,
+                Type::value type_)
+      : prefix(prefix_), content(content_), type(type_) {}
+  std::string prefix;
+  std::string content;
+  Type::value type;
+};
+
+inline _Tag VerbatimTag(const std::string content) {  // empty prefix, type Verbatim
+  return _Tag("", content, _Tag::Type::Verbatim);
+}
+
+inline _Tag LocalTag(const std::string content) {  // empty prefix, type PrimaryHandle
+  return _Tag("", content, _Tag::Type::PrimaryHandle);
+}
+
+inline _Tag LocalTag(const std::string& prefix, const std::string content) {  // caller-supplied prefix, type NamedHandle
+  return _Tag(prefix, content, _Tag::Type::NamedHandle);
+}
+
+inline _Tag SecondaryTag(const std::string content) {  // empty prefix, type NamedHandle
+  return _Tag("", content, _Tag::Type::NamedHandle);
+}
+
+struct _Comment {  // wrapper type marking a string as comment text
+  _Comment(const std::string& content_) : content(content_) {}
+  std::string content;
+};
+
+inline _Comment Comment(const std::string content) { return _Comment(content); }  // NOTE(review): parameter is taken by value, costing an extra copy
+
+struct _Precision {  // pair of precision settings, one for float and one for double
+  _Precision(int floatPrecision_, int doublePrecision_)
+      : floatPrecision(floatPrecision_), doublePrecision(doublePrecision_) {}
+
+  int floatPrecision;
+  int doublePrecision;
+};
+
+inline _Precision FloatPrecision(int n) { return _Precision(n, -1); }  // -1 for double: presumably "leave unchanged" -- confirm in Emitter::SetLocalPrecision
+
+inline _Precision DoublePrecision(int n) { return _Precision(-1, n); }  // -1 for float: presumably "leave unchanged" -- confirm
+
+inline _Precision Precision(int n) { return _Precision(n, n); }  // same value for both
+}
+
+#endif  // EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/emitterstyle.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/emitterstyle.h
@ -0,0 +1,16 @@
+#ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+struct EmitterStyle {  // struct used only as a scope, so values are spelled EmitterStyle::Block etc.
+  enum value { Default, Block, Flow };
+};
+}
+
+#endif  // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/eventhandler.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/eventhandler.h
@ -0,0 +1,40 @@
+#ifndef EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+
+namespace YAML {
+struct Mark;
+
+class EventHandler {  // abstract callback interface: every On* method is pure virtual
+ public:
+  virtual ~EventHandler() {}  // virtual so implementations can be destroyed through a base pointer
+
+  virtual void OnDocumentStart(const Mark& mark) = 0;
+  virtual void OnDocumentEnd() = 0;
+
+  virtual void OnNull(const Mark& mark, anchor_t anchor) = 0;
+  virtual void OnAlias(const Mark& mark, anchor_t anchor) = 0;
+  virtual void OnScalar(const Mark& mark, const std::string& tag,
+                        anchor_t anchor, const std::string& value) = 0;
+
+  virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+                               anchor_t anchor, EmitterStyle::value style) = 0;
+  virtual void OnSequenceEnd() = 0;  // presumably paired with a preceding OnSequenceStart -- confirm against the parser
+
+  virtual void OnMapStart(const Mark& mark, const std::string& tag,
+                          anchor_t anchor, EmitterStyle::value style) = 0;
+  virtual void OnMapEnd() = 0;  // presumably paired with a preceding OnMapStart -- confirm against the parser
+};
+}
+
+#endif  // EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/exceptions.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/exceptions.h
@ -0,0 +1,267 @@
+#ifndef EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/traits.h"
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+// This is here for compatibility with older versions of Visual Studio
+// which don't support noexcept
+#ifdef _MSC_VER
+    #define YAML_CPP_NOEXCEPT _NOEXCEPT
+#else
+    #define YAML_CPP_NOEXCEPT noexcept
+#endif
+
+namespace YAML {
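+// Message strings for the exception classes declared later in this header
+// (several are not referenced here and are presumably used by the library
+// sources).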
+namespace ErrorMsg {
+const char* const YAML_DIRECTIVE_ARGS =
+    "YAML directives must have exactly one argument";
+const char* const YAML_VERSION = "bad YAML version: ";
+const char* const YAML_MAJOR_VERSION = "YAML major version too large";
+const char* const REPEATED_YAML_DIRECTIVE = "repeated YAML directive";
+const char* const TAG_DIRECTIVE_ARGS =
+    "TAG directives must have exactly two arguments";
+const char* const REPEATED_TAG_DIRECTIVE = "repeated TAG directive";
+const char* const CHAR_IN_TAG_HANDLE =
+    "illegal character found while scanning tag handle";
+const char* const TAG_WITH_NO_SUFFIX = "tag handle with no suffix";
+const char* const END_OF_VERBATIM_TAG = "end of verbatim tag not found";
+const char* const END_OF_MAP = "end of map not found";
+const char* const END_OF_MAP_FLOW = "end of map flow not found";
+const char* const END_OF_SEQ = "end of sequence not found";
+const char* const END_OF_SEQ_FLOW = "end of sequence flow not found";
+const char* const MULTIPLE_TAGS =
+    "cannot assign multiple tags to the same node";
+const char* const MULTIPLE_ANCHORS =
+    "cannot assign multiple anchors to the same node";
+const char* const MULTIPLE_ALIASES =
+    "cannot assign multiple aliases to the same node";
+const char* const ALIAS_CONTENT =
+    "aliases can't have any content, *including* tags";
+const char* const INVALID_HEX = "bad character found while scanning hex number";
+const char* const INVALID_UNICODE = "invalid unicode: ";
+const char* const INVALID_ESCAPE = "unknown escape character: ";
+const char* const UNKNOWN_TOKEN = "unknown token";
+const char* const DOC_IN_SCALAR = "illegal document indicator in scalar";
+const char* const EOF_IN_SCALAR = "illegal EOF in scalar";
+const char* const CHAR_IN_SCALAR = "illegal character in scalar";
+const char* const TAB_IN_INDENTATION =
+    "illegal tab when looking for indentation";
+const char* const FLOW_END = "illegal flow end";
+const char* const BLOCK_ENTRY = "illegal block entry";
+const char* const MAP_KEY = "illegal map key";
+const char* const MAP_VALUE = "illegal map value";
+const char* const ALIAS_NOT_FOUND = "alias not found after *";
+const char* const ANCHOR_NOT_FOUND = "anchor not found after &";
+const char* const CHAR_IN_ALIAS =
+    "illegal character found while scanning alias";
+const char* const CHAR_IN_ANCHOR =
+    "illegal character found while scanning anchor";
+const char* const ZERO_INDENT_IN_BLOCK =
+    "cannot set zero indentation for a block scalar";
+const char* const CHAR_IN_BLOCK = "unexpected character in block scalar";
+const char* const AMBIGUOUS_ANCHOR =
+    "cannot assign the same alias to multiple nodes";
+const char* const UNKNOWN_ANCHOR = "the referenced anchor is not defined";
+
+const char* const INVALID_NODE =
+    "invalid node; this may result from using a map iterator as a sequence "
+    "iterator, or vice-versa";
+const char* const INVALID_SCALAR = "invalid scalar";
+const char* const KEY_NOT_FOUND = "key not found";
+const char* const BAD_CONVERSION = "bad conversion";
+const char* const BAD_DEREFERENCE = "bad dereference";
+const char* const BAD_SUBSCRIPT = "operator[] call on a scalar";
+const char* const BAD_PUSHBACK = "appending to a non-sequence";
+const char* const BAD_INSERT = "inserting in a non-convertible-to-map";
+
+const char* const UNMATCHED_GROUP_TAG = "unmatched group tag";
+const char* const UNEXPECTED_END_SEQ = "unexpected end sequence token";
+const char* const UNEXPECTED_END_MAP = "unexpected end map token";
+const char* const SINGLE_QUOTED_CHAR =
+    "invalid character in single-quoted string";
+const char* const INVALID_ANCHOR = "invalid anchor";
+const char* const INVALID_ALIAS = "invalid alias";
+const char* const INVALID_TAG = "invalid tag";
+const char* const BAD_FILE = "bad file";
+
+// Overload selected for key types that are not numeric (via disable_if on
+// is_numeric<T>): the key is ignored and the bare KEY_NOT_FOUND text is
+// returned.
+template <typename T>
+inline const std::string KEY_NOT_FOUND_WITH_KEY(
+    const T&, typename disable_if<is_numeric<T>>::type* = 0) {
+  return KEY_NOT_FOUND;
+}
+
+// std::string overload: returns KEY_NOT_FOUND followed by ": " and the key.
+inline const std::string KEY_NOT_FOUND_WITH_KEY(const std::string& key) {
+  std::string message(KEY_NOT_FOUND);
+  message += ": ";
+  message += key;
+  return message;
+}
+
+// Overload selected for numeric key types (via enable_if on is_numeric<T>):
+// returns KEY_NOT_FOUND followed by ": " and the key as formatted by
+// operator<< on a default-configured stringstream.
+template <typename T>
+inline const std::string KEY_NOT_FOUND_WITH_KEY(
+    const T& key, typename enable_if<is_numeric<T>>::type* = 0) {
+  std::stringstream stream;
+  stream << KEY_NOT_FOUND << ": " << key;
+  return stream.str();
+}
+}
+
+// Base class of the exception types in this header. Derives from
+// std::runtime_error, whose what() text is produced by build_what(); the
+// mark and the raw message are also kept as public members.
+class YAML_CPP_API Exception : public std::runtime_error {
+ public:
+  Exception(const Mark& mark_, const std::string& msg_)
+      : std::runtime_error(build_what(mark_, msg_)), mark(mark_), msg(msg_) {}
+  virtual ~Exception() YAML_CPP_NOEXCEPT;
+
+  Exception(const Exception&) = default;
+
+  Mark mark;        // location passed to the constructor (may be the null mark)
+  std::string msg;  // message passed to the constructor, without location text
+
+ private:
+  // Builds the what() text. For a null mark this is the message unchanged;
+  // otherwise it is prefixed with "yaml-cpp: error at line L, column C: ",
+  // where L and C are the mark's line and column plus one.
+  static const std::string build_what(const Mark& mark,
+                                      const std::string& msg) {
+    if (mark.is_null()) {
+      // Return the string itself: going through c_str() forced a strlen plus
+      // a fresh copy and cut the text at the first embedded '\0'.
+      return msg;
+    }
+
+    std::stringstream output;
+    output << "yaml-cpp: error at line " << mark.line + 1 << ", column "
+           << mark.column + 1 << ": " << msg;
+    return output.str();
+  }
+};
+
+// Exception subtype that forwards mark and message to Exception unchanged.
+// NOTE(review): presumably thrown for errors found while parsing -- confirm.
+class YAML_CPP_API ParserException : public Exception {
+ public:
+  ParserException(const Mark& mark_, const std::string& msg_)
+      : Exception(mark_, msg_) {}
+  ParserException(const ParserException&) = default;
+  virtual ~ParserException() YAML_CPP_NOEXCEPT;
+};
+
+// Exception subtype that forwards mark and message to Exception unchanged;
+// base class of the node-access exceptions declared below.
+class YAML_CPP_API RepresentationException : public Exception {
+ public:
+  RepresentationException(const Mark& mark_, const std::string& msg_)
+      : Exception(mark_, msg_) {}
+  RepresentationException(const RepresentationException&) = default;
+  virtual ~RepresentationException() YAML_CPP_NOEXCEPT;
+};
+
+// representation exceptions
+// RepresentationException carrying the fixed ErrorMsg::INVALID_SCALAR text
+// and the caller-supplied mark.
+class YAML_CPP_API InvalidScalar : public RepresentationException {
+ public:
+  InvalidScalar(const Mark& mark_)
+      : RepresentationException(mark_, ErrorMsg::INVALID_SCALAR) {}
+  InvalidScalar(const InvalidScalar&) = default;
+  virtual ~InvalidScalar() YAML_CPP_NOEXCEPT;
+};
+
+// RepresentationException whose message is built by
+// ErrorMsg::KEY_NOT_FOUND_WITH_KEY(key_); that overload set decides whether
+// the key itself appears in the text.
+class YAML_CPP_API KeyNotFound : public RepresentationException {
+ public:
+  template <typename T>
+  KeyNotFound(const Mark& mark_, const T& key_)
+      : RepresentationException(mark_, ErrorMsg::KEY_NOT_FOUND_WITH_KEY(key_)) {
+  }
+  KeyNotFound(const KeyNotFound&) = default;
+  virtual ~KeyNotFound() YAML_CPP_NOEXCEPT;
+};
+
+// KeyNotFound variant that additionally stores a copy of the offending key
+// in the public member `key`.
+template <typename T>
+class YAML_CPP_API TypedKeyNotFound : public KeyNotFound {
+ public:
+  TypedKeyNotFound(const Mark& mark_, const T& key_)
+      : KeyNotFound(mark_, key_), key(key_) {}
+  virtual ~TypedKeyNotFound() YAML_CPP_NOEXCEPT {}
+
+  T key;
+};
+
+// Factory that deduces T from `key` and returns a TypedKeyNotFound<T> built
+// from the given mark and key.
+template <typename T>
+inline TypedKeyNotFound<T> MakeTypedKeyNotFound(const Mark& mark,
+                                                const T& key) {
+  return TypedKeyNotFound<T>(mark, key);
+}
+
+// RepresentationException with the fixed ErrorMsg::INVALID_NODE text and a
+// null mark (no source location).
+class YAML_CPP_API InvalidNode : public RepresentationException {
+ public:
+  InvalidNode()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::INVALID_NODE) {}
+  InvalidNode(const InvalidNode&) = default;
+  virtual ~InvalidNode() YAML_CPP_NOEXCEPT;
+};
+
+// RepresentationException with the fixed ErrorMsg::BAD_CONVERSION text and
+// the caller-supplied mark.
+class YAML_CPP_API BadConversion : public RepresentationException {
+ public:
+  explicit BadConversion(const Mark& mark_)
+      : RepresentationException(mark_, ErrorMsg::BAD_CONVERSION) {}
+  BadConversion(const BadConversion&) = default;
+  virtual ~BadConversion() YAML_CPP_NOEXCEPT;
+};
+
+// BadConversion tagged with a type parameter T; it adds no members, so T
+// only distinguishes the exception type.
+template <typename T>
+class TypedBadConversion : public BadConversion {
+ public:
+  explicit TypedBadConversion(const Mark& mark_) : BadConversion(mark_) {}
+};
+
+// RepresentationException with the fixed ErrorMsg::BAD_DEREFERENCE text and
+// a null mark.
+class YAML_CPP_API BadDereference : public RepresentationException {
+ public:
+  BadDereference()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_DEREFERENCE) {}
+  BadDereference(const BadDereference&) = default;
+  virtual ~BadDereference() YAML_CPP_NOEXCEPT;
+};
+
+// RepresentationException with the fixed ErrorMsg::BAD_SUBSCRIPT text
+// ("operator[] call on a scalar") and a null mark.
+class YAML_CPP_API BadSubscript : public RepresentationException {
+ public:
+  BadSubscript()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_SUBSCRIPT) {}
+  BadSubscript(const BadSubscript&) = default;
+  virtual ~BadSubscript() YAML_CPP_NOEXCEPT;
+};
+
+// RepresentationException with the fixed ErrorMsg::BAD_PUSHBACK text
+// ("appending to a non-sequence") and a null mark.
+class YAML_CPP_API BadPushback : public RepresentationException {
+ public:
+  BadPushback()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_PUSHBACK) {}
+  BadPushback(const BadPushback&) = default;
+  virtual ~BadPushback() YAML_CPP_NOEXCEPT;
+};
+
+// RepresentationException with the fixed ErrorMsg::BAD_INSERT text
+// ("inserting in a non-convertible-to-map") and a null mark.
+class YAML_CPP_API BadInsert : public RepresentationException {
+ public:
+  BadInsert()
+      : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_INSERT) {}
+  BadInsert(const BadInsert&) = default;
+  virtual ~BadInsert() YAML_CPP_NOEXCEPT;
+};
+
+// Exception subtype constructed from a message only; it always uses the
+// null mark, so what() carries no line/column prefix.
+class YAML_CPP_API EmitterException : public Exception {
+ public:
+  EmitterException(const std::string& msg_)
+      : Exception(Mark::null_mark(), msg_) {}
+  EmitterException(const EmitterException&) = default;
+  virtual ~EmitterException() YAML_CPP_NOEXCEPT;
+};
+
+// Exception with the fixed ErrorMsg::BAD_FILE text and a null mark.
+class YAML_CPP_API BadFile : public Exception {
+ public:
+  BadFile() : Exception(Mark::null_mark(), ErrorMsg::BAD_FILE) {}
+  BadFile(const BadFile&) = default;
+  virtual ~BadFile() YAML_CPP_NOEXCEPT;
+};
+}
+
+#undef YAML_CPP_NOEXCEPT
+
+#endif  // EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/mark.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/mark.h
@ -0,0 +1,29 @@
+#ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+// Position record made of three ints: pos, line and column.
+struct YAML_CPP_API Mark {
+  // Default mark: all three fields are zero.
+  Mark() : pos(0), line(0), column(0) {}
+
+  // Sentinel value with every field set to -1.
+  static const Mark null_mark() { return Mark(-1, -1, -1); }
+
+  // True only when all three fields are -1, i.e. equal to null_mark().
+  bool is_null() const { return pos == -1 && line == -1 && column == -1; }
+
+  // NOTE(review): line/column look zero-based (Exception::build_what prints
+  // them plus one); pos is presumably an offset into the input -- confirm.
+  int pos;
+  int line, column;
+
+ private:
+  // Field-wise constructor; private, so only null_mark() uses it here.
+  Mark(int pos_, int line_, int column_)
+      : pos(pos_), line(line_), column(column_) {}
+};
+}
+
+#endif  // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/convert.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/convert.h
@ -0,0 +1,331 @@
+#ifndef NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <array>
+#include <limits>
+#include <list>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include "yaml-cpp/binary.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/null.h"
+
+namespace YAML {
+class Binary;
+struct _Null;
+template <typename T>
+struct convert;
+}  // namespace YAML
+
+namespace YAML {
+namespace conversion {
+// True when `input` is exactly ".inf", ".Inf" or ".INF", optionally preceded
+// by a '+' sign.
+inline bool IsInfinity(const std::string& input) {
+  static const char* const kSpellings[] = {".inf",  ".Inf",  ".INF",
+                                           "+.inf", "+.Inf", "+.INF"};
+  for (const char* spelling : kSpellings) {
+    if (input == spelling) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// True when `input` is exactly "-.inf", "-.Inf" or "-.INF".
+inline bool IsNegativeInfinity(const std::string& input) {
+  static const char* const kSpellings[] = {"-.inf", "-.Inf", "-.INF"};
+  for (const char* spelling : kSpellings) {
+    if (input == spelling) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// True when `input` is exactly ".nan", ".NaN" or ".NAN".
+inline bool IsNaN(const std::string& input) {
+  static const char* const kSpellings[] = {".nan", ".NaN", ".NAN"};
+  for (const char* spelling : kSpellings) {
+    if (input == spelling) {
+      return true;
+    }
+  }
+  return false;
+}
+}
+
+// Node
+// Identity conversion for Node itself.
+template <>
+struct convert<Node> {
+  // Returns a copy of the given Node handle.
+  static Node encode(const Node& rhs) { return rhs; }
+
+  // Points `rhs` at `node` via Node::reset; always succeeds.
+  static bool decode(const Node& node, Node& rhs) {
+    rhs.reset(node);
+    return true;
+  }
+};
+
+// std::string
+// std::string <-> Node: encoding wraps the string in a Node, decoding
+// succeeds only for scalar nodes and copies the scalar text out.
+template <>
+struct convert<std::string> {
+  static Node encode(const std::string& rhs) { return Node(rhs); }
+
+  static bool decode(const Node& node, std::string& rhs) {
+    if (node.IsScalar()) {
+      rhs = node.Scalar();
+      return true;
+    }
+    return false;
+  }
+};
+
+// C-strings can only be encoded
+// Encode-only conversion for C strings (there is deliberately no decode).
+template <>
+struct convert<const char*> {
+  // Takes the pointer by value. The previous `const char*&` parameter was a
+  // non-const lvalue reference, so it rejected rvalues and const pointer
+  // objects even though the pointer itself is never modified.
+  static Node encode(const char* rhs) { return Node(rhs); }
+};
+
+// Encode-only conversion for const char arrays of any extent N; the array
+// reference is handed straight to the Node constructor.
+template <std::size_t N>
+struct convert<const char[N]> {
+  static Node encode(const char(&rhs)[N]) { return Node(rhs); }
+};
+
+// Conversion for the _Null tag type.
+template <>
+struct convert<_Null> {
+  // Encodes as a default-constructed Node; the argument is unused.
+  static Node encode(const _Null& /* rhs */) { return Node(); }
+
+  // Succeeds exactly when the node reports IsNull(); nothing is written.
+  static bool decode(const Node& node, _Null& /* rhs */) {
+    return node.IsNull();
+  }
+};
+
+// Defines convert<type> for a stream-formattable arithmetic type.
+//   encode: writes rhs to a stringstream with precision digits10 + 1 and
+//           wraps the resulting text in a Node.
+//   decode: fails for non-scalar nodes. Otherwise it extracts `type` with
+//           operator>> after clearing the dec flag (so the stream does not
+//           force base 10), with leading whitespace not skipped, and
+//           requires that only whitespace follows. If that fails, the
+//           infinity and NaN spellings from the conversion helpers are
+//           accepted when numeric_limits<type> reports support for them.
+// `negative_op` is the unary operator applied to infinity() for the negative
+// infinity spelling; the wrappers below pass '-' or '+'.
+// NOTE(review): for unsigned types operator>> accepts a leading '-' and
+// yields the wrapped value, so text like "-1" decodes successfully --
+// confirm whether callers rely on that.
+// Comments must stay above the macro: a // comment inside the continued
+// lines would swallow the rest of the definition.
+#define YAML_DEFINE_CONVERT_STREAMABLE(type, negative_op)                \
+  template <>                                                            \
+  struct convert<type> {                                                 \
+    static Node encode(const type& rhs) {                                \
+      std::stringstream stream;                                          \
+      stream.precision(std::numeric_limits<type>::digits10 + 1);         \
+      stream << rhs;                                                     \
+      return Node(stream.str());                                         \
+    }                                                                    \
+                                                                         \
+    static bool decode(const Node& node, type& rhs) {                    \
+      if (node.Type() != NodeType::Scalar)                               \
+        return false;                                                    \
+      const std::string& input = node.Scalar();                          \
+      std::stringstream stream(input);                                   \
+      stream.unsetf(std::ios::dec);                                      \
+      if ((stream >> std::noskipws >> rhs) && (stream >> std::ws).eof()) \
+        return true;                                                     \
+      if (std::numeric_limits<type>::has_infinity) {                     \
+        if (conversion::IsInfinity(input)) {                             \
+          rhs = std::numeric_limits<type>::infinity();                   \
+          return true;                                                   \
+        } else if (conversion::IsNegativeInfinity(input)) {              \
+          rhs = negative_op std::numeric_limits<type>::infinity();       \
+          return true;                                                   \
+        }                                                                \
+      }                                                                  \
+                                                                         \
+      if (std::numeric_limits<type>::has_quiet_NaN &&                    \
+          conversion::IsNaN(input)) {                                    \
+        rhs = std::numeric_limits<type>::quiet_NaN();                    \
+        return true;                                                     \
+      }                                                                  \
+                                                                         \
+      return false;                                                      \
+    }                                                                    \
+  }
+
+// Thin wrappers choosing the operator applied to infinity() for the negative
+// infinity spelling: unary minus for signed types, unary plus for unsigned.
+// NOTE(review): the '+' presumably just avoids negating an unsigned value in
+// a branch guarded by has_infinity -- confirm.
+#define YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(type) \
+  YAML_DEFINE_CONVERT_STREAMABLE(type, -)
+
+#define YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(type) \
+  YAML_DEFINE_CONVERT_STREAMABLE(type, +)
+
+// Integer types.
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(int);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(short);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long long);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned short);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long long);
+
+// Character types; note that plain char uses the signed wrapper.
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(char);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(signed char);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned char);
+
+// Floating-point types.
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(float);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(double);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long double);
+
+// The helper macros are local to this header.
+#undef YAML_DEFINE_CONVERT_STREAMABLE_SIGNED
+#undef YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED
+#undef YAML_DEFINE_CONVERT_STREAMABLE
+
+// bool
+// bool conversion: encodes as the text "true" or "false". decode is only
+// declared here; its definition lives outside this header.
+template <>
+struct convert<bool> {
+  static Node encode(bool rhs) { return rhs ? Node("true") : Node("false"); }
+
+  YAML_CPP_API static bool decode(const Node& node, bool& rhs);
+};
+
+// std::map
+// std::map<K, V> <-> map node.
+template <typename K, typename V>
+struct convert<std::map<K, V>> {
+  // Builds a Map node and force_inserts every key/value pair in map order.
+  static Node encode(const std::map<K, V>& rhs) {
+    Node node(NodeType::Map);
+    for (typename std::map<K, V>::const_iterator it = rhs.begin();
+         it != rhs.end(); ++it)
+      node.force_insert(it->first, it->second);
+    return node;
+  }
+
+  // Fails for non-map nodes. Otherwise clears `rhs` and assigns each entry
+  // converted with as<K>() / as<V>(); a later duplicate key overwrites an
+  // earlier one because operator[] is used.
+  static bool decode(const Node& node, std::map<K, V>& rhs) {
+    if (!node.IsMap())
+      return false;
+
+    rhs.clear();
+    // The unbraced loop body is whichever statement the #if selects.
+    for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs[it->first.template as<K>()] = it->second.template as<V>();
+#else
+      rhs[it->first.as<K>()] = it->second.as<V>();
+#endif
+    return true;
+  }
+};
+
+// std::vector
+// std::vector<T> <-> sequence node.
+template <typename T>
+struct convert<std::vector<T>> {
+  // Appends every element, in order, to a new Sequence node.
+  static Node encode(const std::vector<T>& rhs) {
+    Node sequence(NodeType::Sequence);
+    typename std::vector<T>::const_iterator cur = rhs.begin();
+    while (cur != rhs.end()) {
+      sequence.push_back(*cur);
+      ++cur;
+    }
+    return sequence;
+  }
+
+  // Rejects non-sequence nodes; otherwise replaces the contents of `rhs`
+  // with each child converted through as<T>().
+  static bool decode(const Node& node, std::vector<T>& rhs) {
+    if (!node.IsSequence()) {
+      return false;
+    }
+
+    rhs.clear();
+    const_iterator child = node.begin();
+    while (child != node.end()) {
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs.push_back(child->template as<T>());
+#else
+      rhs.push_back(child->as<T>());
+#endif
+      ++child;
+    }
+    return true;
+  }
+};
+
+// std::list
+// std::list<T> <-> sequence node.
+template <typename T>
+struct convert<std::list<T>> {
+  // Appends every element, in order, to a new Sequence node.
+  static Node encode(const std::list<T>& rhs) {
+    Node sequence(NodeType::Sequence);
+    typename std::list<T>::const_iterator cur = rhs.begin();
+    while (cur != rhs.end()) {
+      sequence.push_back(*cur);
+      ++cur;
+    }
+    return sequence;
+  }
+
+  // Rejects non-sequence nodes; otherwise replaces the contents of `rhs`
+  // with each child converted through as<T>().
+  static bool decode(const Node& node, std::list<T>& rhs) {
+    if (!node.IsSequence()) {
+      return false;
+    }
+
+    rhs.clear();
+    const_iterator child = node.begin();
+    while (child != node.end()) {
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs.push_back(child->template as<T>());
+#else
+      rhs.push_back(child->as<T>());
+#endif
+      ++child;
+    }
+    return true;
+  }
+};
+
+// std::array
+// std::array<T, N> <-> sequence node of exactly N elements.
+template <typename T, std::size_t N>
+struct convert<std::array<T, N>> {
+  // Appends every element, in order, to a new Sequence node.
+  static Node encode(const std::array<T, N>& rhs) {
+    Node node(NodeType::Sequence);
+    for (const auto& element : rhs) {
+      node.push_back(element);
+    }
+    return node;
+  }
+
+  // Fails unless the node is a sequence whose size equals N; otherwise each
+  // slot is overwritten with the matching child converted through as<T>().
+  static bool decode(const Node& node, std::array<T, N>& rhs) {
+    if (!isNodeValid(node)) {
+      return false;
+    }
+
+    for (auto i = 0u; i < node.size(); ++i) {
+#if defined(__GNUC__) && __GNUC__ < 4
+      // workaround for GCC 3:
+      rhs[i] = node[i].template as<T>();
+#else
+      rhs[i] = node[i].as<T>();
+#endif
+    }
+    return true;
+  }
+
+ private:
+  // Shape check used by decode: a sequence with exactly N entries.
+  static bool isNodeValid(const Node& node) {
+    return node.IsSequence() && node.size() == N;
+  }
+};
+
+// std::pair
+// std::pair<T, U> <-> two-element sequence node.
+template <typename T, typename U>
+struct convert<std::pair<T, U>> {
+  // Emits a Sequence node holding `first` followed by `second`.
+  static Node encode(const std::pair<T, U>& rhs) {
+    Node sequence(NodeType::Sequence);
+    sequence.push_back(rhs.first);
+    sequence.push_back(rhs.second);
+    return sequence;
+  }
+
+  // Accepts only sequences of size 2; element 0 becomes `first` and
+  // element 1 becomes `second`.
+  static bool decode(const Node& node, std::pair<T, U>& rhs) {
+    if (!node.IsSequence() || node.size() != 2) {
+      return false;
+    }
+
+#if defined(__GNUC__) && __GNUC__ < 4
+    // workaround for GCC 3:
+    rhs.first = node[0].template as<T>();
+    rhs.second = node[1].template as<U>();
+#else
+    rhs.first = node[0].as<T>();
+    rhs.second = node[1].as<U>();
+#endif
+    return true;
+  }
+};
+
+// binary
+// Binary <-> base64 scalar node.
+template <>
+struct convert<Binary> {
+  // Encodes the payload with EncodeBase64 and wraps the text in a Node.
+  static Node encode(const Binary& rhs) {
+    return Node(EncodeBase64(rhs.data(), rhs.size()));
+  }
+
+  // Fails for non-scalar nodes, and when decoding yields no bytes although
+  // the scalar text is non-empty (treated as a decoding failure). On
+  // success the decoded bytes are swapped into `rhs`.
+  static bool decode(const Node& node, Binary& rhs) {
+    if (!node.IsScalar())
+      return false;
+
+    std::vector<unsigned char> data = DecodeBase64(node.Scalar());
+    if (data.empty() && !node.Scalar().empty())
+      return false;
+
+    rhs.swap(data);
+    return true;
+  }
+};
+}
+}
+
+#endif  // NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/bool_type.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/bool_type.h
@ -0,0 +1,26 @@
+#ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+namespace detail {
+// Support type for YAML_CPP_OPERATOR_BOOL: provides a function whose address
+// serves as the "true" value of a bool-like conversion.
+struct unspecified_bool {
+  // Declared but never defined, so callers cannot create an argument for
+  // true_value.
+  struct NOT_ALLOWED;
+  static void true_value(NOT_ALLOWED*) {}
+};
+// Pointer-to-function type that classes convert to instead of plain bool.
+typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*);
+}
+}
+
+// Expands, inside a class that defines operator!(), to a conversion operator
+// returning unspecified_bool_type: null when operator!() is true, otherwise
+// the address of unspecified_bool::true_value. This lets objects be tested
+// in conditions without being implicitly convertible to an integer.
+#define YAML_CPP_OPERATOR_BOOL()                                            \
+  operator YAML::detail::unspecified_bool_type() const {                    \
+    return this->operator!() ? 0                                            \
+                             : &YAML::detail::unspecified_bool::true_value; \
+  }
+
+#endif  // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/impl.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/impl.h
@ -0,0 +1,185 @@
+#ifndef NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/node/detail/node_data.h"
+#include <type_traits>
+
+namespace YAML {
+namespace detail {
+// Primary template, used for key types that match neither specialization
+// below: such a key never indexes a sequence, so the lookup returns null.
+template <typename Key, typename Enable = void>
+struct get_idx {
+  static node* get(const std::vector<node*>& /* sequence */,
+                   const Key& /* key */, shared_memory_holder /* pMemory */) {
+    return 0;
+  }
+};
+
+// Specialization for unsigned key types other than bool: the key is used
+// directly as a sequence index.
+template <typename Key>
+struct get_idx<Key,
+               typename std::enable_if<std::is_unsigned<Key>::value &&
+                                       !std::is_same<Key, bool>::value>::type> {
+  // Read-only lookup: the element at `key`, or null when out of range.
+  static node* get(const std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder /* pMemory */) {
+    return key < sequence.size() ? sequence[key] : 0;
+  }
+
+  // Mutable lookup. Returns null when `key` is beyond one-past-the-end, or
+  // when the element just before `key` is not defined. When `key` equals the
+  // current size, a freshly created node is appended and returned.
+  static node* get(std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder pMemory) {
+   if (key > sequence.size() || (key > 0 && !sequence[key-1]->is_defined()))
+      return 0;
+    if (key == sequence.size())
+      sequence.push_back(&pMemory->create_node());
+    return sequence[key];
+  }
+};
+
+// Specialization for signed key types: negative keys yield null, and
+// non-negative keys are cast to std::size_t and forwarded to the unsigned
+// specialization.
+template <typename Key>
+struct get_idx<Key, typename std::enable_if<std::is_signed<Key>::value>::type> {
+  // Read-only lookup.
+  static node* get(const std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder pMemory) {
+    return key >= 0 ? get_idx<std::size_t>::get(
+                          sequence, static_cast<std::size_t>(key), pMemory)
+                    : 0;
+  }
+  // Mutable lookup (the forwarded call may append a node).
+  static node* get(std::vector<node*>& sequence, const Key& key,
+                   shared_memory_holder pMemory) {
+    return key >= 0 ? get_idx<std::size_t>::get(
+                          sequence, static_cast<std::size_t>(key), pMemory)
+                    : 0;
+  }
+};
+
+// Decodes this node as a T and compares the result with `rhs`; a node that
+// cannot be decoded as T compares unequal.
+template <typename T>
+inline bool node::equals(const T& rhs, shared_memory_holder pMemory) {
+  T decoded;
+  if (!convert<T>::decode(Node(*this, pMemory), decoded)) {
+    return false;
+  }
+  return decoded == rhs;
+}
+
+// C-string overload: compares through the std::string instantiation (the
+// pointer is implicitly converted to std::string).
+inline bool node::equals(const char* rhs, shared_memory_holder pMemory) {
+  return equals<std::string>(rhs, pMemory);
+}
+
+// indexing
+// Read-only lookup of `key`.
+//   Undefined / Null : returns NULL.
+//   Sequence         : returns the element selected by get_idx<Key>, or NULL.
+//   Scalar           : throws BadSubscript.
+//   Map              : linear search; returns the value of the first entry
+//                      whose key node equals `key`, or NULL if none matches.
+template <typename Key>
+inline node* node_data::get(const Key& key,
+                            shared_memory_holder pMemory) const {
+  switch (m_type) {
+    case NodeType::Map:
+      break;  // handled by the search below
+    case NodeType::Undefined:
+    case NodeType::Null:
+      return NULL;
+    case NodeType::Sequence:
+      if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory))
+        return pNode;
+      return NULL;
+    case NodeType::Scalar:
+      throw BadSubscript();
+  }
+
+  for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->first->equals(key, pMemory)) {
+      return it->second;
+    }
+  }
+
+  return NULL;
+}
+
+// Mutable lookup of `key`; creates the entry when it does not exist.
+//   Undefined / Null / Sequence : first tries get_idx<Key> on the sequence
+//       storage (which may append a node); on success the data becomes a
+//       Sequence. Otherwise the data is converted to a map and the search
+//       below runs.
+//   Scalar : throws BadSubscript.
+//   Map    : returns the value of the first entry whose key equals `key`;
+//       if none matches, a key node and an empty value node are created,
+//       inserted as a pair, and the new value node is returned.
+template <typename Key>
+inline node& node_data::get(const Key& key, shared_memory_holder pMemory) {
+  switch (m_type) {
+    case NodeType::Map:
+      break;
+    case NodeType::Undefined:
+    case NodeType::Null:
+    case NodeType::Sequence:
+      if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory)) {
+        m_type = NodeType::Sequence;
+        return *pNode;
+      }
+
+      // The key cannot be served as a sequence index: fall back to a map.
+      convert_to_map(pMemory);
+      break;
+    case NodeType::Scalar:
+      throw BadSubscript();
+  }
+
+  for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->first->equals(key, pMemory)) {
+      return *it->second;
+    }
+  }
+
+  node& k = convert_to_node(key, pMemory);
+  node& v = pMemory->create_node();
+  insert_map_pair(k, v);
+  return v;
+}
+
+// Removes `key` from a map. Returns false immediately for non-map data.
+// The return value reflects only whether an entry was erased from m_map;
+// erasing from m_undefinedPairs alone still returns false.
+template <typename Key>
+inline bool node_data::remove(const Key& key, shared_memory_holder pMemory) {
+  if (m_type != NodeType::Map)
+    return false;
+
+  // Erase every matching entry from the undefined-pairs list. The successor
+  // iterator is saved before erasing so the walk can continue.
+  for (kv_pairs::iterator it = m_undefinedPairs.begin();
+       it != m_undefinedPairs.end();) {
+    kv_pairs::iterator jt = std::next(it);
+    if (it->first->equals(key, pMemory))
+      m_undefinedPairs.erase(it);
+    it = jt;
+  }
+
+  // Erase the first matching entry from the map and stop.
+  for (node_map::iterator it = m_map.begin(); it != m_map.end(); ++it) {
+    if (it->first->equals(key, pMemory)) {
+      m_map.erase(it);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// map
+// Inserts a key/value pair without searching for an existing equal key.
+// Undefined, Null and Sequence data is first converted to a map; Scalar data
+// throws BadInsert. Both key and value are encoded into new nodes.
+template <typename Key, typename Value>
+inline void node_data::force_insert(const Key& key, const Value& value,
+                                    shared_memory_holder pMemory) {
+  switch (m_type) {
+    case NodeType::Map:
+      break;
+    case NodeType::Undefined:
+    case NodeType::Null:
+    case NodeType::Sequence:
+      convert_to_map(pMemory);
+      break;
+    case NodeType::Scalar:
+      throw BadInsert();
+  }
+
+  node& k = convert_to_node(key, pMemory);
+  node& v = convert_to_node(value, pMemory);
+  insert_map_pair(k, v);
+}
+
+// Encodes `rhs` with convert<T>::encode and returns a reference to the
+// resulting detail node. The temporary Node's memory is merged into
+// `pMemory` before returning.
+// NOTE(review): the merge presumably keeps the returned node alive after
+// `value` goes out of scope -- confirm against memory_holder::merge.
+template <typename T>
+inline node& node_data::convert_to_node(const T& rhs,
+                                        shared_memory_holder pMemory) {
+  Node value = convert<T>::encode(rhs);
+  value.EnsureNodeExists();
+  pMemory->merge(*value.m_pMemory);
+  return *value.m_pNode;
+}
+}
+}
+
+#endif  // NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator.h
@ -0,0 +1,92 @@
+#ifndef VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include <cstddef>
+#include <iterator>
+
+namespace YAML {
+namespace detail {
+struct iterator_value;
+
+// Forward iterator over the children of a node. Wraps a node_iterator plus
+// the shared memory holder, and produces value_type objects by value on
+// dereference (the reference type passed to std::iterator is V itself).
+template <typename V>
+class iterator_base : public std::iterator<std::forward_iterator_tag, V,
+                                           std::ptrdiff_t, V*, V> {
+
+ private:
+  // Other instantiations are friends so the converting constructor and the
+  // comparisons can read their private members.
+  template <typename>
+  friend class iterator_base;
+  struct enabler {};
+  typedef node_iterator base_type;
+
+  // Holds a copy of the dereferenced value so operator-> has an object to
+  // point at, since operator* returns by value.
+  struct proxy {
+    explicit proxy(const V& x) : m_ref(x) {}
+    V* operator->() { return std::addressof(m_ref); }
+    operator V*() { return std::addressof(m_ref); }
+
+    V m_ref;
+  };
+
+ public:
+  typedef typename iterator_base::value_type value_type;
+
+ public:
+  iterator_base() : m_iterator(), m_pMemory() {}
+  explicit iterator_base(base_type rhs, shared_memory_holder pMemory)
+      : m_iterator(rhs), m_pMemory(pMemory) {}
+
+  // Converting constructor, enabled only when W* converts to V* (for
+  // example from a non-const to a const value type).
+  template <class W>
+  iterator_base(const iterator_base<W>& rhs,
+                typename std::enable_if<std::is_convertible<W*, V*>::value,
+                                        enabler>::type = enabler())
+      : m_iterator(rhs.m_iterator), m_pMemory(rhs.m_pMemory) {}
+
+  // Pre-increment: advances the wrapped iterator.
+  iterator_base<V>& operator++() {
+    ++m_iterator;
+    return *this;
+  }
+
+  // Post-increment: returns a copy taken before advancing.
+  iterator_base<V> operator++(int) {
+    iterator_base<V> iterator_pre(*this);
+    ++(*this);
+    return iterator_pre;
+  }
+
+  // Comparisons look only at the wrapped iterator, not the memory holder.
+  template <typename W>
+  bool operator==(const iterator_base<W>& rhs) const {
+    return m_iterator == rhs.m_iterator;
+  }
+
+  template <typename W>
+  bool operator!=(const iterator_base<W>& rhs) const {
+    return m_iterator != rhs.m_iterator;
+  }
+
+  // Builds the value for the current position: a single Node when the
+  // underlying value has pNode set, a key/value pair of Nodes when both
+  // first and second are set, and a default-constructed value otherwise.
+  value_type operator*() const {
+    const typename base_type::value_type& v = *m_iterator;
+    if (v.pNode)
+      return value_type(Node(*v, m_pMemory));
+    if (v.first && v.second)
+      return value_type(Node(*v.first, m_pMemory), Node(*v.second, m_pMemory));
+    return value_type();
+  }
+
+  // Member access goes through a proxy holding a copy of **this.
+  proxy operator->() const { return proxy(**this); }
+
+ private:
+  base_type m_iterator;
+  shared_memory_holder m_pMemory;
+};
+}
+}
+
+#endif  // VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator_fwd.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/iterator_fwd.h
@ -0,0 +1,27 @@
+#ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+
+// Forward declarations only; the definitions live in other headers.
+namespace detail {
+struct iterator_value;
+template <typename V>
+class iterator_base;
+}
+
+// Public iterator names: mutable and const instantiations of
+// detail::iterator_base over detail::iterator_value.
+typedef detail::iterator_base<detail::iterator_value> iterator;
+typedef detail::iterator_base<const detail::iterator_value> const_iterator;
+}
+
+#endif  // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/memory.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/memory.h
@ -0,0 +1,46 @@
+#ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <set>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/ptr.h"
+
+namespace YAML {
+namespace detail {
+class node;
+}  // namespace detail
+}  // namespace YAML
+
+namespace YAML {
+namespace detail {
+class YAML_CPP_API memory {  // holds a set of shared_node handles; both member functions are only declared here
+ public:
+  node& create_node();  // returns a node by reference (presumably a new one recorded in m_nodes - confirm in the implementation)
+  void merge(const memory& rhs);  // presumably adds rhs's nodes to this set - confirm in the implementation
+
+ private:
+  typedef std::set<shared_node> Nodes;  // shared_node is declared in yaml-cpp/node/ptr.h
+  Nodes m_nodes;
+};
+
+class YAML_CPP_API memory_holder {  // wraps a shared_memory handle to a memory object
+ public:
+  memory_holder() : m_pMemory(new memory) {}  // always starts with a freshly allocated memory object
+
+  node& create_node() { return m_pMemory->create_node(); }  // forwards to the held memory object
+  void merge(memory_holder& rhs);  // declared only; implemented outside this header
+
+ private:
+  shared_memory m_pMemory;  // shared_memory is declared in yaml-cpp/node/ptr.h
+};
+}
+}
+
+#endif  // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node.h
@ -0,0 +1,169 @@
+#ifndef NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_ref.h"
+#include <set>
+
+namespace YAML {
+namespace detail {
+class node {  // a shared node_ref plus the set of nodes that must be marked defined once this one is
+ public:
+  node() : m_pRef(new node_ref) {}  // starts out with a fresh node_ref of its own
+  node(const node&) = delete;
+  node& operator=(const node&) = delete;
+
+  bool is(const node& rhs) const { return m_pRef == rhs.m_pRef; }  // identity test: both nodes share one node_ref
+  const node_ref* ref() const { return m_pRef.get(); }
+
+  bool is_defined() const { return m_pRef->is_defined(); }
+  const Mark& mark() const { return m_pRef->mark(); }
+  NodeType::value type() const { return m_pRef->type(); }
+
+  const std::string& scalar() const { return m_pRef->scalar(); }
+  const std::string& tag() const { return m_pRef->tag(); }
+  EmitterStyle::value style() const { return m_pRef->style(); }
+
+  template <typename T>
+  bool equals(const T& rhs, shared_memory_holder pMemory);  // declared only; defined outside this class body
+  bool equals(const char* rhs, shared_memory_holder pMemory);  // declared only; defined outside this class body
+
+  void mark_defined() {  // marks this node defined, then cascades to every registered dependency
+    if (is_defined())
+      return;  // already defined: nothing left to propagate
+
+    m_pRef->mark_defined();
+    for (nodes::iterator it = m_dependencies.begin();
+         it != m_dependencies.end(); ++it)
+      (*it)->mark_defined();
+    m_dependencies.clear();  // each dependency is notified once, then forgotten
+  }
+
+  void add_dependency(node& rhs) {  // arranges for rhs to be marked defined when this node is
+    if (is_defined())
+      rhs.mark_defined();  // this node is already defined: propagate immediately
+    else
+      m_dependencies.insert(&rhs);  // otherwise remember rhs until mark_defined() runs
+  }
+
+  void set_ref(const node& rhs) {  // makes this node share rhs's node_ref
+    if (rhs.is_defined())
+      mark_defined();  // runs against the current node_ref, before it is replaced below
+    m_pRef = rhs.m_pRef;
+  }
+  void set_data(const node& rhs) {  // keeps this node's node_ref but hands it rhs's data via node_ref::set_data
+    if (rhs.is_defined())
+      mark_defined();
+    m_pRef->set_data(*rhs.m_pRef);
+  }
+
+  void set_mark(const Mark& mark) { m_pRef->set_mark(mark); }  // unlike the setters below, does not mark the node defined
+
+  void set_type(NodeType::value type) {
+    if (type != NodeType::Undefined)  // assigning Undefined must not flip the node to defined
+      mark_defined();
+    m_pRef->set_type(type);
+  }
+  void set_null() {  // this and the following setters mark the node defined before forwarding
+    mark_defined();
+    m_pRef->set_null();
+  }
+  void set_scalar(const std::string& scalar) {
+    mark_defined();
+    m_pRef->set_scalar(scalar);
+  }
+  void set_tag(const std::string& tag) {
+    mark_defined();
+    m_pRef->set_tag(tag);
+  }
+
+  // style
+  void set_style(EmitterStyle::value style) {
+    mark_defined();
+    m_pRef->set_style(style);
+  }
+
+  // size/iterator
+  std::size_t size() const { return m_pRef->size(); }
+
+  const_node_iterator begin() const {
+    return static_cast<const node_ref&>(*m_pRef).begin();  // the cast selects node_ref's const overload
+  }
+  node_iterator begin() { return m_pRef->begin(); }
+
+  const_node_iterator end() const {
+    return static_cast<const node_ref&>(*m_pRef).end();  // the cast selects node_ref's const overload
+  }
+  node_iterator end() { return m_pRef->end(); }
+
+  // sequence
+  void push_back(node& input, shared_memory_holder pMemory) {
+    m_pRef->push_back(input, pMemory);
+    input.add_dependency(*this);  // this node is marked defined once input is
+  }
+  void insert(node& key, node& value, shared_memory_holder pMemory) {
+    m_pRef->insert(key, value, pMemory);
+    key.add_dependency(*this);  // this node is marked defined once key is ...
+    value.add_dependency(*this);  // ... or once value is
+  }
+
+  // indexing
+  template <typename Key>
+  node* get(const Key& key, shared_memory_holder pMemory) const {
+    // NOTE: this returns a non-const node so that the top-level Node can wrap
+    // it, and returns a pointer so that it can be NULL (if there is no such
+    // key).
+    return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
+  }
+  template <typename Key>
+  node& get(const Key& key, shared_memory_holder pMemory) {  // non-const lookup: returns a reference, never NULL
+    node& value = m_pRef->get(key, pMemory);
+    value.add_dependency(*this);  // this node is marked defined once the returned value is
+    return value;
+  }
+  template <typename Key>
+  bool remove(const Key& key, shared_memory_holder pMemory) {
+    return m_pRef->remove(key, pMemory);
+  }
+
+  node* get(node& key, shared_memory_holder pMemory) const {
+    // NOTE: this returns a non-const node so that the top-level Node can wrap
+    // it, and returns a pointer so that it can be NULL (if there is no such
+    // key).
+    return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
+  }
+  node& get(node& key, shared_memory_holder pMemory) {  // node-keyed variant of the non-const lookup
+    node& value = m_pRef->get(key, pMemory);
+    key.add_dependency(*this);  // this node is marked defined once key is ...
+    value.add_dependency(*this);  // ... or once value is
+    return value;
+  }
+  bool remove(node& key, shared_memory_holder pMemory) {
+    return m_pRef->remove(key, pMemory);
+  }
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value,
+                    shared_memory_holder pMemory) {
+    m_pRef->force_insert(key, value, pMemory);  // plain forward; no dependency is registered here
+  }
+
+ private:
+  shared_node_ref m_pRef;  // may be shared with other nodes (see set_ref)
+  typedef std::set<node*> nodes;
+  nodes m_dependencies;  // raw, non-owning pointers to nodes awaiting mark_defined()
+};
+}
+}
+
+#endif  // NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_data.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_data.h
@ -0,0 +1,127 @@
+#ifndef VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <list>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+class node;
+}  // namespace detail
+}  // namespace YAML
+
+namespace YAML {
+namespace detail {
+class YAML_CPP_API node_data {  // per-node payload: defined flag, mark, type, tag, style and scalar/sequence/map contents
+ public:
+  node_data();
+  node_data(const node_data&) = delete;
+  node_data& operator=(const node_data&) = delete;
+
+  void mark_defined();  // this and the setters below are declared only; implemented outside this header
+  void set_mark(const Mark& mark);
+  void set_type(NodeType::value type);
+  void set_tag(const std::string& tag);
+  void set_null();
+  void set_scalar(const std::string& scalar);
+  void set_style(EmitterStyle::value style);
+
+  bool is_defined() const { return m_isDefined; }
+  const Mark& mark() const { return m_mark; }
+  NodeType::value type() const {
+    return m_isDefined ? m_type : NodeType::Undefined;  // the stored type is reported only once the data is defined
+  }
+  const std::string& scalar() const { return m_scalar; }
+  const std::string& tag() const { return m_tag; }
+  EmitterStyle::value style() const { return m_style; }
+
+  // size/iterator
+  std::size_t size() const;
+
+  const_node_iterator begin() const;
+  node_iterator begin();
+
+  const_node_iterator end() const;
+  node_iterator end();
+
+  // sequence
+  void push_back(node& node, shared_memory_holder pMemory);
+  void insert(node& key, node& value, shared_memory_holder pMemory);
+
+  // indexing
+  template <typename Key>
+  node* get(const Key& key, shared_memory_holder pMemory) const;  // const lookup returns a pointer
+  template <typename Key>
+  node& get(const Key& key, shared_memory_holder pMemory);  // non-const lookup returns a reference
+  template <typename Key>
+  bool remove(const Key& key, shared_memory_holder pMemory);
+
+  node* get(node& key, shared_memory_holder pMemory) const;
+  node& get(node& key, shared_memory_holder pMemory);
+  bool remove(node& key, shared_memory_holder pMemory);
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value,
+                    shared_memory_holder pMemory);
+
+ public:
+  static std::string empty_scalar;  // public static string; declared here, defined elsewhere
+
+ private:
+  void compute_seq_size() const;  // const, so it can only update the mutable members below
+  void compute_map_size() const;  // const, so it can only update the mutable members below
+
+  void reset_sequence();
+  void reset_map();
+
+  void insert_map_pair(node& key, node& value);
+  void convert_to_map(shared_memory_holder pMemory);
+  void convert_sequence_to_map(shared_memory_holder pMemory);
+
+  template <typename T>
+  static node& convert_to_node(const T& rhs, shared_memory_holder pMemory);
+
+ private:
+  bool m_isDefined;  // gates what type() reports
+  Mark m_mark;
+  NodeType::value m_type;
+  std::string m_tag;
+  EmitterStyle::value m_style;
+
+  // scalar
+  std::string m_scalar;
+
+  // sequence
+  typedef std::vector<node*> node_seq;  // raw node pointers
+  node_seq m_sequence;
+
+  mutable std::size_t m_seqSize;  // mutable: writable from const members
+
+  // map
+  typedef std::vector<std::pair<node*, node*>> node_map;  // key/value pointer pairs kept in a vector, not a std::map
+  node_map m_map;
+
+  typedef std::pair<node*, node*> kv_pair;
+  typedef std::list<kv_pair> kv_pairs;
+  mutable kv_pairs m_undefinedPairs;  // mutable: writable from const members
+};
+}
+}
+
+#endif  // VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_iterator.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_iterator.h
@ -0,0 +1,180 @@
+#ifndef VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/ptr.h"
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <map>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+namespace detail {
+struct iterator_type {  // struct used purely as a scope for the enum below
+  enum value { NoneType, Sequence, Map };  // which underlying iterator, if any, a node_iterator_base is using
+};
+
+template <typename V>
+struct node_iterator_value : public std::pair<V*, V*> {  // holds either one node (pNode) or a key/value pair (first/second)
+  typedef std::pair<V*, V*> kv;
+
+  node_iterator_value() : kv(), pNode(nullptr) {}  // empty value: first, second and pNode are all null
+  explicit node_iterator_value(V& rhs) : kv(), pNode(&rhs) {}  // single-node form
+  explicit node_iterator_value(V& key, V& value) : kv(&key, &value), pNode(nullptr) {}  // key/value form
+
+  V& operator*() const { return *pNode; }  // only meaningful in the single-node form (pNode non-null)
+  V* operator->() const { return pNode; }  // must yield a pointer: the former V& return made `value->member` ill-formed
+
+  V* pNode;
+};
+
+typedef std::vector<node*> node_seq;
+typedef std::vector<std::pair<node*, node*>> node_map;
+
+template <typename V>
+struct node_iterator_type {  // primary template: picks the mutable container iterators
+  typedef node_seq::iterator seq;
+  typedef node_map::iterator map;
+};
+
+template <typename V>
+struct node_iterator_type<const V> {  // partial specialisation for const V: picks the const container iterators
+  typedef node_seq::const_iterator seq;
+  typedef node_map::const_iterator map;
+};
+
+template <typename V>
+class node_iterator_base  // forward iterator that walks either a node_seq or a node_map, selected by m_type
+    : public std::iterator<std::forward_iterator_tag, node_iterator_value<V>,
+                           std::ptrdiff_t, node_iterator_value<V>*,
+                           node_iterator_value<V>> {
+ private:
+  struct enabler {};  // tag type used as the defaulted enable_if parameter of the converting constructor
+
+  struct proxy {  // owns a copy of the value so operator->() has an object to point at
+    explicit proxy(const node_iterator_value<V>& x) : m_ref(x) {}
+    node_iterator_value<V>* operator->() { return std::addressof(m_ref); }
+    operator node_iterator_value<V>*() { return std::addressof(m_ref); }
+
+    node_iterator_value<V> m_ref;  // despite the name this is a stored copy, not a reference
+  };
+
+ public:
+  typedef typename node_iterator_type<V>::seq SeqIter;
+  typedef typename node_iterator_type<V>::map MapIter;
+  typedef node_iterator_value<V> value_type;
+
+  node_iterator_base()  // iterator over nothing: type NoneType
+      : m_type(iterator_type::NoneType), m_seqIt(), m_mapIt(), m_mapEnd() {}
+  explicit node_iterator_base(SeqIter seqIt)  // sequence iterator; the map members stay value-initialised
+      : m_type(iterator_type::Sequence),
+        m_seqIt(seqIt),
+        m_mapIt(),
+        m_mapEnd() {}
+  explicit node_iterator_base(MapIter mapIt, MapIter mapEnd)  // map iterator; needs the end to bound the skipping below
+      : m_type(iterator_type::Map),
+        m_seqIt(),
+        m_mapIt(mapIt),
+        m_mapEnd(mapEnd) {
+    m_mapIt = increment_until_defined(m_mapIt);  // start on the first entry whose key and value are both defined
+  }
+
+  template <typename W>
+  node_iterator_base(const node_iterator_base<W>& rhs,  // converting constructor, enabled only when W* converts to V*
+                     typename std::enable_if<std::is_convertible<W*, V*>::value,
+                                             enabler>::type = enabler())
+      : m_type(rhs.m_type),
+        m_seqIt(rhs.m_seqIt),
+        m_mapIt(rhs.m_mapIt),
+        m_mapEnd(rhs.m_mapEnd) {}
+
+  template <typename>
+  friend class node_iterator_base;  // lets other instantiations read the private members (used above and in operator==)
+
+  template <typename W>
+  bool operator==(const node_iterator_base<W>& rhs) const {
+    if (m_type != rhs.m_type)
+      return false;  // iterators of different kinds never compare equal
+
+    switch (m_type) {
+      case iterator_type::NoneType:
+        return true;  // all NoneType iterators are equal to each other
+      case iterator_type::Sequence:
+        return m_seqIt == rhs.m_seqIt;
+      case iterator_type::Map:
+        return m_mapIt == rhs.m_mapIt;  // m_mapEnd is not part of the comparison
+    }
+    return true;  // reached only if m_type holds none of the three enumerators
+  }
+
+  template <typename W>
+  bool operator!=(const node_iterator_base<W>& rhs) const {
+    return !(*this == rhs);
+  }
+
+  node_iterator_base<V>& operator++() {  // pre-increment
+    switch (m_type) {
+      case iterator_type::NoneType:
+        break;  // nothing to advance
+      case iterator_type::Sequence:
+        ++m_seqIt;
+        break;
+      case iterator_type::Map:
+        ++m_mapIt;
+        m_mapIt = increment_until_defined(m_mapIt);  // skip entries with an undefined key or value
+        break;
+    }
+    return *this;
+  }
+
+  node_iterator_base<V> operator++(int) {  // post-increment: returns a copy taken before advancing
+    node_iterator_base<V> iterator_pre(*this);
+    ++(*this);
+    return iterator_pre;
+  }
+
+  value_type operator*() const {  // returns by value, which is why operator->() goes through proxy
+    switch (m_type) {
+      case iterator_type::NoneType:
+        return value_type();
+      case iterator_type::Sequence:
+        return value_type(**m_seqIt);  // single-node form
+      case iterator_type::Map:
+        return value_type(*m_mapIt->first, *m_mapIt->second);  // key/value form
+    }
+    return value_type();  // reached only if m_type holds none of the three enumerators
+  }
+
+  proxy operator->() const { return proxy(**this); }
+
+  MapIter increment_until_defined(MapIter it) {  // advances it to the next fully defined entry, or to m_mapEnd
+    while (it != m_mapEnd && !is_defined(it))
+      ++it;
+    return it;
+  }
+
+  bool is_defined(MapIter it) const {  // an entry counts only when both key and value are defined; it must be dereferenceable
+    return it->first->is_defined() && it->second->is_defined();
+  }
+
+ private:
+  typename iterator_type::value m_type;  // selects which of the iterators below is in use
+
+  SeqIter m_seqIt;
+  MapIter m_mapIt, m_mapEnd;
+};
+
+typedef node_iterator_base<node> node_iterator;
+typedef node_iterator_base<const node> const_node_iterator;
+}
+}
+
+#endif  // VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_ref.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/detail/node_ref.h
@ -0,0 +1,98 @@
+#ifndef VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_data.h"
+
+namespace YAML {
+namespace detail {
+class node_ref {  // thin forwarding layer over a shared node_data; set_data() swaps which node_data is used
+ public:
+  node_ref() : m_pData(new node_data) {}  // starts out with a fresh node_data of its own
+  node_ref(const node_ref&) = delete;
+  node_ref& operator=(const node_ref&) = delete;
+
+  bool is_defined() const { return m_pData->is_defined(); }
+  const Mark& mark() const { return m_pData->mark(); }
+  NodeType::value type() const { return m_pData->type(); }
+  const std::string& scalar() const { return m_pData->scalar(); }
+  const std::string& tag() const { return m_pData->tag(); }
+  EmitterStyle::value style() const { return m_pData->style(); }
+
+  void mark_defined() { m_pData->mark_defined(); }
+  void set_data(const node_ref& rhs) { m_pData = rhs.m_pData; }  // shares rhs's node_data handle; nothing is copied
+
+  void set_mark(const Mark& mark) { m_pData->set_mark(mark); }
+  void set_type(NodeType::value type) { m_pData->set_type(type); }
+  void set_tag(const std::string& tag) { m_pData->set_tag(tag); }
+  void set_null() { m_pData->set_null(); }
+  void set_scalar(const std::string& scalar) { m_pData->set_scalar(scalar); }
+  void set_style(EmitterStyle::value style) { m_pData->set_style(style); }
+
+  // size/iterator
+  std::size_t size() const { return m_pData->size(); }
+
+  const_node_iterator begin() const {
+    return static_cast<const node_data&>(*m_pData).begin();  // the cast selects node_data's const overload
+  }
+  node_iterator begin() { return m_pData->begin(); }
+
+  const_node_iterator end() const {
+    return static_cast<const node_data&>(*m_pData).end();  // the cast selects node_data's const overload
+  }
+  node_iterator end() { return m_pData->end(); }
+
+  // sequence
+  void push_back(node& node, shared_memory_holder pMemory) {
+    m_pData->push_back(node, pMemory);
+  }
+  void insert(node& key, node& value, shared_memory_holder pMemory) {
+    m_pData->insert(key, value, pMemory);
+  }
+
+  // indexing
+  template <typename Key>
+  node* get(const Key& key, shared_memory_holder pMemory) const {  // const lookup: returns a pointer
+    return static_cast<const node_data&>(*m_pData).get(key, pMemory);
+  }
+  template <typename Key>
+  node& get(const Key& key, shared_memory_holder pMemory) {  // non-const lookup: returns a reference
+    return m_pData->get(key, pMemory);
+  }
+  template <typename Key>
+  bool remove(const Key& key, shared_memory_holder pMemory) {
+    return m_pData->remove(key, pMemory);
+  }
+
+  node* get(node& key, shared_memory_holder pMemory) const {  // node-keyed const lookup
+    return static_cast<const node_data&>(*m_pData).get(key, pMemory);
+  }
+  node& get(node& key, shared_memory_holder pMemory) {  // node-keyed non-const lookup
+    return m_pData->get(key, pMemory);
+  }
+  bool remove(node& key, shared_memory_holder pMemory) {
+    return m_pData->remove(key, pMemory);
+  }
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value,
+                    shared_memory_holder pMemory) {
+    m_pData->force_insert(key, value, pMemory);
+  }
+
+ private:
+  shared_node_data m_pData;  // may be shared with other node_ref objects after set_data()
+};
+}
+}
+
+#endif  // VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/emit.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/emit.h
@ -0,0 +1,32 @@
+#ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <iosfwd>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class Emitter;  // forward declaration; only references to it are used below
+class Node;  // forward declaration; only references to it are used below
+
+/**
+ * Emits the node to the given {@link Emitter}. If there is an error in writing,
+ * {@link Emitter#good} will return false.
+ */
+YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node);
+
+/** Emits the node to the given output stream. */
+YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node);
+
+/** Converts the node to a YAML string. */
+YAML_CPP_API std::string Dump(const Node& node);
+}  // namespace YAML
+
+#endif  // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/impl.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/impl.h
@ -0,0 +1,448 @@
+#ifndef NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/detail/memory.h"
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/exceptions.h"
+#include <string>
+
+namespace YAML {
+inline Node::Node() : m_isValid(true), m_pNode(NULL) {}  // valid node with no backing detail::node yet; m_pMemory is default-constructed
+
+inline Node::Node(NodeType::value type)  // creates its own memory_holder and one node of the requested type
+    : m_isValid(true),
+      m_pMemory(new detail::memory_holder),
+      m_pNode(&m_pMemory->create_node()) {  // NOTE(review): requires m_pMemory to be declared before m_pNode in the class - confirm in node.h
+  m_pNode->set_type(type);
+}
+
+template <typename T>
+inline Node::Node(const T& rhs)  // creates its own memory_holder and node, then assigns rhs through Assign()
+    : m_isValid(true),
+      m_pMemory(new detail::memory_holder),
+      m_pNode(&m_pMemory->create_node()) {  // NOTE(review): requires m_pMemory to be declared before m_pNode in the class - confirm in node.h
+  Assign(rhs);
+}
+
+inline Node::Node(const detail::iterator_value& rhs)  // copies validity, memory handle and node pointer from rhs
+    : m_isValid(rhs.m_isValid),
+      m_pMemory(rhs.m_pMemory),
+      m_pNode(rhs.m_pNode) {}  // the detail::node itself is shared, not duplicated
+
+inline Node::Node(const Node& rhs)  // copy constructor: copies validity, memory handle and node pointer
+    : m_isValid(rhs.m_isValid),
+      m_pMemory(rhs.m_pMemory),
+      m_pNode(rhs.m_pNode) {}  // the detail::node itself is shared, not duplicated
+
+inline Node::Node(Zombie) : m_isValid(false), m_pNode(NULL) {}  // builds an invalid node; most members below throw InvalidNode on it
+
+inline Node::Node(detail::node& node, detail::shared_memory_holder pMemory)  // wraps an existing detail::node
+    : m_isValid(true), m_pMemory(pMemory), m_pNode(&node) {}  // stores the node's address and shares the given memory handle
+
+inline Node::~Node() {}  // empty body: no explicit clean-up is performed here
+
+inline void Node::EnsureNodeExists() const {
+  if (!m_isValid)
+    throw InvalidNode();
+  if (m_pNode)
+    return;  // backing node already present
+  m_pMemory.reset(new detail::memory_holder);  // lazily allocate storage on first need
+  m_pNode = &m_pMemory->create_node();
+  m_pNode->set_null();  // the freshly created node becomes an explicit Null
+}
+
+inline bool Node::IsDefined() const {
+  // An invalid node is never defined; a valid node without backing storage
+  // counts as defined.
+  if (!m_isValid) return false;
+  return !m_pNode || m_pNode->is_defined();
+}
+
+inline Mark Node::Mark() const {
+  if (!m_isValid)
+    throw InvalidNode();
+  if (m_pNode)
+    return m_pNode->mark();
+  return Mark::null_mark();  // no backing node: report the null mark
+}
+
+inline NodeType::value Node::Type() const {
+  if (!m_isValid) throw InvalidNode();
+  if (!m_pNode) return NodeType::Null;  // a node without backing storage reads as Null
+  return m_pNode->type();
+}
+
+// access
+
+// template helpers
+template <typename T, typename S>
+struct as_if {  // functor behind Node::as<T>(fallback): converts the node, or returns the fallback
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;  // stored by reference; the functor must not outlive the node
+
+  T operator()(const S& fallback) const {
+    if (!node.m_pNode)
+      return fallback;  // no backing node: nothing to decode
+
+    T t;
+    if (convert<T>::decode(node, t))
+      return t;
+    return fallback;  // decode reported failure
+  }
+};
+
+template <typename S>
+struct as_if<std::string, S> {  // std::string with fallback: bypasses convert<> and reads the scalar directly
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;  // stored by reference; the functor must not outlive the node
+
+  std::string operator()(const S& fallback) const {
+    if (node.Type() != NodeType::Scalar)
+      return fallback;  // only scalar nodes yield their text
+    return node.Scalar();
+  }
+};
+
+template <typename T>
+struct as_if<T, void> {  // functor behind Node::as<T>() without fallback: converts the node or throws
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;  // stored by reference; the functor must not outlive the node
+
+  T operator()() const {
+    if (!node.m_pNode)
+      throw TypedBadConversion<T>(node.Mark());  // no backing node: nothing to decode
+
+    T t;
+    if (convert<T>::decode(node, t))
+      return t;
+    throw TypedBadConversion<T>(node.Mark());  // decode reported failure
+  }
+};
+
+template <>
+struct as_if<std::string, void> {  // std::string without fallback: reads the scalar directly or throws
+  explicit as_if(const Node& node_) : node(node_) {}
+  const Node& node;  // stored by reference; the functor must not outlive the node
+
+  std::string operator()() const {
+    if (node.Type() != NodeType::Scalar)
+      throw TypedBadConversion<std::string>(node.Mark());  // only scalar nodes convert to a string
+    return node.Scalar();
+  }
+};
+
+// access functions
+template <typename T>
+inline T Node::as() const {  // converts this node to T; throws instead of falling back
+  if (!m_isValid)
+    throw InvalidNode();
+  return as_if<T, void>(*this)();  // dispatches to the no-fallback as_if specialisations
+}
+
+template <typename T, typename S>
+inline T Node::as(const S& fallback) const {  // converts this node to T, returning fallback when that is not possible
+  if (!m_isValid)
+    return fallback;  // unlike as<T>(), an invalid node does not throw here
+  return as_if<T, S>(*this)(fallback);
+}
+
+inline const std::string& Node::Scalar() const {
+  if (!m_isValid) throw InvalidNode();
+  if (!m_pNode) return detail::node_data::empty_scalar;  // shared static string
+  return m_pNode->scalar();
+}
+
+inline const std::string& Node::Tag() const {
+  if (!m_isValid) throw InvalidNode();
+  if (!m_pNode) return detail::node_data::empty_scalar;  // same shared static string that Scalar() uses
+  return m_pNode->tag();
+}
+
+inline void Node::SetTag(const std::string& tag) {  // sets the tag, creating the backing node first if needed
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_tag(tag);
+}
+
+inline EmitterStyle::value Node::Style() const {
+  if (!m_isValid) throw InvalidNode();
+  if (!m_pNode) return EmitterStyle::Default;  // no backing node: default style
+  return m_pNode->style();
+}
+
+inline void Node::SetStyle(EmitterStyle::value style) {  // sets the emitter style, creating the backing node first if needed
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_style(style);
+}
+
+// assignment
+inline bool Node::is(const Node& rhs) const {
+  const bool bothValid = m_isValid && rhs.m_isValid;
+  if (!bothValid)
+    throw InvalidNode();
+  // Identity needs a backing node on both sides; otherwise the answer is false.
+  return m_pNode && rhs.m_pNode && m_pNode->is(*rhs.m_pNode);
+}
+
+template <typename T>
+inline Node& Node::operator=(const T& rhs) {  // assigns a non-Node value through the matching Assign() overload
+  if (!m_isValid)
+    throw InvalidNode();
+  Assign(rhs);
+  return *this;
+}
+
+inline void Node::reset(const YAML::Node& rhs) {  // re-points this handle at rhs's node and memory
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  m_pMemory = rhs.m_pMemory;  // only this handle's two members change; the previously referenced node is not modified
+  m_pNode = rhs.m_pNode;
+}
+
+template <typename T>
+inline void Node::Assign(const T& rhs) {  // generic case: encode rhs with convert<T>, then take over the result's data
+  if (!m_isValid)
+    throw InvalidNode();
+  AssignData(convert<T>::encode(rhs));
+}
+
+template <>
+inline void Node::Assign(const std::string& rhs) {  // std::string specialisation: stores the scalar directly, bypassing convert<>
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_scalar(rhs);
+}
+
+inline void Node::Assign(const char* rhs) {  // C-string overload: rhs is converted to std::string at the set_scalar call
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_scalar(rhs);
+}
+
+inline void Node::Assign(char* rhs) {  // non-const C-string overload; same body as the const char* overload
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->set_scalar(rhs);
+}
+
+inline Node& Node::operator=(const Node& rhs) {  // Node-to-Node assignment goes through AssignNode()
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  if (is(rhs))
+    return *this;  // both already refer to the same underlying node: nothing to do
+  AssignNode(rhs);
+  return *this;
+}
+
+inline void Node::AssignData(const Node& rhs) {  // keeps this node object but makes it use rhs's data
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  rhs.EnsureNodeExists();  // const member; creates rhs's backing node on demand
+
+  m_pNode->set_data(*rhs.m_pNode);
+  m_pMemory->merge(*rhs.m_pMemory);  // merge rhs's memory holder into ours after taking over its data
+}
+
+inline void Node::AssignNode(const Node& rhs) {  // makes this handle refer to the same node as rhs
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  rhs.EnsureNodeExists();  // const member; creates rhs's backing node on demand
+
+  if (!m_pNode) {  // nothing held yet: simply adopt rhs's node and memory
+    m_pNode = rhs.m_pNode;
+    m_pMemory = rhs.m_pMemory;
+    return;
+  }
+
+  m_pNode->set_ref(*rhs.m_pNode);  // first redirect the node we currently hold to rhs's node_ref
+  m_pMemory->merge(*rhs.m_pMemory);  // merge rhs's memory holder into ours
+  m_pNode = rhs.m_pNode;  // finally point this handle at rhs's node itself
+}
+
+// size/iterator
+inline std::size_t Node::size() const {
+  if (!m_isValid) throw InvalidNode();
+  if (!m_pNode) return 0;  // no backing node: size is zero
+  return m_pNode->size();
+}
+
+inline const_iterator Node::begin() const {
+  // Invalid nodes and nodes without backing storage both yield a
+  // default-constructed iterator.
+  if (!m_isValid || !m_pNode) return const_iterator();
+  return const_iterator(m_pNode->begin(), m_pMemory);
+}
+
+inline iterator Node::begin() {
+  if (!m_isValid || !m_pNode)
+    return iterator();  // invalid or storage-less: default-constructed iterator
+  return iterator(m_pNode->begin(), m_pMemory);
+}
+
+inline const_iterator Node::end() const {
+  if (!m_isValid || !m_pNode)
+    return const_iterator();  // invalid or storage-less: default-constructed iterator
+  return const_iterator(m_pNode->end(), m_pMemory);
+}
+
+inline iterator Node::end() {
+  if (!m_isValid || !m_pNode)
+    return iterator();  // invalid or storage-less: default-constructed iterator
+  return iterator(m_pNode->end(), m_pMemory);
+}
+
+// sequence
+template <typename T>
+inline void Node::push_back(const T& rhs) {  // wraps rhs in a temporary Node, then defers to the Node overload
+  if (!m_isValid)
+    throw InvalidNode();
+  push_back(Node(rhs));
+}
+
+inline void Node::push_back(const Node& rhs) {  // appends rhs's underlying node to this node
+  if (!m_isValid || !rhs.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  rhs.EnsureNodeExists();  // const member; creates rhs's backing node on demand
+
+  m_pNode->push_back(*rhs.m_pNode, m_pMemory);
+  m_pMemory->merge(*rhs.m_pMemory);  // merge rhs's memory holder into ours after storing its node
+}
+
+// helpers for indexing
+namespace detail {
+template <typename T>
+struct to_value_t {  // primary template: passes the key/value through unchanged, by const reference
+  explicit to_value_t(const T& t_) : t(t_) {}
+  const T& t;  // reference member: the wrapper must not outlive its argument
+  typedef const T& return_type;
+
+  const T& operator()() const { return t; }
+};
+
+template <>
+struct to_value_t<const char*> {  // const char* specialisation: yields a std::string copy
+  explicit to_value_t(const char* t_) : t(t_) {}
+  const char* t;
+  typedef std::string return_type;
+
+  const std::string operator()() const { return t; }  // implicit const char* -> std::string conversion
+};
+
+template <>
+struct to_value_t<char*> {  // char* specialisation: yields a std::string copy
+  explicit to_value_t(char* t_) : t(t_) {}
+  const char* t;  // held as const char*; the pointed-to text is only read
+  typedef std::string return_type;
+
+  const std::string operator()() const { return t; }  // implicit const char* -> std::string conversion
+};
+
+template <std::size_t N>
+struct to_value_t<char[N]> {  // char-array specialisation (the type a string literal deduces to via const T&)
+  explicit to_value_t(const char* t_) : t(t_) {}  // the array argument decays to a pointer here
+  const char* t;
+  typedef std::string return_type;
+
+  const std::string operator()() const { return t; }  // implicit const char* -> std::string conversion
+};
+
+// converts C-strings to std::strings so they can be copied; any other type is returned unchanged by const reference
+template <typename T>
+inline typename to_value_t<T>::return_type to_value(const T& t) {
+  return to_value_t<T>(t)();  // to_value_t<T> selects between pass-through and std::string conversion
+}
+}
+
+// indexing
+template <typename Key>
+inline const Node Node::operator[](const Key& key) const {  // const lookup: a missing key yields an invalid (zombie) Node
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  detail::node* value = static_cast<const detail::node&>(*m_pNode)  // the cast selects the const get(), which returns a pointer
+                            .get(detail::to_value(key), m_pMemory);
+  if (!value) {
+    return Node(ZombieNode);  // no such key
+  }
+  return Node(*value, m_pMemory);
+}
+
+template <typename Key>
+inline Node Node::operator[](const Key& key) {  // non-const lookup: always returns a valid Node
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  detail::node& value = m_pNode->get(detail::to_value(key), m_pMemory);  // non-const get() returns a reference
+  return Node(value, m_pMemory);
+}
+
+template <typename Key>
+inline bool Node::remove(const Key& key) {  // forwards to detail::node::remove and returns its result
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  return m_pNode->remove(detail::to_value(key), m_pMemory);
+}
+
+inline const Node Node::operator[](const Node& key) const {  // const lookup by Node key: a missing key yields a zombie Node
+  if (!m_isValid || !key.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  key.EnsureNodeExists();
+  m_pMemory->merge(*key.m_pMemory);  // merge the key's memory holder into ours before the lookup
+  detail::node* value =
+      static_cast<const detail::node&>(*m_pNode).get(*key.m_pNode, m_pMemory);  // the cast selects the const get()
+  if (!value) {
+    return Node(ZombieNode);  // no such key
+  }
+  return Node(*value, m_pMemory);
+}
+
+inline Node Node::operator[](const Node& key) {  // non-const lookup by Node key: always returns a valid Node
+  if (!m_isValid || !key.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  key.EnsureNodeExists();
+  m_pMemory->merge(*key.m_pMemory);  // merge the key's memory holder into ours before the lookup
+  detail::node& value = m_pNode->get(*key.m_pNode, m_pMemory);  // non-const get() returns a reference
+  return Node(value, m_pMemory);
+}
+
+inline bool Node::remove(const Node& key) {  // forwards to detail::node::remove; unlike operator[], no memory merge happens here
+  if (!m_isValid || !key.m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  key.EnsureNodeExists();
+  return m_pNode->remove(*key.m_pNode, m_pMemory);
+}
+
+// map
+template <typename Key, typename Value>
+inline void Node::force_insert(const Key& key, const Value& value) {  // forwards to detail::node::force_insert
+  if (!m_isValid)
+    throw InvalidNode();
+  EnsureNodeExists();
+  m_pNode->force_insert(detail::to_value(key), detail::to_value(value),  // to_value turns C-strings into std::string
+                        m_pMemory);
+}
+
+// free functions
+inline bool operator==(const Node& lhs, const Node& rhs) { return lhs.is(rhs); }  // identity, not content, comparison; throws InvalidNode if either side is invalid
+}
+
+#endif  // NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/iterator.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/iterator.h
@ -0,0 +1,31 @@
+#ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/detail/iterator_fwd.h"
+#include "yaml-cpp/node/detail/iterator.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+namespace detail {
+struct iterator_value : public Node, std::pair<Node, Node> {  // both a Node and a key/value pair; the unused side is a zombie
+  iterator_value() {}
+  explicit iterator_value(const Node& rhs)  // single-node form: the Node base is rhs, both pair members are zombies
+      : Node(rhs),
+        std::pair<Node, Node>(Node(Node::ZombieNode), Node(Node::ZombieNode)) {}
+  explicit iterator_value(const Node& key, const Node& value)  // key/value form: the Node base is a zombie
+      : Node(Node::ZombieNode), std::pair<Node, Node>(key, value) {}
+};
+}
+}
+
+#endif  // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/node.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/node.h
@ -0,0 +1,145 @@
+#ifndef NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stdexcept>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/node/detail/bool_type.h"
+#include "yaml-cpp/node/detail/iterator_fwd.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+class node;
+class node_data;
+struct iterator_value;
+}  // namespace detail
+}  // namespace YAML
+
+namespace YAML {
+class YAML_CPP_API Node {
+ public:
+  friend class NodeBuilder;
+  friend class NodeEvents;
+  friend struct detail::iterator_value;
+  friend class detail::node;
+  friend class detail::node_data;
+  template <typename>
+  friend class detail::iterator_base;
+  template <typename T, typename S>
+  friend struct as_if;
+
+  typedef YAML::iterator iterator;
+  typedef YAML::const_iterator const_iterator;
+
+  Node();
+  explicit Node(NodeType::value type);
+  template <typename T>
+  explicit Node(const T& rhs);
+  explicit Node(const detail::iterator_value& rhs);
+  Node(const Node& rhs);
+  ~Node();
+
+  YAML::Mark Mark() const;
+  NodeType::value Type() const;
+  bool IsDefined() const;
+  bool IsNull() const { return Type() == NodeType::Null; }
+  bool IsScalar() const { return Type() == NodeType::Scalar; }
+  bool IsSequence() const { return Type() == NodeType::Sequence; }
+  bool IsMap() const { return Type() == NodeType::Map; }
+
+  // bool conversions
+  YAML_CPP_OPERATOR_BOOL()
+  bool operator!() const { return !IsDefined(); }
+
+  // access
+  template <typename T>
+  T as() const;
+  template <typename T, typename S>
+  T as(const S& fallback) const;
+  const std::string& Scalar() const;
+
+  const std::string& Tag() const;
+  void SetTag(const std::string& tag);
+
+  // style
+  // WARNING: This API might change in future releases.
+  EmitterStyle::value Style() const;
+  void SetStyle(EmitterStyle::value style);
+
+  // assignment
+  bool is(const Node& rhs) const;
+  template <typename T>
+  Node& operator=(const T& rhs);
+  Node& operator=(const Node& rhs);
+  void reset(const Node& rhs = Node());
+
+  // size/iterator
+  std::size_t size() const;
+
+  const_iterator begin() const;
+  iterator begin();
+
+  const_iterator end() const;
+  iterator end();
+
+  // sequence
+  template <typename T>
+  void push_back(const T& rhs);
+  void push_back(const Node& rhs);
+
+  // indexing
+  template <typename Key>
+  const Node operator[](const Key& key) const;
+  template <typename Key>
+  Node operator[](const Key& key);
+  template <typename Key>
+  bool remove(const Key& key);
+
+  const Node operator[](const Node& key) const;
+  Node operator[](const Node& key);
+  bool remove(const Node& key);
+
+  // map
+  template <typename Key, typename Value>
+  void force_insert(const Key& key, const Value& value);
+
+ private:
+  enum Zombie { ZombieNode };
+  explicit Node(Zombie);
+  explicit Node(detail::node& node, detail::shared_memory_holder pMemory);
+
+  void EnsureNodeExists() const;
+
+  template <typename T>
+  void Assign(const T& rhs);
+  void Assign(const char* rhs);
+  void Assign(char* rhs);
+
+  void AssignData(const Node& rhs);
+  void AssignNode(const Node& rhs);
+
+ private:
+  bool m_isValid;
+  mutable detail::shared_memory_holder m_pMemory;
+  mutable detail::node* m_pNode;
+};
+
+YAML_CPP_API bool operator==(const Node& lhs, const Node& rhs);
+
+YAML_CPP_API Node Clone(const Node& node);
+
+template <typename T>
+struct convert;
+}
+
+#endif  // NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/parse.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/parse.h
@ -0,0 +1,78 @@
+#ifndef VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class Node;
+
+/**
+ * Loads the input string as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API Node Load(const std::string& input);
+
+/**
+ * Loads the input string as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API Node Load(const char* input);
+
+/**
+ * Loads the input stream as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API Node Load(std::istream& input);
+
+/**
+ * Loads the input file as a single YAML document.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ * @throws {@link BadFile} if the file cannot be loaded.
+ */
+YAML_CPP_API Node LoadFile(const std::string& filename);
+
+/**
+ * Loads the input string as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API std::vector<Node> LoadAll(const std::string& input);
+
+/**
+ * Loads the input string as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API std::vector<Node> LoadAll(const char* input);
+
+/**
+ * Loads the input stream as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ */
+YAML_CPP_API std::vector<Node> LoadAll(std::istream& input);
+
+/**
+ * Loads the input file as a list of YAML documents.
+ *
+ * @throws {@link ParserException} if it is malformed.
+ * @throws {@link BadFile} if the file cannot be loaded.
+ */
+YAML_CPP_API std::vector<Node> LoadAllFromFile(const std::string& filename);
+}  // namespace YAML
+
+#endif  // VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/ptr.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/ptr.h
@ -0,0 +1,29 @@
+#ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <memory>
+
+namespace YAML {
+namespace detail {
+class node;
+class node_ref;
+class node_data;
+class memory;
+class memory_holder;
+
+typedef std::shared_ptr<node> shared_node;
+typedef std::shared_ptr<node_ref> shared_node_ref;
+typedef std::shared_ptr<node_data> shared_node_data;
+typedef std::shared_ptr<memory_holder> shared_memory_holder;
+typedef std::shared_ptr<memory> shared_memory;
+}
+}
+
+#endif  // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/node/type.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/node/type.h
@ -0,0 +1,16 @@
+#ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+struct NodeType {  // scoping struct so the enumerators are spelled NodeType::Map etc.
+  enum value { Undefined, Null, Scalar, Sequence, Map };  // node kinds, the type returned by Node::Type()
+};
+}
+
+#endif  // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/noncopyable.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/noncopyable.h
@ -0,0 +1,25 @@
+#ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+// this is basically boost::noncopyable
+class YAML_CPP_API noncopyable {
+ protected:
+  noncopyable() {}  // protected: only derived classes can construct or destroy
+  ~noncopyable() {}
+
+ private:
+  noncopyable(const noncopyable&);  // private and not defined in this header, so copies are rejected
+  const noncopyable& operator=(const noncopyable&);  // likewise for copy assignment
+};
+}
+
+#endif  // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/null.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/null.h
@ -0,0 +1,26 @@
+#ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <string>
+
+namespace YAML {
+class Node;
+
+struct YAML_CPP_API _Null {};
+inline bool operator==(const _Null&, const _Null&) { return true; }
+inline bool operator!=(const _Null&, const _Null&) { return false; }
+
+YAML_CPP_API bool IsNull(const Node& node);  // old API only
+YAML_CPP_API bool IsNullString(const std::string& str);
+
+extern YAML_CPP_API _Null Null;
+}
+
+#endif  // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/ostream_wrapper.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/ostream_wrapper.h
@ -0,0 +1,72 @@
+#ifndef OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class YAML_CPP_API ostream_wrapper {
+ public:
+  ostream_wrapper();
+  explicit ostream_wrapper(std::ostream& stream);
+  ~ostream_wrapper();
+
+  void write(const std::string& str);
+  void write(const char* str, std::size_t size);
+
+  void set_comment() { m_comment = true; }
+
+  const char* str() const {  // buffer contents as a C string, or null when a stream is attached
+    if (m_pStream) {
+      return 0;
+    } else {
+      m_buffer[m_pos] = '\0';  // terminate in place; allowed in a const method because m_buffer is mutable
+      return &m_buffer[0];
+    }
+  }
+
+  std::size_t row() const { return m_row; }
+  std::size_t col() const { return m_col; }
+  std::size_t pos() const { return m_pos; }
+  bool comment() const { return m_comment; }
+
+ private:
+  void update_pos(char ch);
+
+ private:
+  mutable std::vector<char> m_buffer;  // mutable so str() can write the terminator
+  std::ostream* const m_pStream;  // when non-null, str() returns null instead of the buffer
+
+  std::size_t m_pos;
+  std::size_t m_row, m_col;
+  bool m_comment;
+};
+
+template <std::size_t N>
+inline ostream_wrapper& operator<<(ostream_wrapper& stream,
+                                   const char(&str)[N]) {
+  stream.write(str, N - 1);  // N - 1 leaves out the array's last element (the '\0' of a string literal)
+  return stream;
+}
+
+inline ostream_wrapper& operator<<(ostream_wrapper& stream,
+                                   const std::string& str) {
+  stream.write(str);  // forwards to the std::string overload of write()
+  return stream;
+}
+
+inline ostream_wrapper& operator<<(ostream_wrapper& stream, char ch) {
+  stream.write(&ch, 1);  // a single character is written as a one-byte buffer
+  return stream;
+}
+}
+
+#endif  // OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/parser.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/parser.h
@ -0,0 +1,86 @@
+#ifndef PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <ios>
+#include <memory>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/noncopyable.h"
+
+namespace YAML {
+class EventHandler;
+class Node;
+class Scanner;
+struct Directives;
+struct Token;
+
+/**
+ * A parser turns a stream of bytes into one stream of "events" per YAML
+ * document in the input stream.
+ */
+class YAML_CPP_API Parser : private noncopyable {
+ public:
+  /** Constructs an empty parser (with no input). */
+  Parser();
+
+  /**
+   * Constructs a parser from the given input stream. The input stream must
+   * live as long as the parser.
+   */
+  explicit Parser(std::istream& in);
+
+  ~Parser();
+
+  /** Evaluates to true if the parser has some valid input to be read. */
+  explicit operator bool() const;
+
+  /**
+   * Resets the parser with the given input stream. Any existing state is
+   * erased.
+   */
+  void Load(std::istream& in);
+
+  /**
+   * Handles the next document by calling events on the {@code eventHandler}.
+   *
+   * @throw a ParserException on error.
+   * @return false if there are no more documents
+   */
+  bool HandleNextDocument(EventHandler& eventHandler);
+
+  void PrintTokens(std::ostream& out);
+
+ private:
+  /**
+   * Reads any directives that are next in the queue, setting the internal
+   * {@code m_pDirectives} state.
+   */
+  void ParseDirectives();
+
+  void HandleDirective(const Token& token);
+
+  /**
+   * Handles a "YAML" directive, which should be of the form 'major.minor' (like
+   * a version number).
+   */
+  void HandleYamlDirective(const Token& token);
+
+  /**
+   * Handles a "TAG" directive, which should be of the form 'handle prefix',
+   * where 'handle' is converted to 'prefix' in the file.
+   */
+  void HandleTagDirective(const Token& token);
+
+ private:
+  std::unique_ptr<Scanner> m_pScanner;
+  std::unique_ptr<Directives> m_pDirectives;
+};
+}
+
+#endif  // PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/stlemitter.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/stlemitter.h
@ -0,0 +1,51 @@
+#ifndef STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+#include <list>
+#include <set>
+#include <map>
+
+namespace YAML {
+template <typename Seq>
+inline Emitter& EmitSeq(Emitter& emitter, const Seq& seq) {  // emits BeginSeq, each element of seq in iteration order, then EndSeq
+  emitter << BeginSeq;
+  for (typename Seq::const_iterator it = seq.begin(); it != seq.end(); ++it)
+    emitter << *it;
+  emitter << EndSeq;
+  return emitter;
+}
+
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::vector<T>& v) {
+  return EmitSeq(emitter, v);  // a vector is emitted as a sequence
+}
+
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::list<T>& v) {
+  return EmitSeq(emitter, v);  // a list is emitted as a sequence
+}
+
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::set<T>& v) {
+  return EmitSeq(emitter, v);  // a set is emitted as a sequence, in the set's iteration order
+}
+
+template <typename K, typename V>
+inline Emitter& operator<<(Emitter& emitter, const std::map<K, V>& m) {  // emits BeginMap, one Key/Value pair per entry, then EndMap
+  typedef typename std::map<K, V> map;
+  emitter << BeginMap;
+  for (typename map::const_iterator it = m.begin(); it != m.end(); ++it)
+    emitter << Key << it->first << Value << it->second;  // entries follow the map's iteration order
+  emitter << EndMap;
+  return emitter;
+}
+}
+
+#endif  // STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/traits.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/traits.h
@ -0,0 +1,103 @@
+#ifndef TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+template <typename>
+struct is_numeric {
+  enum { value = false };
+};
+
+template <>
+struct is_numeric<char> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned char> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<long int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned long int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<short int> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned short int> {
+  enum { value = true };
+};
+#if defined(_MSC_VER) && (_MSC_VER < 1310)
+template <>
+struct is_numeric<__int64> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned __int64> {
+  enum { value = true };
+};
+#else
+template <>
+struct is_numeric<long long> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<unsigned long long> {
+  enum { value = true };
+};
+#endif
+template <>
+struct is_numeric<float> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<double> {
+  enum { value = true };
+};
+template <>
+struct is_numeric<long double> {
+  enum { value = true };
+};
+
+template <bool, class T = void>
+struct enable_if_c {
+  typedef T type;
+};
+
+template <class T>
+struct enable_if_c<false, T> {};
+
+template <class Cond, class T = void>
+struct enable_if : public enable_if_c<Cond::value, T> {};
+
+template <bool, class T = void>
+struct disable_if_c {
+  typedef T type;
+};
+
+template <class T>
+struct disable_if_c<true, T> {};
+
+template <class Cond, class T = void>
+struct disable_if : public disable_if_c<Cond::value, T> {};
+}
+
+#endif  // TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/include/yaml-cpp/yaml.h
+++ b/funasr/runtime/onnxruntime/include/yaml-cpp/yaml.h
@ -0,0 +1,24 @@
+#ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) ||                                            \
+    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/parser.h"
+#include "yaml-cpp/emitter.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/stlemitter.h"
+#include "yaml-cpp/exceptions.h"
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/convert.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/detail/impl.h"
+#include "yaml-cpp/node/parse.h"
+#include "yaml-cpp/node/emit.h"
+
+#endif  // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
--- a/funasr/runtime/onnxruntime/models/readme.md
+++ b/funasr/runtime/onnxruntime/models/readme.md
@ -1 +0,0 @@
-Place model.onnx here!
--- a/funasr/runtime/onnxruntime/models/vocab.txt
+++ b/funasr/runtime/onnxruntime/models/vocab.txt
--- a/funasr/runtime/onnxruntime/readme.md
+++ b/funasr/runtime/onnxruntime/readme.md
@ -1,6 +1,4 @@

-
-
 ## 快速使用

 ### Windows
@ -9,19 +7,16 @@

 Windows下已经预置fftw3及onnxruntime库

-
 ### Linux
 See the bottom of this page: Building Guidance

-
 ###  运行程序

-tester  /path/to/models/dir /path/to/wave/file quantize(true or false)
+tester  /path/to/models_dir /path/to/wave_file quantize(true or false)

- 例如： tester /data/models  /data/test.wav false
-
-/data/models 需要包括如下两个文件： model.onnx 和vocab.txt
+例如： tester /data/models  /data/test.wav false

+/data/models 需要包括如下三个文件: config.yaml, am.mvn, model.onnx(or model_quant.onnx)

 ## 支持平台
 - Windows
@ -66,7 +61,7 @@ centos: yum install fftw fftw-devel
 bash ./third_party/install_openblas.sh

 # build
- cmake  -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=/mnt/c/Users/ma139/RapidASR/cpp_onnx/build/onnxruntime-linux-x64-1.14.0
+ cmake  -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=/path/to/onnxruntime-linux-x64-1.14.0
 make

 # then in the subfolder tester of current direcotry, you will see a program, tester
@ -80,35 +75,11 @@ onnxruntime_xxx
 └───lib
 ```

-## 线程数与性能关系
-
-测试环境Rocky Linux 8，仅测试cpp版本结果（未测python版本），@acely 
-
-简述：
-在3台配置不同的机器上分别编译并测试，在fftw和onnxruntime版本都相同的前提下，识别同一个30分钟的音频文件，分别测试不同onnx线程数量的表现。
-
-![线程数关系](images/threadnum.png "Windows ASR")
-
-目前可以总结出大致规律：
-
-并非onnx线程数越多越好
-2线程比1线程提升显著，线程再多则提升较小
-线程数等于CPU物理核心数时效率最好
-实操建议：
-
-大部分场景用3-4线程性价比最高
-低配机器用2线程合适
-
-
-
-##  演示
-
-![Windows演示](images/demo.png "Windows ASR")
-
 ## 注意
 本程序只支持 采样率16000hz, 位深16bit的 **单声道** 音频。


 ## Acknowledge
-1. We acknowledge [mayong](https://github.com/RapidAI/RapidASR/tree/main/cpp_onnx) for contributing the onnxruntime(cpp api).
-2. We borrowed a lot of code from [FastASR](https://github.com/chenkui164/FastASR) for audio frontend and text-postprocess.
+1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
+2. We acknowledge [mayong](https://github.com/RapidAI/RapidASR/tree/main/cpp_onnx) for contributing the onnxruntime(cpp api).
+3. We borrowed a lot of code from [FastASR](https://github.com/chenkui164/FastASR) for audio frontend and text-postprocess.
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/Audio.cpp
@ -3,7 +3,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <webrtc_vad.h>

 #include "Audio.h"

@ -138,9 +137,9 @@ bool Audio::loadwav(const char *filename)
    fp = fopen(filename, "rb");
    if (fp == nullptr)
        return false;
-    fseek(fp, 0, SEEK_END);
-    uint32_t nFileLen = ftell(fp);
-    fseek(fp, 44, SEEK_SET);
+    fseek(fp, 0, SEEK_END);  /*定位到文件末尾*/
+    uint32_t nFileLen = ftell(fp);  /*得到文件大小*/
+    fseek(fp, 44, SEEK_SET);  /*跳过wav文件头*/

    speech_len = (nFileLen - 44) / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
@ -414,6 +413,7 @@ void Audio::padding()
 #define SPEECH_LEN_20S (16000 * 20)
 #define SPEECH_LEN_30S (16000 * 30)

+/*
 void Audio::split()
 {
    VadInst *handle = WebRtcVad_Create();
@ -472,3 +472,4 @@ void Audio::split()
    }
    WebRtcVad_Free(handle);
 }
+*/
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@ -10,7 +10,7 @@ add_library(rapidasr ${files})

 if(WIN32)

-        set(EXTRA_LIBS libfftw3f-3 webrtcvad)
+        set(EXTRA_LIBS libfftw3f-3 yaml-cpp)
        if(CMAKE_CL_64)
            target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
        else()
@ -21,7 +21,7 @@ if(WIN32)
        target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
 else()

-    set(EXTRA_LIBS fftw3f webrtcvad pthread)
+    set(EXTRA_LIBS fftw3f pthread yaml-cpp)
    target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
    target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")

--- a/funasr/runtime/onnxruntime/src/Vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/Vocab.cpp
@ -1,4 +1,5 @@
 #include "Vocab.h"
+#include "yaml-cpp/yaml.h"

 #include <fstream>
 #include <iostream>
@ -11,25 +12,42 @@ using namespace std;
 Vocab::Vocab(const char *filename)
 {
    ifstream in(filename);
-    string line;
+    loadVocabFromYaml(filename);

+    /*
+    string line;
    if (in) // 有该文件
    {
        while (getline(in, line)) // line中不包括每行的换行符
        {
            vocab.push_back(line);
        }
-        // cout << vocab[1719] << endl;
    }
-    // else // 没有该文件
-    //{
-    //     cout << "no such file" << endl;
-    // }
+    else{
+        printf("Cannot load vocab from: %s, there must be file vocab.txt", filename);
+        exit(-1);
+    }
+    */
 }
 Vocab::~Vocab()
 {
 }

+// Loads the vocabulary from the "token_list" sequence of a YAML file.
+//
+// Each element of config["token_list"] is converted to a string and appended
+// to `vocab` in file order. Prints an error and exits the process when the
+// file cannot be loaded, when "token_list" is missing or is not a sequence,
+// or when an element cannot be read as a string.
+void Vocab::loadVocabFromYaml(const char* filename){
+    YAML::Node config;
+    try{
+        config = YAML::LoadFile(filename);
+    }catch(...){
+        printf("error loading file, yaml file error or not exist.\n");
+        exit(-1);
+    }
+
+    try{
+        YAML::Node myList = config["token_list"];
+        // Without this check a missing or non-sequence entry would leave
+        // vocab silently empty and fail much later during decoding.
+        if (!myList.IsSequence()) {
+            printf("error loading vocab, token_list is missing or not a sequence in: %s\n", filename);
+            exit(-1);
+        }
+        for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
+            vocab.push_back(it->as<string>());
+        }
+    }catch(const std::exception& e){
+        // Subscripting and as<string>() can throw; report it instead of
+        // letting the exception escape the constructor that calls us.
+        printf("error reading token_list from %s: %s\n", filename, e.what());
+        exit(-1);
+    }
+}
+
 string Vocab::vector2string(vector<int> in)
 {
    int i;
@ -67,7 +85,6 @@ bool Vocab::isChinese(string ch)
    return false;
 }

-
 string Vocab::vector2stringV2(vector<int> in)
 {
    int i;
--- a/funasr/runtime/onnxruntime/src/Vocab.h
+++ b/funasr/runtime/onnxruntime/src/Vocab.h
@ -12,6 +12,7 @@ class Vocab {
    vector<string> vocab;
    bool isChinese(string ch);
    bool isEnglish(string ch);
+    void loadVocabFromYaml(const char* filename);

  public:
    Vocab(const char *filename);
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
@ -6,14 +6,17 @@ using namespace paraformer;
 ModelImp::ModelImp(const char* path,int nNumThread, bool quantize)
 {
    string model_path;
-    string vocab_path;
+    string cmvn_path;
+    string config_path;
+
    if(quantize)
    {
        model_path = pathAppend(path, "model_quant.onnx");
    }else{
        model_path = pathAppend(path, "model.onnx");
    }
-    vocab_path = pathAppend(path, "vocab.txt");
+    cmvn_path = pathAppend(path, "am.mvn");
+    config_path = pathAppend(path, "config.yaml");

    fe = new FeatureExtract(3);

@ -43,7 +46,8 @@ ModelImp::ModelImp(const char* path,int nNumThread, bool quantize)
        m_szInputNames.push_back(item.c_str());
    for (auto& item : m_strOutputNames)
        m_szOutputNames.push_back(item.c_str());
-    vocab = new Vocab(vocab_path.c_str());
+    vocab = new Vocab(config_path.c_str());
+    load_cmvn(cmvn_path.c_str());
 }

 ModelImp::~ModelImp()
@ -88,16 +92,49 @@ void ModelImp::apply_lfr(Tensor<float>*& din)
    din = tmp;
 }

+// Loads the CMVN statistics into means_list and vars_list from a text file.
+//
+// The parser looks for a line whose first token is "<AddShift>" or
+// "<Rescale>" and then reads the line that follows it. When that line starts
+// with "<LearnRateCoef>", its tokens from index 3 up to (but excluding) the
+// last one are parsed as floats: values after "<AddShift>" are appended to
+// means_list, values after "<Rescale>" are multiplied by `scale` and appended
+// to vars_list.
+// NOTE(review): presumably tokens 0-2 are the tag, its coefficient and "[",
+// and the last token is "]" - confirm against a real am.mvn.
+//
+// Prints an error and exits the process when the file cannot be opened.
+void ModelImp::load_cmvn(const char *filename)
+{
+    ifstream cmvn_stream(filename);
+    if (!cmvn_stream.is_open()) {
+        printf("Cannot load cmvn from: %s, there must be file am.mvn\n", filename);
+        exit(-1);
+    }
+    string line;
+
+    while (getline(cmvn_stream, line)) {
+        istringstream iss(line);
+        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+        // A blank line yields no tokens; skip it instead of indexing [0].
+        if (line_item.empty()) {
+            continue;
+        }
+        if (line_item[0] == "<AddShift>") {
+            if (!getline(cmvn_stream, line)) {
+                break;
+            }
+            istringstream means_lines_stream(line);
+            vector<string> means_lines{istream_iterator<string>{means_lines_stream}, istream_iterator<string>{}};
+            if (!means_lines.empty() && means_lines[0] == "<LearnRateCoef>") {
+                // j + 1 < size() avoids the unsigned wrap-around of size() - 1.
+                for (size_t j = 3; j + 1 < means_lines.size(); j++) {
+                    means_list.push_back(stof(means_lines[j]));
+                }
+                continue;
+            }
+        }
+        else if (line_item[0] == "<Rescale>") {
+            if (!getline(cmvn_stream, line)) {
+                break;
+            }
+            istringstream vars_lines_stream(line);
+            vector<string> vars_lines{istream_iterator<string>{vars_lines_stream}, istream_iterator<string>{}};
+            if (!vars_lines.empty() && vars_lines[0] == "<LearnRateCoef>") {
+                for (size_t j = 3; j + 1 < vars_lines.size(); j++) {
+                    vars_list.push_back(stof(vars_lines[j])*scale);
+                }
+                continue;
+            }
+        }
+    }
+}
+
 void ModelImp::apply_cmvn(Tensor<float>* din)
 {
    const float* var;
    const float* mean;
-    float scale = 22.6274169979695;
+    var = vars_list.data();
+    mean= means_list.data();
+
    int m = din->size[2];
    int n = din->size[3];

-    var = (const float*)paraformer_cmvn_var_hex;
-    mean = (const float*)paraformer_cmvn_mean_hex;
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            int idx = i * n + j;
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.h
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.h
@ -11,9 +11,13 @@ namespace paraformer {
        FeatureExtract* fe;

        Vocab* vocab;
+        vector<float> means_list;
+        vector<float> vars_list;
+        const float scale = 22.6274169979695;

        void apply_lfr(Tensor<float>*& din);
        void apply_cmvn(Tensor<float>* din);
+        void load_cmvn(const char *filename);

        string greedy_search( float* in, int nLen);

--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@ -1,13 +1,15 @@
 #pragma once 
 // system 

-#include <iostream>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <deque>
 #include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iterator>
 #include <list>
 #include <locale.h>
 #include <vector>
--- a/funasr/runtime/onnxruntime/tester/tester.cpp
+++ b/funasr/runtime/onnxruntime/tester/tester.cpp
@ -51,8 +51,7 @@ int main(int argc, char *argv[])
        string msg = RapidAsrGetResult(Result, 0);
        setbuf(stdout, NULL);
        cout << "Result: \"";
-        cout << msg << endl;
-        cout << "\"." << endl;
+        cout << msg << "\"." << endl;
        snippet_time = RapidAsrGetRetSnippetTime(Result);
        RapidAsrFreeResult(Result);
    }
--- a/funasr/runtime/onnxruntime/third_party/webrtc/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/CMakeLists.txt
@ -1,16 +0,0 @@
-
-
-if(WIN32)
-    add_definitions(-DWEBRTC_WIN)
-else()
-    add_definitions(-DWEBRTC_POSIX)
-endif()
-
-
-include_directories("..")
-
-file(GLOB_RECURSE files "*.c" "rtc_base/checks.cc")
-
-message("${files}")
-
-add_library(webrtcvad ${files})
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/complex_bit_reverse.c
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/complex_bit_reverse.c
@ -1,108 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-
-/* Tables for data buffer indexes that are bit reversed and thus need to be
- * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
- * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
- * operation. Same for index_8.
- */
-
-/* Indexes for the case of stages == 7. */
-static const int16_t index_7[112] = {
-  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
-  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
-  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
-  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
-  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
-  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
-  103, 115, 111, 123
-};
-
-/* Indexes for the case of stages == 8. */
-static const int16_t index_8[240] = {
-  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
-  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
-  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
-  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
-  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
-  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
-  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
-  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
-  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
-  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
-  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
-  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
-  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
-  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
-  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
-};
-
-void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
-  /* For any specific value of stages, we know exactly the indexes that are
-   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
-   * stages are 7 and 8, so we use tables to save unnecessary iterations and
-   * calculations for these two cases.
-   */
-  if (stages == 7 || stages == 8) {
-    int m = 0;
-    int length = 112;
-    const int16_t* index = index_7;
-
-    if (stages == 8) {
-      length = 240;
-      index = index_8;
-    }
-
-    /* Decimation in time. Swap the elements with bit-reversed indexes. */
-    for (m = 0; m < length; m += 2) {
-      /* We declare a int32_t* type pointer, to load both the 16-bit real
-       * and imaginary elements from complex_data in one instruction, reducing
-       * complexity.
-       */
-      int32_t* complex_data_ptr = (int32_t*)complex_data;
-      int32_t temp = 0;
-
-      temp = complex_data_ptr[index[m]];  /* Real and imaginary */
-      complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]];
-      complex_data_ptr[index[m + 1]] = temp;
-    }
-  }
-  else {
-    int m = 0, mr = 0, l = 0;
-    int n = 1 << stages;
-    int nn = n - 1;
-
-    /* Decimation in time - re-order data */
-    for (m = 1; m <= nn; ++m) {
-      int32_t* complex_data_ptr = (int32_t*)complex_data;
-      int32_t temp = 0;
-
-      /* Find out indexes that are bit-reversed. */
-      l = n;
-      do {
-        l >>= 1;
-      } while (l > nn - mr);
-      mr = (mr & (l - 1)) + l;
-
-      if (mr <= m) {
-        continue;
-      }
-
-      /* Swap the elements with bit-reversed indexes.
-       * This is similar to the loop in the stages == 7 or 8 cases.
-       */
-      temp = complex_data_ptr[m];  /* Real and imaginary */
-      complex_data_ptr[m] = complex_data_ptr[mr];
-      complex_data_ptr[mr] = temp;
-    }
-  }
-}
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/complex_fft.c
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/complex_fft.c
@ -1,299 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file contains the function WebRtcSpl_ComplexFFT().
- * The description header can be found in signal_processing_library.h
- *
- */
-
-#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-#include "webrtc/rtc_base/system/arch.h"
-
-#define CFFTSFT 14
-#define CFFTRND 1
-#define CFFTRND2 16384
-
-#define CIFFTSFT 14
-#define CIFFTRND 1
-
-
-int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
-{
-    int i, j, l, k, istep, n, m;
-    int16_t wr, wi;
-    int32_t tr32, ti32, qr32, qi32;
-
-    /* The 1024-value is a constant given from the size of kSinTable1024[],
-     * and should not be changed depending on the input parameter 'stages'
-     */
-    n = 1 << stages;
-    if (n > 1024)
-        return -1;
-
-    l = 1;
-    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
-         depending on the input parameter 'stages' */
-
-    if (mode == 0)
-    {
-        // mode==0: Low-complexity and Low-accuracy mode
-        while (l < n)
-        {
-            istep = l << 1;
-
-            for (m = 0; m < l; ++m)
-            {
-                j = m << k;
-
-                /* The 256-value is a constant given as 1/4 of the size of
-                 * kSinTable1024[], and should not be changed depending on the input
-                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
-                 */
-                wr = kSinTable1024[j + 256];
-                wi = -kSinTable1024[j];
-
-                for (i = m; i < n; i += istep)
-                {
-                    j = i + l;
-
-                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
-
-                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
-
-                    qr32 = (int32_t)frfi[2 * i];
-                    qi32 = (int32_t)frfi[2 * i + 1];
-                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
-                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
-                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
-                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
-                }
-            }
-
-            --k;
-            l = istep;
-
-        }
-
-    } else
-    {
-        // mode==1: High-complexity and High-accuracy mode
-        while (l < n)
-        {
-            istep = l << 1;
-
-            for (m = 0; m < l; ++m)
-            {
-                j = m << k;
-
-                /* The 256-value is a constant given as 1/4 of the size of
-                 * kSinTable1024[], and should not be changed depending on the input
-                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
-                 */
-                wr = kSinTable1024[j + 256];
-                wi = -kSinTable1024[j];
-
-#ifdef WEBRTC_ARCH_ARM_V7
-                int32_t wri = 0;
-                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
-                    "r"((int32_t)wr), "r"((int32_t)wi));
-#endif
-
-                for (i = m; i < n; i += istep)
-                {
-                    j = i + l;
-
-#ifdef WEBRTC_ARCH_ARM_V7
-                    register int32_t frfi_r;
-                    __asm __volatile(
-                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
-                        " lsl #16\n\t"
-                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
-                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
-                        :[frfi_r]"=&r"(frfi_r),
-                         [tr32]"=&r"(tr32),
-                         [ti32]"=r"(ti32)
-                        :[frfi_even]"r"((int32_t)frfi[2*j]),
-                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
-                         [wri]"r"(wri),
-                         [cfftrnd]"r"(CFFTRND));
-#else
-                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
-
-                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
-#endif
-
-                    tr32 >>= 15 - CFFTSFT;
-                    ti32 >>= 15 - CFFTSFT;
-
-                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
-                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);
-
-                    frfi[2 * j] = (int16_t)(
-                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
-                    frfi[2 * j + 1] = (int16_t)(
-                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
-                    frfi[2 * i] = (int16_t)(
-                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
-                    frfi[2 * i + 1] = (int16_t)(
-                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
-                }
-            }
-
-            --k;
-            l = istep;
-        }
-    }
-    return 0;
-}
-
-int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
-{
-    size_t i, j, l, istep, n, m;
-    int k, scale, shift;
-    int16_t wr, wi;
-    int32_t tr32, ti32, qr32, qi32;
-    int32_t tmp32, round2;
-
-    /* The 1024-value is a constant given from the size of kSinTable1024[],
-     * and should not be changed depending on the input parameter 'stages'
-     */
-    n = ((size_t)1) << stages;
-    if (n > 1024)
-        return -1;
-
-    scale = 0;
-
-    l = 1;
-    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
-         depending on the input parameter 'stages' */
-
-    while (l < n)
-    {
-        // variable scaling, depending upon data
-        shift = 0;
-        round2 = 8192;
-
-        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
-        if (tmp32 > 13573)
-        {
-            shift++;
-            scale++;
-            round2 <<= 1;
-        }
-        if (tmp32 > 27146)
-        {
-            shift++;
-            scale++;
-            round2 <<= 1;
-        }
-
-        istep = l << 1;
-
-        if (mode == 0)
-        {
-            // mode==0: Low-complexity and Low-accuracy mode
-            for (m = 0; m < l; ++m)
-            {
-                j = m << k;
-
-                /* The 256-value is a constant given as 1/4 of the size of
-                 * kSinTable1024[], and should not be changed depending on the input
-                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
-                 */
-                wr = kSinTable1024[j + 256];
-                wi = kSinTable1024[j];
-
-                for (i = m; i < n; i += istep)
-                {
-                    j = i + l;
-
-                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
-
-                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
-
-                    qr32 = (int32_t)frfi[2 * i];
-                    qi32 = (int32_t)frfi[2 * i + 1];
-                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
-                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
-                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
-                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
-                }
-            }
-        } else
-        {
-            // mode==1: High-complexity and High-accuracy mode
-
-            for (m = 0; m < l; ++m)
-            {
-                j = m << k;
-
-                /* The 256-value is a constant given as 1/4 of the size of
-                 * kSinTable1024[], and should not be changed depending on the input
-                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
-                 */
-                wr = kSinTable1024[j + 256];
-                wi = kSinTable1024[j];
-
-#ifdef WEBRTC_ARCH_ARM_V7
-                int32_t wri = 0;
-                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
-                    "r"((int32_t)wr), "r"((int32_t)wi));
-#endif
-
-                for (i = m; i < n; i += istep)
-                {
-                    j = i + l;
-
-#ifdef WEBRTC_ARCH_ARM_V7
-                    register int32_t frfi_r;
-                    __asm __volatile(
-                      "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
-                      "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
-                      "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
-                      :[frfi_r]"=&r"(frfi_r),
-                       [tr32]"=&r"(tr32),
-                       [ti32]"=r"(ti32)
-                      :[frfi_even]"r"((int32_t)frfi[2*j]),
-                       [frfi_odd]"r"((int32_t)frfi[2*j +1]),
-                       [wri]"r"(wri),
-                       [cifftrnd]"r"(CIFFTRND)
-                    );
-#else
-
-                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
-
-                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
-#endif
-                    tr32 >>= 15 - CIFFTSFT;
-                    ti32 >>= 15 - CIFFTSFT;
-
-                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
-                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);
-
-                    frfi[2 * j] = (int16_t)(
-                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
-                    frfi[2 * j + 1] = (int16_t)(
-                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
-                    frfi[2 * i] = (int16_t)(
-                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
-                    frfi[2 * i + 1] = (int16_t)(
-                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
-                }
-            }
-
-        }
-        --k;
-        l = istep;
-    }
-    return scale;
-}
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/complex_fft_tables.h
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/complex_fft_tables.h
@ -1,132 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
-#define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
-
-#include <stdint.h>
-
-static const int16_t kSinTable1024[] = {
-    0,      201,    402,    603,    804,    1005,   1206,   1406,   1607,
-    1808,   2009,   2209,   2410,   2610,   2811,   3011,   3211,   3411,
-    3611,   3811,   4011,   4210,   4409,   4608,   4807,   5006,   5205,
-    5403,   5601,   5799,   5997,   6195,   6392,   6589,   6786,   6982,
-    7179,   7375,   7571,   7766,   7961,   8156,   8351,   8545,   8739,
-    8932,   9126,   9319,   9511,   9703,   9895,   10087,  10278,  10469,
-    10659,  10849,  11038,  11227,  11416,  11604,  11792,  11980,  12166,
-    12353,  12539,  12724,  12909,  13094,  13278,  13462,  13645,  13827,
-    14009,  14191,  14372,  14552,  14732,  14911,  15090,  15268,  15446,
-    15623,  15799,  15975,  16150,  16325,  16499,  16672,  16845,  17017,
-    17189,  17360,  17530,  17699,  17868,  18036,  18204,  18371,  18537,
-    18702,  18867,  19031,  19194,  19357,  19519,  19680,  19840,  20000,
-    20159,  20317,  20474,  20631,  20787,  20942,  21096,  21249,  21402,
-    21554,  21705,  21855,  22004,  22153,  22301,  22448,  22594,  22739,
-    22883,  23027,  23169,  23311,  23452,  23592,  23731,  23869,  24006,
-    24143,  24278,  24413,  24546,  24679,  24811,  24942,  25072,  25201,
-    25329,  25456,  25582,  25707,  25831,  25954,  26077,  26198,  26318,
-    26437,  26556,  26673,  26789,  26905,  27019,  27132,  27244,  27355,
-    27466,  27575,  27683,  27790,  27896,  28001,  28105,  28208,  28309,
-    28410,  28510,  28608,  28706,  28802,  28897,  28992,  29085,  29177,
-    29268,  29358,  29446,  29534,  29621,  29706,  29790,  29873,  29955,
-    30036,  30116,  30195,  30272,  30349,  30424,  30498,  30571,  30643,
-    30713,  30783,  30851,  30918,  30984,  31049,  31113,  31175,  31236,
-    31297,  31356,  31413,  31470,  31525,  31580,  31633,  31684,  31735,
-    31785,  31833,  31880,  31926,  31970,  32014,  32056,  32097,  32137,
-    32176,  32213,  32249,  32284,  32318,  32350,  32382,  32412,  32441,
-    32468,  32495,  32520,  32544,  32567,  32588,  32609,  32628,  32646,
-    32662,  32678,  32692,  32705,  32717,  32727,  32736,  32744,  32751,
-    32757,  32761,  32764,  32766,  32767,  32766,  32764,  32761,  32757,
-    32751,  32744,  32736,  32727,  32717,  32705,  32692,  32678,  32662,
-    32646,  32628,  32609,  32588,  32567,  32544,  32520,  32495,  32468,
-    32441,  32412,  32382,  32350,  32318,  32284,  32249,  32213,  32176,
-    32137,  32097,  32056,  32014,  31970,  31926,  31880,  31833,  31785,
-    31735,  31684,  31633,  31580,  31525,  31470,  31413,  31356,  31297,
-    31236,  31175,  31113,  31049,  30984,  30918,  30851,  30783,  30713,
-    30643,  30571,  30498,  30424,  30349,  30272,  30195,  30116,  30036,
-    29955,  29873,  29790,  29706,  29621,  29534,  29446,  29358,  29268,
-    29177,  29085,  28992,  28897,  28802,  28706,  28608,  28510,  28410,
-    28309,  28208,  28105,  28001,  27896,  27790,  27683,  27575,  27466,
-    27355,  27244,  27132,  27019,  26905,  26789,  26673,  26556,  26437,
-    26318,  26198,  26077,  25954,  25831,  25707,  25582,  25456,  25329,
-    25201,  25072,  24942,  24811,  24679,  24546,  24413,  24278,  24143,
-    24006,  23869,  23731,  23592,  23452,  23311,  23169,  23027,  22883,
-    22739,  22594,  22448,  22301,  22153,  22004,  21855,  21705,  21554,
-    21402,  21249,  21096,  20942,  20787,  20631,  20474,  20317,  20159,
-    20000,  19840,  19680,  19519,  19357,  19194,  19031,  18867,  18702,
-    18537,  18371,  18204,  18036,  17868,  17699,  17530,  17360,  17189,
-    17017,  16845,  16672,  16499,  16325,  16150,  15975,  15799,  15623,
-    15446,  15268,  15090,  14911,  14732,  14552,  14372,  14191,  14009,
-    13827,  13645,  13462,  13278,  13094,  12909,  12724,  12539,  12353,
-    12166,  11980,  11792,  11604,  11416,  11227,  11038,  10849,  10659,
-    10469,  10278,  10087,  9895,   9703,   9511,   9319,   9126,   8932,
-    8739,   8545,   8351,   8156,   7961,   7766,   7571,   7375,   7179,
-    6982,   6786,   6589,   6392,   6195,   5997,   5799,   5601,   5403,
-    5205,   5006,   4807,   4608,   4409,   4210,   4011,   3811,   3611,
-    3411,   3211,   3011,   2811,   2610,   2410,   2209,   2009,   1808,
-    1607,   1406,   1206,   1005,   804,    603,    402,    201,    0,
-    -201,   -402,   -603,   -804,   -1005,  -1206,  -1406,  -1607,  -1808,
-    -2009,  -2209,  -2410,  -2610,  -2811,  -3011,  -3211,  -3411,  -3611,
-    -3811,  -4011,  -4210,  -4409,  -4608,  -4807,  -5006,  -5205,  -5403,
-    -5601,  -5799,  -5997,  -6195,  -6392,  -6589,  -6786,  -6982,  -7179,
-    -7375,  -7571,  -7766,  -7961,  -8156,  -8351,  -8545,  -8739,  -8932,
-    -9126,  -9319,  -9511,  -9703,  -9895,  -10087, -10278, -10469, -10659,
-    -10849, -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
-    -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, -14009,
-    -14191, -14372, -14552, -14732, -14911, -15090, -15268, -15446, -15623,
-    -15799, -15975, -16150, -16325, -16499, -16672, -16845, -17017, -17189,
-    -17360, -17530, -17699, -17868, -18036, -18204, -18371, -18537, -18702,
-    -18867, -19031, -19194, -19357, -19519, -19680, -19840, -20000, -20159,
-    -20317, -20474, -20631, -20787, -20942, -21096, -21249, -21402, -21554,
-    -21705, -21855, -22004, -22153, -22301, -22448, -22594, -22739, -22883,
-    -23027, -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
-    -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, -25329,
-    -25456, -25582, -25707, -25831, -25954, -26077, -26198, -26318, -26437,
-    -26556, -26673, -26789, -26905, -27019, -27132, -27244, -27355, -27466,
-    -27575, -27683, -27790, -27896, -28001, -28105, -28208, -28309, -28410,
-    -28510, -28608, -28706, -28802, -28897, -28992, -29085, -29177, -29268,
-    -29358, -29446, -29534, -29621, -29706, -29790, -29873, -29955, -30036,
-    -30116, -30195, -30272, -30349, -30424, -30498, -30571, -30643, -30713,
-    -30783, -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
-    -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, -31785,
-    -31833, -31880, -31926, -31970, -32014, -32056, -32097, -32137, -32176,
-    -32213, -32249, -32284, -32318, -32350, -32382, -32412, -32441, -32468,
-    -32495, -32520, -32544, -32567, -32588, -32609, -32628, -32646, -32662,
-    -32678, -32692, -32705, -32717, -32727, -32736, -32744, -32751, -32757,
-    -32761, -32764, -32766, -32767, -32766, -32764, -32761, -32757, -32751,
-    -32744, -32736, -32727, -32717, -32705, -32692, -32678, -32662, -32646,
-    -32628, -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
-    -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, -32137,
-    -32097, -32056, -32014, -31970, -31926, -31880, -31833, -31785, -31735,
-    -31684, -31633, -31580, -31525, -31470, -31413, -31356, -31297, -31236,
-    -31175, -31113, -31049, -30984, -30918, -30851, -30783, -30713, -30643,
-    -30571, -30498, -30424, -30349, -30272, -30195, -30116, -30036, -29955,
-    -29873, -29790, -29706, -29621, -29534, -29446, -29358, -29268, -29177,
-    -29085, -28992, -28897, -28802, -28706, -28608, -28510, -28410, -28309,
-    -28208, -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
-    -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, -26318,
-    -26198, -26077, -25954, -25831, -25707, -25582, -25456, -25329, -25201,
-    -25072, -24942, -24811, -24679, -24546, -24413, -24278, -24143, -24006,
-    -23869, -23731, -23592, -23452, -23311, -23169, -23027, -22883, -22739,
-    -22594, -22448, -22301, -22153, -22004, -21855, -21705, -21554, -21402,
-    -21249, -21096, -20942, -20787, -20631, -20474, -20317, -20159, -20000,
-    -19840, -19680, -19519, -19357, -19194, -19031, -18867, -18702, -18537,
-    -18371, -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
-    -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, -15446,
-    -15268, -15090, -14911, -14732, -14552, -14372, -14191, -14009, -13827,
-    -13645, -13462, -13278, -13094, -12909, -12724, -12539, -12353, -12166,
-    -11980, -11792, -11604, -11416, -11227, -11038, -10849, -10659, -10469,
-    -10278, -10087, -9895,  -9703,  -9511,  -9319,  -9126,  -8932,  -8739,
-    -8545,  -8351,  -8156,  -7961,  -7766,  -7571,  -7375,  -7179,  -6982,
-    -6786,  -6589,  -6392,  -6195,  -5997,  -5799,  -5601,  -5403,  -5205,
-    -5006,  -4807,  -4608,  -4409,  -4210,  -4011,  -3811,  -3611,  -3411,
-    -3211,  -3011,  -2811,  -2610,  -2410,  -2209,  -2009,  -1808,  -1607,
-    -1406,  -1206,  -1005,  -804,   -603,   -402,   -201};
-
-#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/cross_correlation.c
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/cross_correlation.c
@ -1,30 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-
-/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
-void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
-                                 const int16_t* seq1,
-                                 const int16_t* seq2,
-                                 size_t dim_seq,
-                                 size_t dim_cross_correlation,
-                                 int right_shifts,
-                                 int step_seq2) {
-  size_t i = 0, j = 0;
-
-  for (i = 0; i < dim_cross_correlation; i++) {
-    int32_t corr = 0;
-    for (j = 0; j < dim_seq; j++)
-      corr += (seq1[j] * seq2[j]) >> right_shifts;
-    seq2 += step_seq2;
-    *cross_correlation++ = corr;
-  }
-}
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/division_operations.c
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/division_operations.c
@ -1,141 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
- * This file contains implementations of the divisions
- * WebRtcSpl_DivU32U16()
- * WebRtcSpl_DivW32W16()
- * WebRtcSpl_DivW32W16ResW16()
- * WebRtcSpl_DivResultInQ31()
- * WebRtcSpl_DivW32HiLow()
- *
- * The description header can be found in signal_processing_library.h
- *
- */
-
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-#include "webrtc/rtc_base/sanitizer.h"
-
-uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
-{
-    // Guard against division with 0
-    if (den != 0)
-    {
-        return (uint32_t)(num / den);
-    } else
-    {
-        return (uint32_t)0xFFFFFFFF;
-    }
-}
-
-int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
-{
-    // Guard against division with 0
-    if (den != 0)
-    {
-        return (int32_t)(num / den);
-    } else
-    {
-        return (int32_t)0x7FFFFFFF;
-    }
-}
-
-int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
-{
-    // Guard against division with 0
-    if (den != 0)
-    {
-        return (int16_t)(num / den);
-    } else
-    {
-        return (int16_t)0x7FFF;
-    }
-}
-
-int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
-{
-    int32_t L_num = num;
-    int32_t L_den = den;
-    int32_t div = 0;
-    int k = 31;
-    int change_sign = 0;
-
-    if (num == 0)
-        return 0;
-
-    if (num < 0)
-    {
-        change_sign++;
-        L_num = -num;
-    }
-    if (den < 0)
-    {
-        change_sign++;
-        L_den = -den;
-    }
-    while (k--)
-    {
-        div <<= 1;
-        L_num <<= 1;
-        if (L_num >= L_den)
-        {
-            L_num -= L_den;
-            div++;
-        }
-    }
-    if (change_sign == 1)
-    {
-        div = -div;
-    }
-    return div;
-}
-
-int32_t RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
-WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
-{
-    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
-    int32_t tmpW32;
-
-    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
-    // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)
-
-    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
-    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
-    // tmpW32 = den * approx
-
-    tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
-    // UBSan: 2147483647 - -2 cannot be represented in type 'int'
-
-    // Store tmpW32 in hi and low format
-    tmp_hi = (int16_t)(tmpW32 >> 16);
-    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
-
-    // tmpW32 = 1/den in Q29
-    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
-
-    // 1/den in hi and low format
-    tmp_hi = (int16_t)(tmpW32 >> 16);
-    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
-
-    // Store num in hi and low format
-    num_hi = (int16_t)(num >> 16);
-    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
-
-    // num * (1/den) by 32 bit multiplication (result in Q28)
-
-    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
-        (num_low * tmp_hi >> 15);
-
-    // Put result in Q31 (convert from Q28)
-    tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
-
-    return tmpW32;
-}
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.cc
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.cc
@ -1,34 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/common_audio/signal_processing/dot_product_with_scale.h"
-
-#include "webrtc/rtc_base/numerics/safe_conversions.h"
-
-int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
-                                      const int16_t* vector2,
-                                      size_t length,
-                                      int scaling) {
-  int64_t sum = 0;
-  size_t i = 0;
-
-  /* Unroll the loop to improve performance. */
-  for (i = 0; i + 3 < length; i += 4) {
-    sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
-    sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
-    sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
-    sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
-  }
-  for (; i < length; i++) {
-    sum += (vector1[i] * vector2[i]) >> scaling;
-  }
-
-  return rtc::saturated_cast<int32_t>(sum);
-}
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.h
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.h
@ -1,40 +0,0 @@
-/*
- *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
-#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
-
-#include <stdint.h>
-#include <string.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Calculates the dot product between two (int16_t) vectors.
-//
-// Input:
-//      - vector1       : Vector 1
-//      - vector2       : Vector 2
-//      - vector_length : Number of samples used in the dot product
-//      - scaling       : The number of right bit shifts to apply on each term
-//                        during calculation to avoid overflow, i.e., the
-//                        output will be in Q(-|scaling|)
-//
-// Return value         : The dot product in Q(-scaling)
-int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
-                                      const int16_t* vector2,
-                                      size_t length,
-                                      int scaling);
-
-#ifdef __cplusplus
-}
-#endif  // __cplusplus
-#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
--- a/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/downsample_fast.c
+++ b/funasr/runtime/onnxruntime/third_party/webrtc/common_audio/signal_processing/downsample_fast.c
@ -1,65 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
-
-#include "webrtc/rtc_base/checks.h"
-#include "webrtc/rtc_base/sanitizer.h"
-
-// TODO(Bjornv): Change the function parameter order to WebRTC code style.
-// C version of WebRtcSpl_DownsampleFast() for generic platforms.
-int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
-                              size_t data_in_length,
-                              int16_t* data_out,
-                              size_t data_out_length,
-                              const int16_t* __restrict coefficients,
-                              size_t coefficients_length,
-                              int factor,
-                              size_t delay) {
-  int16_t* const original_data_out = data_out;
-  size_t i = 0;
-  size_t j = 0;
-  int32_t out_s32 = 0;
-  size_t endpos = delay + factor * (data_out_length - 1) + 1;
-
-  // Return error if any of the running conditions doesn't meet.
-  if (data_out_length == 0 || coefficients_length == 0
-                           || data_in_length < endpos) {
-    return -1;
-  }
-
-  rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]),
-                           coefficients_length);
-
-  for (i = delay; i < endpos; i += factor) {
-    out_s32 = 2048;  // Round value, 0.5 in Q12.
-
-    for (j = 0; j < coefficients_length; j++) {
-      // Negative overflow is permitted here, because this is
-      // auto-regressive filters, and the state for each batch run is
-      // stored in the "negative" positions of the output vector.
-      rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j],
-          sizeof(data_in[0]), 1);
-      // out_s32 is in Q12 domain.
-      out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j];
-    }
-
-    out_s32 >>= 12;  // Q0.
-
-    // Saturate and store the output.
-    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
-  }
-
-  RTC_DCHECK_EQ(original_data_out + data_out_length, data_out);
-  rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]),
-                           data_out_length);
-
-  return 0;
-}
--- a/Show More
+++ b/Show More