add jieba for ct-transformer

2025-09-15 14:48:36 +08:00 · 2023-10-16 14:47:17 +08:00 · 2023-10-16 14:47:17 +08:00 · 91231a03f5
commit 91231a03f5
parent afddb95326
41 changed files with 4362 additions and 3 deletions
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@ -32,6 +32,8 @@ endif()
 include_directories(${PROJECT_SOURCE_DIR}/third_party/kaldi-native-fbank)
 include_directories(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include)
 include_directories(${PROJECT_SOURCE_DIR}/third_party/jieba/include)
 include_directories(${PROJECT_SOURCE_DIR}/third_party/jieba/include/limonp/include)
 if(ENABLE_GLOG)
    include_directories(${PROJECT_SOURCE_DIR}/third_party/glog/src)
--- a/funasr/runtime/onnxruntime/include/com-define.h
+++ b/funasr/runtime/onnxruntime/include/com-define.h
@ -107,4 +107,8 @@ namespace funasr {
 #define DUN_INDEX 5
 #define CACHE_POP_TRIGGER_LIMIT   200
 #define JIEBA_DICT "jieba.c.dict"
 #define JIEBA_USERDICT "jieba_usr_dict"
 #define JIEBA_HMM_MODEL "jieba.hmm"
 } // namespace funasr
--- a/funasr/runtime/onnxruntime/src/ct-transformer.cpp
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
@ -40,6 +40,7 @@ void CTTransformer::InitPunc(const std::string &punc_model, const std::string &p
        m_szOutputNames.push_back(item.c_str());
 	m_tokenizer.OpenYaml(punc_config.c_str());
    m_tokenizer.JiebaInit(punc_config);
 }
 CTTransformer::~CTTransformer()
--- a/funasr/runtime/onnxruntime/src/tokenizer.cpp
+++ b/funasr/runtime/onnxruntime/src/tokenizer.cpp
@ -17,6 +17,41 @@ CTokenizer::CTokenizer():m_ready(false)
 // Releases the jieba dictionary trie and HMM model that JiebaInit() allocated
 // with `new` (JiebaInit() stores NULL in both when seg_jieba is false, which
 // makes the deletes no-ops).
 // NOTE(review): both pointers are assigned only in JiebaInit(); confirm the
 // constructors null-initialise them, otherwise destroying a tokenizer on which
 // JiebaInit() was never called deletes indeterminate pointers.
 CTokenizer::~CTokenizer()
 {
 	delete jieba_dict_trie_;
    delete jieba_model_;
 }
 // Forwards the dictionary trie and HMM model pointers to the embedded jieba
 // processor so that its segmenters use them.
 // dict: dictionary trie to segment with.
 // hmm:  HMM model to segment with.
 void CTokenizer::SetJiebaRes(cppjieba::DictTrie *dict, cppjieba::HMMModel *hmm) {
 	jieba_processor_.SetJiebaRes(dict, hmm);
 }
 void CTokenizer::JiebaInit(std::string punc_config){
    if (seg_jieba){
        std::string model_path = punc_config.substr(0, punc_config.length() - (sizeof(PUNC_CONFIG_NAME)-1));
        std::string jieba_dict_file = PathAppend(model_path, JIEBA_DICT);
        std::string jieba_hmm_file = PathAppend(model_path, JIEBA_HMM_MODEL);
        std::string jieba_userdict_file = PathAppend(model_path, JIEBA_USERDICT);
 		try{
        	jieba_dict_trie_ = new cppjieba::DictTrie(jieba_dict_file, jieba_userdict_file);
 			LOG(INFO) << "Successfully load file from " << jieba_dict_file << ", " << jieba_userdict_file;
 		}catch(exception const &e){
 			LOG(ERROR) << "Error loading file, Jieba dict file error or not exist.";
 			exit(-1);
 		}
 		try{
        	jieba_model_ = new cppjieba::HMMModel(jieba_hmm_file);
 			LOG(INFO) << "Successfully load model from " << jieba_hmm_file;
 		}catch(exception const &e){
 			LOG(ERROR) << "Error loading file, Jieba hmm file error or not exist.";
 			exit(-1);
 		}
        SetJiebaRes(jieba_dict_trie_, jieba_model_);
    }else {
        jieba_dict_trie_ = NULL;
        jieba_model_ = NULL;
    }
 }
 void CTokenizer::ReadYaml(const YAML::Node& node) 
@ -50,6 +85,11 @@ bool CTokenizer::OpenYaml(const char* sz_yamlfile)
 	try
 	{
 		YAML::Node conf_seg_jieba = m_Config["seg_jieba"];
        if (conf_seg_jieba.IsDefined()){
            seg_jieba = conf_seg_jieba.as<bool>();
        }
 		auto Tokens = m_Config["token_list"];
 		if (Tokens.IsSequence())
 		{
@ -167,6 +207,14 @@ vector<string> CTokenizer::SplitChineseString(const string & str_info)
 	return list;
 }
 // Segments a string into words with the jieba processor, passing `false`
 // for the processor's `hmm` argument, and returns the resulting word list.
 vector<string> CTokenizer::SplitChineseJieba(const string & str_info)
 {
     const bool use_hmm = false;
     vector<string> words;
     jieba_processor_.Cut(str_info, words, use_hmm);
     return words;
 }
 void CTokenizer::StrSplit(const string& str, const char split, vector<string>& res)
 {
 	if (str == "")
@ -184,7 +232,7 @@ void CTokenizer::StrSplit(const string& str, const char split, vector<string>& r
 	}
 }
- void CTokenizer::Tokenize(const char* str_info, vector<string> & str_out, vector<int> & id_out)
+void CTokenizer::Tokenize(const char* str_info, vector<string> & str_out, vector<int> & id_out)
 {
 	vector<string>  strList;
 	StrSplit(str_info,' ', strList);
@ -200,7 +248,12 @@ void CTokenizer::StrSplit(const string& str, const char split, vector<string>& r
 				if (current_chinese.size() > 0)
 				{
 					// for utf-8 chinese
-					auto chineseList = SplitChineseString(current_chinese);
+					vector<string> chineseList;
 					if(seg_jieba){
 						chineseList = SplitChineseJieba(current_chinese);
 					}else{
 						chineseList = SplitChineseString(current_chinese);
 					}
 					str_out.insert(str_out.end(), chineseList.begin(),chineseList.end());
 					current_chinese = "";
 				}
@ -218,7 +271,13 @@ void CTokenizer::StrSplit(const string& str, const char split, vector<string>& r
 		}
 		if (current_chinese.size() > 0)
 		{
-			auto chineseList = SplitChineseString(current_chinese);
+			// for utf-8 chinese
 			vector<string> chineseList;
 			if(seg_jieba){
 				chineseList = SplitChineseJieba(current_chinese);
 			}else{
 				chineseList = SplitChineseString(current_chinese);
 			}
 			str_out.insert(str_out.end(), chineseList.begin(), chineseList.end());
 			current_chinese = "";
 		}
--- a/funasr/runtime/onnxruntime/src/tokenizer.h
+++ b/funasr/runtime/onnxruntime/src/tokenizer.h
@ -5,6 +5,9 @@
 #pragma once
 #include <yaml-cpp/yaml.h>
 #include "cppjieba/DictTrie.hpp"
 #include "cppjieba/HMMModel.hpp"
 #include "cppjieba/Jieba.hpp"
 namespace funasr {
 class CTokenizer {
@ -14,6 +17,10 @@ private:
 	vector<string>   m_id2token,m_id2punc;
 	map<string, int>  m_token2id,m_punc2id;
 	cppjieba::DictTrie *jieba_dict_trie_;
    cppjieba::HMMModel *jieba_model_;
 	cppjieba::Jieba jieba_processor_;
 public:
 	CTokenizer(const char* sz_yamlfile);
@ -28,9 +35,13 @@ public:
 	string Id2Punc(int n_punc_id);
 	vector<int> Punc2Ids(vector<string> input);
 	vector<string> SplitChineseString(const string& str_info);
 	vector<string> SplitChineseJieba(const string& str_info);
 	void StrSplit(const string& str, const char split, vector<string>& res);
 	void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out);
 	bool IsPunc(string& Punc);
 	bool seg_jieba = false;
 	void SetJiebaRes(cppjieba::DictTrie *dict, cppjieba::HMMModel *hmm);
 	void JiebaInit(std::string punc_config);
 };
 } // namespace funasr
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/DictTrie.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/DictTrie.hpp
@ -0,0 +1,274 @@
 #ifndef CPPJIEBA_DICT_TRIE_HPP
 #define CPPJIEBA_DICT_TRIE_HPP
 #include <iostream>
 #include <fstream>
 #include <map>
 #include <string>
 #include <cstring>
 #include <cstdlib>
 #include <stdint.h>
 #include <cmath>
 #include <limits>
 #include "limonp/StringUtil.hpp"
 #include "limonp/Logging.hpp"
 #include "Unicode.hpp"
 #include "Trie.hpp"
 namespace cppjieba {
 using namespace limonp;
 const double MIN_DOUBLE = -3.14e+100;
 const double MAX_DOUBLE = 3.14e+100;
 const size_t DICT_COLUMN_NUM = 3;
 const char* const UNKNOWN_TAG = "";
 class DictTrie {
 public:
  enum UserWordWeightOption {
    WordWeightMin,
    WordWeightMedian,
    WordWeightMax,
  }; // enum UserWordWeightOption
  DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
    Init(dict_path, user_dict_paths, user_word_weight_opt);
  }
  ~DictTrie() {
    delete trie_;
  }
  bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
    DictUnit node_info;
    if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
      return false;
    }
    active_node_infos_.push_back(node_info);
    trie_->InsertNode(node_info.word, &active_node_infos_.back());
    return true;
  }
  bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
    DictUnit node_info;
    double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
    if (!MakeNodeInfo(node_info, word, weight , tag)) {
      return false;
    }
    active_node_infos_.push_back(node_info);
    trie_->InsertNode(node_info.word, &active_node_infos_.back());
    return true;
  }
  bool DeleteUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
    DictUnit node_info;
    if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
      return false;
    }
    trie_->DeleteNode(node_info.word, &node_info);
    return true;
  }
  const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
    return trie_->Find(begin, end);
  }
  void Find(RuneStrArray::const_iterator begin, 
        RuneStrArray::const_iterator end, 
        vector<struct Dag>&res,
        size_t max_word_len = MAX_WORD_LENGTH) const {
    trie_->Find(begin, end, res, max_word_len);
  }
  bool Find(const string& word)
  {
    const DictUnit *tmp = NULL;
    RuneStrArray runes;
    if (!DecodeRunesInString(word, runes))
    {
      XLOG(ERROR) << "Decode failed.";
    }
    tmp = Find(runes.begin(), runes.end());
    if (tmp == NULL)
    {
      return false;
    }
    else
    {
      return true;
    }
  }
  bool IsUserDictSingleChineseWord(const Rune& word) const {
    return IsIn(user_dict_single_chinese_word_, word);
  }
  double GetMinWeight() const {
    return min_weight_;
  }
  void InserUserDictNode(const string& line) {
    vector<string> buf;
    DictUnit node_info;
    Split(line, buf, " ");
    if(buf.size() == 1){
          MakeNodeInfo(node_info, 
                buf[0], 
                user_word_default_weight_,
                UNKNOWN_TAG);
        } else if (buf.size() == 2) {
          MakeNodeInfo(node_info, 
                buf[0], 
                user_word_default_weight_,
                buf[1]);
        } else if (buf.size() == 3) {
          int freq = atoi(buf[1].c_str());
          assert(freq_sum_ > 0.0);
          double weight = log(1.0 * freq / freq_sum_);
          MakeNodeInfo(node_info, buf[0], weight, buf[2]);
        }
        static_node_infos_.push_back(node_info);
        if (node_info.word.size() == 1) {
          user_dict_single_chinese_word_.insert(node_info.word[0]);
        }
  }
  void LoadUserDict(const vector<string>& buf) {
    for (size_t i = 0; i < buf.size(); i++) {
      InserUserDictNode(buf[i]);
    }
  }
   void LoadUserDict(const set<string>& buf) {
    std::set<string>::const_iterator iter;
    for (iter = buf.begin(); iter != buf.end(); iter++){
      InserUserDictNode(*iter);
    }
  }
  void LoadUserDict(const string& filePaths) {
    vector<string> files = limonp::Split(filePaths, "|;");
    size_t lineno = 0;
    for (size_t i = 0; i < files.size(); i++) {
      ifstream ifs(files[i].c_str());
      XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; 
      string line;
      for (; getline(ifs, line); lineno++) {
        if (line.size() == 0) {
          continue;
        }
        InserUserDictNode(line);
      }
    }
  }
 private:
  void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
    LoadDict(dict_path);
    Shrink(static_node_infos_);
    CreateTrie(static_node_infos_);
  }
  void CreateTrie(const vector<DictUnit>& dictUnits) {
    assert(dictUnits.size());
    vector<Unicode> words;
    vector<const DictUnit*> valuePointers;
    for (size_t i = 0 ; i < dictUnits.size(); i ++) {
      words.push_back(dictUnits[i].word);
      valuePointers.push_back(&dictUnits[i]);
    }
    trie_ = new Trie(words, valuePointers);
  }
  bool MakeNodeInfo(DictUnit& node_info,
        const string& word, 
        double weight, 
        const string& tag) {
    if (!DecodeRunesInString(word, node_info.word)) {
      XLOG(ERROR) << "Decode " << word << " failed.";
      return false;
    }
    node_info.weight = weight;
    node_info.tag = tag;
    return true;
  }
  void LoadDict(const string& filePath) {
    ifstream ifs(filePath.c_str());
    XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
    string line;
    vector<string> buf;
    DictUnit node_info;
    for (size_t lineno = 0; getline(ifs, line); lineno++) {
      Split(line, buf, " ");
      XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
      MakeNodeInfo(node_info, 
            buf[0], 
            atof(buf[1].c_str()), 
            buf[2]);
      static_node_infos_.push_back(node_info);
    }
  }
  static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) {
    return lhs.weight < rhs.weight;
  }
  void SetStaticWordWeights(UserWordWeightOption option) {
    XCHECK(!static_node_infos_.empty());
    vector<DictUnit> x = static_node_infos_;
    sort(x.begin(), x.end(), WeightCompare);
    min_weight_ = x[0].weight;
    max_weight_ = x[x.size() - 1].weight;
    median_weight_ = x[x.size() / 2].weight;
    switch (option) {
     case WordWeightMin:
       user_word_default_weight_ = min_weight_;
       break;
     case WordWeightMedian:
       user_word_default_weight_ = median_weight_;
       break;
     default:
       user_word_default_weight_ = max_weight_;
       break;
    }
  }
  double CalcFreqSum(const vector<DictUnit>& node_infos) const {
    double sum = 0.0;
    for (size_t i = 0; i < node_infos.size(); i++) {
      sum += node_infos[i].weight;
    }
    return sum;
  }
  void CalculateWeight(vector<DictUnit>& node_infos, double sum) const {
    assert(sum > 0.0);
    for (size_t i = 0; i < node_infos.size(); i++) {
      DictUnit& node_info = node_infos[i];
      assert(node_info.weight > 0.0);
      node_info.weight = log(double(node_info.weight)/sum);
    }
  }
  void Shrink(vector<DictUnit>& units) const {
    vector<DictUnit>(units.begin(), units.end()).swap(units);
  }
  vector<DictUnit> static_node_infos_;
  deque<DictUnit> active_node_infos_; // must not be vector
  Trie * trie_;
  double freq_sum_;
  double min_weight_;
  double max_weight_;
  double median_weight_;
  double user_word_default_weight_;
  unordered_set<Rune> user_dict_single_chinese_word_;
 };
 }
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/FullSegment.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/FullSegment.hpp
@ -0,0 +1,102 @@
 #ifndef CPPJIEBA_FULLSEGMENT_H
 #define CPPJIEBA_FULLSEGMENT_H
 #include <algorithm>
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
 #include "DictTrie.hpp"
 #include "SegmentBase.hpp"
 #include "Unicode.hpp"
 namespace cppjieba {
 class FullSegment: public SegmentBase {
 public:
  FullSegment(const string& dictPath) {
    dictTrie_ = new DictTrie(dictPath);
    isNeedDestroy_ = true;
  }
  FullSegment(const DictTrie* dictTrie)
    : dictTrie_(dictTrie), isNeedDestroy_(false) {
    assert(dictTrie_);
  }
  FullSegment() {
    dictTrie_ = NULL;
  }
  ~FullSegment() {
    if (isNeedDestroy_) {
      delete dictTrie_;
    }
  }
  void setRes(DictTrie *&dictTrie) {
    dictTrie_ = dictTrie;
    isNeedDestroy_ = false;
    assert(dictTrie_);
  }
  void Cut(const string& sentence, 
        vector<string>& words) const {
    vector<Word> tmp;
    Cut(sentence, tmp);
    GetStringsFromWords(tmp, words);
  }
  void Cut(const string& sentence, 
        vector<Word>& words) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<WordRange> wrs;
    wrs.reserve(sentence.size()/2);
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
      Cut(range.begin, range.end, wrs);
    }
    words.clear();
    words.reserve(wrs.size());
    GetWordsFromWordRanges(sentence, wrs, words);
  }
  void Cut(RuneStrArray::const_iterator begin, 
        RuneStrArray::const_iterator end, 
        vector<WordRange>& res) const {
    // result of searching in trie tree
    LocalVector<pair<size_t, const DictUnit*> > tRes;
    // max index of res's words
    size_t maxIdx = 0;
    // always equals to (uItr - begin)
    size_t uIdx = 0;
    // tmp variables
    size_t wordLen = 0;
    assert(dictTrie_);
    vector<struct Dag> dags;
    dictTrie_->Find(begin, end, dags);
    for (size_t i = 0; i < dags.size(); i++) {
      for (size_t j = 0; j < dags[i].nexts.size(); j++) {
        size_t nextoffset = dags[i].nexts[j].first;
        assert(nextoffset < dags.size());
        const DictUnit* du = dags[i].nexts[j].second;
        if (du == NULL) {
          if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
            WordRange wr(begin + i, begin + nextoffset);
            res.push_back(wr);
          }
        } else {
          wordLen = du->word.size();
          if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
            WordRange wr(begin + i, begin + nextoffset);
            res.push_back(wr);
          }
        }
        maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
      }
      uIdx++;
    }
  }
 private:
  const DictTrie* dictTrie_;
  bool isNeedDestroy_;
 };
 }
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/HMMModel.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/HMMModel.hpp
@ -0,0 +1,129 @@
 #ifndef CPPJIEBA_HMMMODEL_H
 #define CPPJIEBA_HMMMODEL_H
 #include "limonp/StringUtil.hpp"
 #include "Trie.hpp"
 namespace cppjieba {
 using namespace limonp;
 typedef unordered_map<Rune, double> EmitProbMap;
 struct HMMModel {
  /*
   * STATUS:
   * 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
   * */
  enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
  HMMModel(const string& modelPath) {
    memset(startProb, 0, sizeof(startProb));
    memset(transProb, 0, sizeof(transProb));
    statMap[0] = 'B';
    statMap[1] = 'E';
    statMap[2] = 'M';
    statMap[3] = 'S';
    emitProbVec.push_back(&emitProbB);
    emitProbVec.push_back(&emitProbE);
    emitProbVec.push_back(&emitProbM);
    emitProbVec.push_back(&emitProbS);
    LoadModel(modelPath);
  }
  ~HMMModel() {
  }
  void LoadModel(const string& filePath) {
    ifstream ifile(filePath.c_str());
    XCHECK(ifile.is_open()) << "open " << filePath << " failed";
    string line;
    vector<string> tmp;
    vector<string> tmp2;
    //Load startProb
    XCHECK(GetLine(ifile, line));
    Split(line, tmp, " ");
    XCHECK(tmp.size() == STATUS_SUM);
    for (size_t j = 0; j< tmp.size(); j++) {
      startProb[j] = atof(tmp[j].c_str());
    }
    //Load transProb
    for (size_t i = 0; i < STATUS_SUM; i++) {
      XCHECK(GetLine(ifile, line));
      Split(line, tmp, " ");
      XCHECK(tmp.size() == STATUS_SUM);
      for (size_t j =0; j < STATUS_SUM; j++) {
        transProb[i][j] = atof(tmp[j].c_str());
      }
    }
    //Load emitProbB
    XCHECK(GetLine(ifile, line));
    XCHECK(LoadEmitProb(line, emitProbB));
    //Load emitProbE
    XCHECK(GetLine(ifile, line));
    XCHECK(LoadEmitProb(line, emitProbE));
    //Load emitProbM
    XCHECK(GetLine(ifile, line));
    XCHECK(LoadEmitProb(line, emitProbM));
    //Load emitProbS
    XCHECK(GetLine(ifile, line));
    XCHECK(LoadEmitProb(line, emitProbS));
  }
  double GetEmitProb(const EmitProbMap* ptMp, Rune key, 
        double defVal)const {
    EmitProbMap::const_iterator cit = ptMp->find(key);
    if (cit == ptMp->end()) {
      return defVal;
    }
    return cit->second;
  }
  bool GetLine(ifstream& ifile, string& line) {
    while (getline(ifile, line)) {
      Trim(line);
      if (line.empty()) {
        continue;
      }
      if (StartsWith(line, "#")) {
        continue;
      }
      return true;
    }
    return false;
  }
  bool LoadEmitProb(const string& line, EmitProbMap& mp) {
    if (line.empty()) {
      return false;
    }
    vector<string> tmp, tmp2;
    Unicode unicode;
    Split(line, tmp, ",");
    for (size_t i = 0; i < tmp.size(); i++) {
      Split(tmp[i], tmp2, ":");
      if (2 != tmp2.size()) {
        XLOG(ERROR) << "emitProb illegal.";
        return false;
      }
      if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
        XLOG(ERROR) << "TransCode failed.";
        return false;
      }
      mp[unicode[0]] = atof(tmp2[1].c_str());
    }
    return true;
  }
  char statMap[STATUS_SUM];
  double startProb[STATUS_SUM];
  double transProb[STATUS_SUM][STATUS_SUM];
  EmitProbMap emitProbB;
  EmitProbMap emitProbE;
  EmitProbMap emitProbM;
  EmitProbMap emitProbS;
  vector<EmitProbMap* > emitProbVec;
 }; // struct HMMModel
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/HMMSegment.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/HMMSegment.hpp
@ -0,0 +1,197 @@
 #ifndef CPPJIBEA_HMMSEGMENT_H
 #define CPPJIBEA_HMMSEGMENT_H
 #include <iostream>
 #include <fstream>
 #include <memory.h>
 #include <cassert>
 #include "HMMModel.hpp"
 #include "SegmentBase.hpp"
 namespace cppjieba {
 class HMMSegment: public SegmentBase {
 public:
  HMMSegment(const string& filePath)
  : model_(new HMMModel(filePath)), isNeedDestroy_(true) {
  }
  HMMSegment(const HMMModel* model) 
  : model_(model), isNeedDestroy_(false) {
  }
  HMMSegment() {
    model_ = NULL;
  }
  ~HMMSegment() {
    if (isNeedDestroy_) {
      delete model_;
    }
  }
  void setRes(HMMModel *&model) {
    model_ = model;
    isNeedDestroy_ = false;
  }
  void Cut(const string& sentence, 
        vector<string>& words) const {
    vector<Word> tmp;
    Cut(sentence, tmp);
    GetStringsFromWords(tmp, words);
  }
  void Cut(const string& sentence, 
        vector<Word>& words) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<WordRange> wrs;
    wrs.reserve(sentence.size()/2);
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
      Cut(range.begin, range.end, wrs);
    }
    words.clear();
    words.reserve(wrs.size());
    GetWordsFromWordRanges(sentence, wrs, words);
  }
  void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
    RuneStrArray::const_iterator left = begin;
    RuneStrArray::const_iterator right = begin;
    while (right != end) {
      if (right->rune < 0x80) {
        if (left != right) {
          InternalCut(left, right, res);
        }
        left = right;
        do {
          right = SequentialLetterRule(left, end);
          if (right != left) {
            break;
          }
          right = NumbersRule(left, end);
          if (right != left) {
            break;
          }
          right ++;
        } while (false);
        WordRange wr(left, right - 1);
        res.push_back(wr);
        left = right;
      } else {
        right++;
      }
    }
    if (left != right) {
      InternalCut(left, right, res);
    }
  }
 private:
  // sequential letters rule
  RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
    Rune x = begin->rune;
    if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
      begin ++;
    } else {
      return begin;
    }
    while (begin != end) {
      x = begin->rune;
      if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
        begin ++;
      } else {
        break;
      }
    }
    return begin;
  }
  //
  RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
    Rune x = begin->rune;
    if ('0' <= x && x <= '9') {
      begin ++;
    } else {
      return begin;
    }
    while (begin != end) {
      x = begin->rune;
      if ( ('0' <= x && x <= '9') || x == '.') {
        begin++;
      } else {
        break;
      }
    }
    return begin;
  }
  void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
    vector<size_t> status;
    Viterbi(begin, end, status);
    RuneStrArray::const_iterator left = begin;
    RuneStrArray::const_iterator right;
    for (size_t i = 0; i < status.size(); i++) {
      if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
        right = begin + i + 1;
        WordRange wr(left, right - 1);
        res.push_back(wr);
        left = right;
      }
    }
  }
  void Viterbi(RuneStrArray::const_iterator begin, 
        RuneStrArray::const_iterator end, 
        vector<size_t>& status) const {
    size_t Y = HMMModel::STATUS_SUM;
    size_t X = end - begin;
    size_t XYSize = X * Y;
    size_t now, old, stat;
    double tmp, endE, endS;
    vector<int> path(XYSize);
    vector<double> weight(XYSize);
    //start
    for (size_t y = 0; y < Y; y++) {
      weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
      path[0 + y * X] = -1;
    }
    double emitProb;
    for (size_t x = 1; x < X; x++) {
      for (size_t y = 0; y < Y; y++) {
        now = x + y*X;
        weight[now] = MIN_DOUBLE;
        path[now] = HMMModel::E; // warning
        emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin+x)->rune, MIN_DOUBLE);
        for (size_t preY = 0; preY < Y; preY++) {
          old = x - 1 + preY * X;
          tmp = weight[old] + model_->transProb[preY][y] + emitProb;
          if (tmp > weight[now]) {
            weight[now] = tmp;
            path[now] = preY;
          }
        }
      }
    }
    endE = weight[X-1+HMMModel::E*X];
    endS = weight[X-1+HMMModel::S*X];
    stat = 0;
    if (endE >= endS) {
      stat = HMMModel::E;
    } else {
      stat = HMMModel::S;
    }
    status.resize(X);
    for (int x = X -1 ; x >= 0; x--) {
      status[x] = stat;
      stat = path[x + stat*X];
    }
  }
  const HMMModel* model_;
  bool isNeedDestroy_;
 }; // class HMMSegment
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/Jieba.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/Jieba.hpp
@ -0,0 +1,141 @@
 #ifndef CPPJIEAB_JIEBA_H
 #define CPPJIEAB_JIEBA_H
 #include "QuerySegment.hpp"
 #include "KeywordExtractor.hpp"
 namespace cppjieba {
 // Facade that bundles the five segmenters (max-probability, HMM, mixed, full
 // and query) around one shared dictionary trie and one shared HMM model.
 // The destructor body is empty: the raw dict/model pointers are not deleted
 // by this class.
 class Jieba {
 public:
  // Builds every segmenter on top of the given dictionary and model pointers.
  Jieba(DictTrie *jieba_dict_trie, 
        HMMModel *jieba_model)
    : dict_trie_(jieba_dict_trie),
      model_(jieba_model),
      mp_seg_(dict_trie_),
      hmm_seg_(model_),
      mix_seg_(dict_trie_, model_),
      full_seg_(dict_trie_),
      query_seg_(dict_trie_, model_) {
  }
  // Creates a facade without resources; the segmenters are default
  // constructed. SetJiebaRes() supplies the dictionary and model afterwards.
  Jieba() {
    dict_trie_ = NULL;
    model_ = NULL;
  }
  ~Jieba() {
  }

  // Word together with two positions (not used by the methods of this class).
  struct LocWord {
    string word;
    size_t begin;
    size_t end;
  }; // struct LocWord

  // Stores the dictionary and model pointers and forwards them to every
  // segmenter through its setRes().
  void SetJiebaRes(cppjieba::DictTrie *&dict, cppjieba::HMMModel *&hmm) {
    dict_trie_ = dict;
    model_ = hmm;
    mp_seg_.setRes(dict);
    hmm_seg_.setRes(hmm);
    mix_seg_.setRes(dict, hmm);
    full_seg_.setRes(dict);
    query_seg_.setRes(dict, hmm);
  }

  // Cut / CutAll / CutForSearch / CutHMM / CutSmall forward to the mixed,
  // full, query, HMM and max-probability segmenter respectively. Each comes
  // in a string-output and a Word-output overload.
  void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
    mix_seg_.Cut(sentence, words, hmm);
  }
  void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
    mix_seg_.Cut(sentence, words, hmm);
  }
  void CutAll(const string& sentence, vector<string>& words) const {
    full_seg_.Cut(sentence, words);
  }
  void CutAll(const string& sentence, vector<Word>& words) const {
    full_seg_.Cut(sentence, words);
  }
  void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
    query_seg_.Cut(sentence, words, hmm);
  }
  void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
    query_seg_.Cut(sentence, words, hmm);
  }
  void CutHMM(const string& sentence, vector<string>& words) const {
    hmm_seg_.Cut(sentence, words);
  }
  void CutHMM(const string& sentence, vector<Word>& words) const {
    hmm_seg_.Cut(sentence, words);
  }
  void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
    mp_seg_.Cut(sentence, words, max_word_len);
  }
  void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
    mp_seg_.Cut(sentence, words, max_word_len);
  }

  // Tagging is delegated to the mixed segmenter.
  void Tag(const string& sentence, vector<pair<string, string> >& words) const {
    mix_seg_.Tag(sentence, words);
  }
  string LookupTag(const string &str) const {
    return mix_seg_.LookupTag(str);
  }

  // The user-word and lookup helpers below dereference dict_trie_ without a
  // null check, so a dictionary must have been supplied first.
  bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
    return dict_trie_->InsertUserWord(word, tag);
  }

  bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
    return dict_trie_->InsertUserWord(word,freq, tag);
  }

  bool DeleteUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
    return dict_trie_->DeleteUserWord(word, tag);
  }
  
  bool Find(const string& word)
  {
    return dict_trie_->Find(word);
  }

  // Applies the separator set `s` to every segmenter.
  void ResetSeparators(const string& s) {
    //TODO
    mp_seg_.ResetSeparators(s);
    hmm_seg_.ResetSeparators(s);
    mix_seg_.ResetSeparators(s);
    full_seg_.ResetSeparators(s);
    query_seg_.ResetSeparators(s);
  }

  // Read-only access to the shared resources.
  const DictTrie* GetDictTrie() const {
    return dict_trie_;
  } 
  
  const HMMModel* GetHMMModel() const {
    return model_;
  }

  // The LoadUserDict overloads forward to the dictionary trie (lines held in
  // a vector, lines held in a set, or a path string).
  void LoadUserDict(const vector<string>& buf)  {
    dict_trie_->LoadUserDict(buf);
  }

  void LoadUserDict(const set<string>& buf)  {
    dict_trie_->LoadUserDict(buf);
  }

  void LoadUserDict(const string& path)  {
    dict_trie_->LoadUserDict(path);
  }

 private:
  DictTrie *dict_trie_;
  HMMModel *model_;
  
  // They share the same dict trie and model
  MPSegment mp_seg_;
  HMMSegment hmm_seg_;
  MixSegment mix_seg_;
  FullSegment full_seg_;
  QuerySegment query_seg_;

 public:
 }; // class Jieba
 } // namespace cppjieba
 #endif // CPPJIEAB_JIEBA_H
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/KeywordExtractor.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/KeywordExtractor.hpp
@ -0,0 +1,154 @@
 #ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
 #define CPPJIEBA_KEYWORD_EXTRACTOR_H
 #include <cmath>
 #include <set>
 #include "MixSegment.hpp"
 namespace cppjieba {
 using namespace limonp;
 using namespace std;
 /*utf8*/
 // Keyword extractor: segments a sentence with a MixSegment, drops
 // single-rune words and stop words, scores the rest by occurrence count
 // times IDF and returns the highest-scoring words.
 class KeywordExtractor {
 public:
  // One extracted keyword.
  struct Word {
    string word;             // the keyword text
    vector<size_t> offsets;  // byte offsets of its occurrences in the sentence
    double weight;           // occurrence count multiplied by the IDF value
  }; // struct Word

  // Builds the segmenter from dictionary files and loads the IDF and
  // stop-word files.
  KeywordExtractor(const string& dictPath, 
        const string& hmmFilePath, 
        const string& idfPath, 
        const string& stopWordPath, 
        const string& userDict = "") 
    : segment_(dictPath, hmmFilePath, userDict), idfAverage_(0.0) {
    LoadIdfDict(idfPath);
    LoadStopWordDict(stopWordPath);
  }
  // Builds the segmenter from existing resources and loads the IDF and
  // stop-word files.
  KeywordExtractor(const DictTrie* dictTrie, 
        const HMMModel* model,
        const string& idfPath, 
        const string& stopWordPath) 
    : segment_(dictTrie, model), idfAverage_(0.0) {
    LoadIdfDict(idfPath);
    LoadStopWordDict(stopWordPath);
  }
  // Creates an extractor with no IDF or stop-word data loaded.
  KeywordExtractor() : idfAverage_(0.0) {}
  ~KeywordExtractor() {
  }

  // Appends the text of the top `topN` keywords of `sentence` to `keywords`.
  void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
    vector<Word> topWords;
    Extract(sentence, topWords, topN);
    for (size_t i = 0; i < topWords.size(); i++) {
      keywords.push_back(topWords[i].word);
    }
  }

  // Appends (text, weight) pairs of the top `topN` keywords to `keywords`.
  void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
    vector<Word> topWords;
    Extract(sentence, topWords, topN);
    for (size_t i = 0; i < topWords.size(); i++) {
      keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
    }
  }

  // Replaces `keywords` with the top `topN` keywords of `sentence`, ordered
  // by decreasing weight. Leaves `keywords` untouched (after logging) when
  // the segmented words do not add up to the sentence length.
  void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
    vector<string> words;
    segment_.Cut(sentence, words);

    map<string, Word> wordmap;
    size_t offset = 0;
    for (size_t i = 0; i < words.size(); ++i) {
      size_t t = offset;
      offset += words[i].size();
      if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
        continue;
      }
      wordmap[words[i]].offsets.push_back(t);
      wordmap[words[i]].weight += 1.0;
    }
    if (offset != sentence.size()) {
      XLOG(ERROR) << "words illegal";
      return;
    }

    keywords.clear();
    keywords.reserve(wordmap.size());
    for (map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
      // Words without an IDF entry use the average IDF.
      unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
      if (cit != idfMap_.end()) {
        itr->second.weight *= cit->second;
      } else {
        itr->second.weight *= idfAverage_;
      }
      itr->second.word = itr->first;
      keywords.push_back(itr->second);
    }
    topN = min(topN, keywords.size());
    partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
    keywords.resize(topN);
  }
 private:
  // Loads "word idf" lines into idfMap_ and computes the average IDF over
  // the entries that were accepted (empty and malformed lines are skipped).
  void LoadIdfDict(const string& idfPath) {
    ifstream ifs(idfPath.c_str());
    XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
    string line ;
    vector<string> buf;
    double idf = 0.0;
    double idfSum = 0.0;
    size_t lineno = 0;
    size_t loaded = 0;
    for (; getline(ifs, line); lineno++) {
      buf.clear();
      if (line.empty()) {
        XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
        continue;
      }
      Split(line, buf, " ");
      if (buf.size() != 2) {
        XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " illegal. skipped.";
        continue;
      }
      idf = atof(buf[1].c_str());
      idfMap_[buf[0]] = idf;
      idfSum += idf;
      loaded++;
    }

    assert(loaded);
    // Skipped lines must not dilute the average; avoid dividing by zero
    // when nothing was loaded and asserts are compiled out.
    idfAverage_ = loaded ? idfSum / loaded : 0.0;
    assert(idfAverage_ > 0.0);
  }
  // Loads one stop word per line into stopWords_.
  void LoadStopWordDict(const string& filePath) {
    ifstream ifs(filePath.c_str());
    XCHECK(ifs.is_open()) << "open " << filePath << " failed";
    string line ;
    while (getline(ifs, line)) {
      stopWords_.insert(line);
    }
    assert(stopWords_.size());
  }

  // Orders keywords by decreasing weight.
  static bool Compare(const Word& lhs, const Word& rhs) {
    return lhs.weight > rhs.weight;
  }

  MixSegment segment_;
  unordered_map<string, double> idfMap_;
  double idfAverage_;

  unordered_set<string> stopWords_;
 }; // class KeywordExtractor
 // Writes `word` to `os` as {"word": "...", "offset": ..., "weight": ...}.
 inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
  os << "{\"word\": \"" << word.word;
  os << "\", \"offset\": " << word.offsets;
  os << ", \"weight\": " << word.weight << "}";
  return os;
 }
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/MPSegment.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/MPSegment.hpp
@ -0,0 +1,144 @@
 #ifndef CPPJIEBA_MPSEGMENT_H
 #define CPPJIEBA_MPSEGMENT_H
 #include <algorithm>
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
 #include "DictTrie.hpp"
 #include "SegmentTagged.hpp"
 #include "PosTagger.hpp"
 namespace cppjieba {
 class MPSegment: public SegmentTagged {
 public:
  MPSegment(const string& dictPath, const string& userDictPath = "")
    : dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
  }
  MPSegment(const DictTrie* dictTrie)
    : dictTrie_(dictTrie), isNeedDestroy_(false) {
    assert(dictTrie_);
  }
  MPSegment() {
    dictTrie_ = NULL;
  }
  ~MPSegment() {
    if (isNeedDestroy_) {
      delete dictTrie_;
    }
  }
  void setRes(DictTrie *&dictTrie) {
    dictTrie_ = dictTrie;
    isNeedDestroy_ = false;
    assert(dictTrie_);
  }
  void Cut(const string& sentence, vector<string>& words) const {
    Cut(sentence, words, MAX_WORD_LENGTH);
  }
  void Cut(const string& sentence,
        vector<string>& words,
        size_t max_word_len) const {
    vector<Word> tmp;
    Cut(sentence, tmp, max_word_len);
    GetStringsFromWords(tmp, words);
  }
  void Cut(const string& sentence, 
        vector<Word>& words, 
        size_t max_word_len = MAX_WORD_LENGTH) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<WordRange> wrs;
    wrs.reserve(sentence.size()/2);
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
      Cut(range.begin, range.end, wrs, max_word_len);
    }
    words.clear();
    words.reserve(wrs.size());
    GetWordsFromWordRanges(sentence, wrs, words);
  }
  void Cut(RuneStrArray::const_iterator begin,
           RuneStrArray::const_iterator end,
           vector<WordRange>& words,
           size_t max_word_len = MAX_WORD_LENGTH) const {
    vector<Dag> dags;
    dictTrie_->Find(begin, 
          end, 
          dags,
          max_word_len);
    CalcDP(dags);
    CutByDag(begin, end, dags, words);
  }
  const DictTrie* GetDictTrie() const {
    return dictTrie_;
  }
  bool Tag(const string& src, vector<pair<string, string> >& res) const {
    return tagger_.Tag(src, res, *this);
  }
  bool IsUserDictSingleChineseWord(const Rune& value) const {
    return dictTrie_->IsUserDictSingleChineseWord(value);
  }
 private:
  void CalcDP(vector<Dag>& dags) const {
    size_t nextPos;
    const DictUnit* p;
    double val;
    for (vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
      rit->pInfo = NULL;
      rit->weight = MIN_DOUBLE;
      assert(!rit->nexts.empty());
      for (LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
        nextPos = it->first;
        p = it->second;
        val = 0.0;
        if (nextPos + 1 < dags.size()) {
          val += dags[nextPos + 1].weight;
        }
        if (p) {
          val += p->weight;
        } else {
          val += dictTrie_->GetMinWeight();
        }
        if (val > rit->weight) {
          rit->pInfo = p;
          rit->weight = val;
        }
      }
    }
  }
  void CutByDag(RuneStrArray::const_iterator begin, 
        RuneStrArray::const_iterator end, 
        const vector<Dag>& dags, 
        vector<WordRange>& words) const {
    size_t i = 0;
    while (i < dags.size()) {
      const DictUnit* p = dags[i].pInfo;
      if (p) {
        assert(p->word.size() >= 1);
        WordRange wr(begin + i, begin + i + p->word.size() - 1);
        words.push_back(wr);
        i += p->word.size();
      } else { //single chinese word
        WordRange wr(begin + i, begin + i);
        words.push_back(wr);
        i++;
      }
    }
  }
  const DictTrie* dictTrie_;
  bool isNeedDestroy_;
  PosTagger tagger_;
 }; // class MPSegment
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/MixSegment.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/MixSegment.hpp
@ -0,0 +1,113 @@
 #ifndef CPPJIEBA_MIXSEGMENT_H
 #define CPPJIEBA_MIXSEGMENT_H
 #include <cassert>
 #include "MPSegment.hpp"
 #include "HMMSegment.hpp"
 #include "limonp/StringUtil.hpp"
 #include "PosTagger.hpp"
 namespace cppjieba {
 class MixSegment: public SegmentTagged {
 public:
  MixSegment(const string& mpSegDict, const string& hmmSegDict, 
        const string& userDict = "") 
    : mpSeg_(mpSegDict, userDict), 
      hmmSeg_(hmmSegDict) {
  }
  MixSegment(const DictTrie* dictTrie, const HMMModel* model) 
    : mpSeg_(dictTrie), hmmSeg_(model) {
  }
  MixSegment() {}
  ~MixSegment() {
  }
  void setRes(DictTrie *&dictTrie, HMMModel *&model) {
    mpSeg_.setRes(dictTrie);
    hmmSeg_.setRes(model);
  }
  void Cut(const string& sentence, vector<string>& words) const {
    Cut(sentence, words, true);
  }
  void Cut(const string& sentence, vector<string>& words, bool hmm) const {
    vector<Word> tmp;
    Cut(sentence, tmp, hmm);
    GetStringsFromWords(tmp, words);
  }
  void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<WordRange> wrs;
    wrs.reserve(sentence.size() / 2);
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
      Cut(range.begin, range.end, wrs, hmm);
    }
    words.clear();
    words.reserve(wrs.size());
    GetWordsFromWordRanges(sentence, wrs, words);
  }
  void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
    if (!hmm) {
      mpSeg_.Cut(begin, end, res);
      return;
    }
    vector<WordRange> words;
    assert(end >= begin);
    words.reserve(end - begin);
    mpSeg_.Cut(begin, end, words);
    vector<WordRange> hmmRes;
    hmmRes.reserve(end - begin);
    for (size_t i = 0; i < words.size(); i++) {
      //if mp Get a word, it's ok, put it into result
      if (words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
        res.push_back(words[i]);
        continue;
      }
      // if mp Get a single one and it is not in userdict, collect it in sequence
      size_t j = i;
      while (j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
        j++;
      }
      // Cut the sequence with hmm
      assert(j - 1 >= i);
      // TODO
      hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes);
      //put hmm result to result
      for (size_t k = 0; k < hmmRes.size(); k++) {
        res.push_back(hmmRes[k]);
      }
      //clear tmp vars
      hmmRes.clear();
      //let i jump over this piece
      i = j - 1;
    }
  }
  const DictTrie* GetDictTrie() const {
    return mpSeg_.GetDictTrie();
  }
  bool Tag(const string& src, vector<pair<string, string> >& res) const {
    return tagger_.Tag(src, res, *this);
  }
  string LookupTag(const string &str) const {
    return tagger_.LookupTag(str, *this);
  }
 private:
  MPSegment mpSeg_;
  HMMSegment hmmSeg_;
  PosTagger tagger_;
 }; // class MixSegment
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/PosTagger.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/PosTagger.hpp
@ -0,0 +1,77 @@
 #ifndef CPPJIEBA_POS_TAGGING_H
 #define CPPJIEBA_POS_TAGGING_H
 #include "limonp/StringUtil.hpp"
 #include "SegmentTagged.hpp"
 #include "DictTrie.hpp"
 namespace cppjieba {
 using namespace limonp;
 // Fallback tags produced by PosTagger when the dictionary supplies no tag.
 // POS_M: every ASCII rune counted by SpecialRule is a digit.
 static const char* const POS_M = "m";
 // POS_ENG: ASCII runes were counted and at least one of them is not a digit.
 static const char* const POS_ENG = "eng";
 // POS_X: no ASCII rune was counted, or the string could not be decoded.
 static const char* const POS_X = "x";
 class PosTagger {
 public:
  PosTagger() {
  }
  ~PosTagger() {
  }
  bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
    vector<string> CutRes;
    segment.Cut(src, CutRes);
    for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
      res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
    }
    return !res.empty();
  }
  string LookupTag(const string &str, const SegmentTagged& segment) const {
    const DictUnit *tmp = NULL;
    RuneStrArray runes;
    const DictTrie * dict = segment.GetDictTrie();
    assert(dict != NULL);
      if (!DecodeRunesInString(str, runes)) {
        XLOG(ERROR) << "Decode failed.";
        return POS_X;
      }
      tmp = dict->Find(runes.begin(), runes.end());
      if (tmp == NULL || tmp->tag.empty()) {
        return SpecialRule(runes);
      } else {
        return tmp->tag;
      }
  }
 private:
  const char* SpecialRule(const RuneStrArray& unicode) const {
    size_t m = 0;
    size_t eng = 0;
    for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
      if (unicode[i].rune < 0x80) {
        eng ++;
        if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
          m++;
        }
      }
    }
    // ascii char is not found
    if (eng == 0) {
      return POS_X;
    }
    // all the ascii is number char
    if (m == eng) {
      return POS_M;
    }
    // the ascii chars contain english letter
    return POS_ENG;
  }
 }; // class PosTagger
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/PreFilter.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/PreFilter.hpp
@ -0,0 +1,54 @@
 #ifndef CPPJIEBA_PRE_FILTER_H
 #define CPPJIEBA_PRE_FILTER_H
 #include "Trie.hpp"
 #include "limonp/Logging.hpp"
 namespace cppjieba {
 // Splits a decoded sentence into consecutive rune ranges at separator runes.
 // A separator is returned as a range of its own; the runes between two
 // separators form one range.
 class PreFilter {
 public:
  //TODO use WordRange instead of Range
  // Half-open rune range [begin, end).
  struct Range {
    RuneStrArray::const_iterator begin;
    RuneStrArray::const_iterator end;
  }; // struct Range

  // Decodes `sentence`; `symbols` is kept by reference and must outlive this
  // object. A decode failure is logged and leaves nothing to iterate.
  PreFilter(const unordered_set<Rune>& symbols,
        const string& sentence)
    : symbols_(symbols) {
    const bool decoded = DecodeRunesInString(sentence, sentence_);
    if (!decoded) {
      XLOG(ERROR) << "decode failed. "; 
    }
    cursor_ = sentence_.begin();
  }
  ~PreFilter() {}

  // True while runes remain to be returned.
  bool HasNext() const {
    return !(cursor_ == sentence_.end());
  }
  // Returns the next range and advances the cursor past it.
  Range Next() {
    Range range;
    range.begin = cursor_;
    for (; cursor_ != sentence_.end(); ++cursor_) {
      if (!IsIn(symbols_, cursor_->rune)) {
        continue;
      }
      // Separator reached: when it is the first rune of the range it forms a
      // one-rune range; otherwise the range ends just before it.
      if (range.begin == cursor_) {
        ++cursor_;
      }
      range.end = cursor_;
      return range;
    }
    range.end = sentence_.end();
    return range;
  }
 private:
  RuneStrArray::const_iterator cursor_;
  RuneStrArray sentence_;
  const unordered_set<Rune>& symbols_;
 }; // class PreFilter
 } // namespace cppjieba
 #endif // CPPJIEBA_PRE_FILTER_H
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/QuerySegment.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/QuerySegment.hpp
@ -0,0 +1,95 @@
 #ifndef CPPJIEBA_QUERYSEGMENT_H
 #define CPPJIEBA_QUERYSEGMENT_H
 #include <algorithm>
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
 #include "DictTrie.hpp"
 #include "SegmentBase.hpp"
 #include "FullSegment.hpp"
 #include "MixSegment.hpp"
 #include "Unicode.hpp"
 namespace cppjieba {
 class QuerySegment: public SegmentBase {
 public:
  QuerySegment(const string& dict, const string& model, const string& userDict = "")
    : mixSeg_(dict, model, userDict),
      trie_(mixSeg_.GetDictTrie()) {
  }
  QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
    : mixSeg_(dictTrie, model), trie_(dictTrie) {
  }
  QuerySegment() {
    trie_ = NULL;
  }
  ~QuerySegment() {
  }
  void setRes(DictTrie *&dictTrie, HMMModel *&model) {
    mixSeg_.setRes(dictTrie, model);
    trie_ = dictTrie;
  }
  void Cut(const string& sentence, vector<string>& words) const {
    Cut(sentence, words, true);
  }
  void Cut(const string& sentence, vector<string>& words, bool hmm) const {
    vector<Word> tmp;
    Cut(sentence, tmp, hmm);
    GetStringsFromWords(tmp, words);
  }
  void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<WordRange> wrs;
    wrs.reserve(sentence.size()/2);
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
      Cut(range.begin, range.end, wrs, hmm);
    }
    words.clear();
    words.reserve(wrs.size());
    GetWordsFromWordRanges(sentence, wrs, words);
  }
  void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
    //use mix Cut first
    vector<WordRange> mixRes;
    mixSeg_.Cut(begin, end, mixRes, hmm);
    vector<WordRange> fullRes;
    for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
      if (mixResItr->Length() > 2) {
        for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
          WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
          if (trie_->Find(wr.left, wr.right + 1) != NULL) {
            res.push_back(wr);
          }
        }
      }
      if (mixResItr->Length() > 3) {
        for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
          WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
          if (trie_->Find(wr.left, wr.right + 1) != NULL) {
            res.push_back(wr);
          }
        }
      }
      res.push_back(*mixResItr);
    }
  }
 private:
  bool IsAllAscii(const Unicode& s) const {
   for(size_t i = 0; i < s.size(); i++) {
     if (s[i] >= 0x80) {
       return false;
     }
   }
   return true;
  }
  MixSegment mixSeg_;
  const DictTrie* trie_;
 }; // QuerySegment
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/SegmentBase.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/SegmentBase.hpp
@ -0,0 +1,46 @@
 #ifndef CPPJIEBA_SEGMENTBASE_H
 #define CPPJIEBA_SEGMENTBASE_H
 #include "limonp/Logging.hpp"
 #include "PreFilter.hpp"
 #include <cassert>
 namespace cppjieba {
 // Default separator set installed by SegmentBase's constructor: space, tab,
 // newline, U+FF0C (UTF-8 EF BC 8C) and U+3002 (UTF-8 E3 80 82).
 const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
 using namespace limonp;
 class SegmentBase {
 public:
  SegmentBase() {
    XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
  }
  virtual ~SegmentBase() {
  }
  virtual void Cut(const string& sentence, vector<string>& words) const = 0;
  bool ResetSeparators(const string& s) {
    symbols_.clear();
    RuneStrArray runes;
    if (!DecodeRunesInString(s, runes)) {
      XLOG(ERROR) << "decode " << s << " failed";
      return false;
    }
    for (size_t i = 0; i < runes.size(); i++) {
      if (!symbols_.insert(runes[i].rune).second) {
        XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
        return false;
      }
    }
    return true;
  }
 protected:
  unordered_set<Rune> symbols_;
 }; // class SegmentBase
 } // cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/SegmentTagged.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/SegmentTagged.hpp
@ -0,0 +1,23 @@
 #ifndef CPPJIEBA_SEGMENTTAGGED_H
 #define CPPJIEBA_SEGMENTTAGGED_H
 #include "SegmentBase.hpp"
 namespace cppjieba {
 // Interface of segmenters that can also tag words and expose their trie.
 class SegmentTagged : public SegmentBase{
 public:
  SegmentTagged() {}
  virtual ~SegmentTagged() {}

  // Appends (word, tag) pairs for `src` to `res`.
  virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;

  // Returns the dictionary trie used by the segmenter.
  virtual const DictTrie* GetDictTrie() const = 0;

 }; // class SegmentTagged
 } // cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/TextRankExtractor.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/TextRankExtractor.hpp
@ -0,0 +1,190 @@
 #ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
 #define CPPJIEBA_TEXTRANK_EXTRACTOR_H
 #include <cmath>
 #include "Jieba.hpp"
 namespace cppjieba {
  using namespace limonp;
  using namespace std;
  // Keyword extractor that ranks the words of a sentence on a co-occurrence
  // graph: words are nodes, and two words that appear within a sliding window
  // of each other are joined by a weighted undirected edge.
  class TextRankExtractor {
  public:
    // One extracted keyword: its text, the byte offsets at which it starts in
    // the sentence, and its rank score.
    typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
  private:
    typedef std::map<string,Word> WordMap;

    // Undirected weighted word graph together with the iterative ranking.
    class WordGraph{
    private:
      typedef double Score;
      typedef string Node;
      typedef std::set<Node> NodeSet;

      typedef std::map<Node,double> Edges;
      typedef std::map<Node,Edges> Graph;
      //typedef std::unordered_map<Node,double> Edges;
      //typedef std::unordered_map<Node,Edges> Graph;

      // Damping factor used in rank(): new weight = (1 - d) + d * s.
      double d;
      Graph graph;
      NodeSet nodeSet;
    public:
      WordGraph(): d(0.85) {};
      WordGraph(double in_d): d(in_d) {};

      // Adds `weight` to the edge between `start` and `end` in both
      // directions and records both nodes.
      void addEdge(Node start,Node end,double weight){
        Edges temp;
        Edges::iterator gotEdges;
        nodeSet.insert(start);
        nodeSet.insert(end);
        graph[start][end]+=weight;
        graph[end][start]+=weight;
      }

      // Scores every node into `ws`. Does nothing when the graph is empty.
      // Each of the `rankTime` sweeps updates the nodes one after another, so
      // a weight updated earlier in a sweep is already used by later nodes of
      // the same sweep. The final weights are rescaled with the smallest and
      // largest weight found in `ws`.
      void rank(WordMap &ws,size_t rankTime=10){
        WordMap outSum;
        Score wsdef, min_rank, max_rank;

        if( graph.size() == 0)
          return;

        // Every node starts with the same weight.
        wsdef = 1.0 / graph.size();

        for(Graph::iterator edges=graph.begin();edges!=graph.end();++edges){
          // edges->first is the start node; edge->first is the end node; edge->second is the weight
          ws[edges->first].word=edges->first;
          ws[edges->first].weight=wsdef;
          // outSum holds, per node, the total weight of its edges.
          outSum[edges->first].weight=0;
          for(Edges::iterator edge=edges->second.begin();edge!=edges->second.end();++edge){
            outSum[edges->first].weight+=edge->second;
          }
        }
        //sort(nodeSet.begin(),nodeSet.end()); is sorting needed here?
        for( size_t i=0; i<rankTime; i++ ){
          for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++ ){
            double s = 0;
            for( Edges::iterator edge= graph[*node].begin(); edge != graph[*node].end(); edge++ )
              // edge->first is the end node; edge->second is the weight
              s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
            ws[*node].weight = (1 - d) + d * s;
          }
        }

        // Find the extremes, then rescale each weight as
        // (w - min/10) / (max - min/10).
        min_rank=max_rank=ws.begin()->second.weight;
        for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
          if( i->second.weight < min_rank ){
            min_rank = i->second.weight;
          }
          if( i->second.weight > max_rank ){
            max_rank = i->second.weight;
          }
        }
        for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
          ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
        }
      }
    };

  public: 
  // Loads the segmenter models and the stop-word list from files.
  TextRankExtractor(const string& dictPath, 
        const string& hmmFilePath, 
        const string& stopWordPath, 
        const string& userDict = "") 
    : segment_(dictPath, hmmFilePath, userDict) {
    LoadStopWordDict(stopWordPath);
  }
  // Uses an already loaded trie and HMM model; loads the stop words from file.
  TextRankExtractor(const DictTrie* dictTrie, 
        const HMMModel* model,
        const string& stopWordPath) 
    : segment_(dictTrie, model) {
    LoadStopWordDict(stopWordPath);
  }
    // Uses the trie and HMM model of an existing Jieba instance.
    TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
        LoadStopWordDict(stopWordPath);
    }
    ~TextRankExtractor() {
    }

    // Appends the text of up to `topN` keywords of `sentence` to `keywords`.
    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
      vector<Word> topWords;
      Extract(sentence, topWords, topN);
      for (size_t i = 0; i < topWords.size(); i++) {
        keywords.push_back(topWords[i].word);
      }
    }

    // Appends up to `topN` (keyword, weight) pairs of `sentence` to `keywords`.
    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
      vector<Word> topWords;
      Extract(sentence, topWords, topN);
      for (size_t i = 0; i < topWords.size(); i++) {
        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
      }
    }

    // Replaces `keywords` with the `topN` highest-ranked words of `sentence`.
    // `span` is the co-occurrence window size and `rankTime` the number of
    // ranking sweeps. When the segmented words do not add up to the byte
    // length of `sentence`, an error is logged and `keywords` is left as it
    // was.
    void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
      vector<string> words;
      segment_.Cut(sentence, words);

      TextRankExtractor::WordGraph graph;
      WordMap wordmap;
      size_t offset = 0;

      for(size_t i=0; i < words.size(); i++){
        // t is the byte offset at which words[i] starts.
        size_t t = offset;
        offset += words[i].size();
        if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
          continue;
        }
        // Link words[i] with the following words inside the window; skipped
        // single-rune words and stop words widen the window by one each.
        for(size_t j=i+1,skip=0;j<i+span+skip && j<words.size();j++){
          if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
            skip++;
            continue;
          }
          graph.addEdge(words[i],words[j],1);
        }
        wordmap[words[i]].offsets.push_back(t);
      }
      if (offset != sentence.size()) {
        XLOG(ERROR) << "words illegal";
        return;
      }

      graph.rank(wordmap,rankTime);

      keywords.clear();
      keywords.reserve(wordmap.size());
      for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
        keywords.push_back(itr->second);
      }

      // Only the first topN elements need to be in sorted order.
      topN = min(topN, keywords.size());
      partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
      keywords.resize(topN);
    }
  private:
    // Reads the stop-word list, one entry per line, into stopWords_.
    void LoadStopWordDict(const string& filePath) {
      ifstream ifs(filePath.c_str());
      XCHECK(ifs.is_open()) << "open " << filePath << " failed";
      string line ;
      while (getline(ifs, line)) {
        stopWords_.insert(line);
      }
      assert(stopWords_.size());
    }

    // Ordering predicate that places the word with the larger weight first.
    static bool Compare(const Word &x,const Word &y){
      return x.weight > y.weight;
    }

    MixSegment segment_;
    unordered_set<string> stopWords_;
  }; // class TextRankExtractor
  // Streams a keyword as {"word": "...", "offset": ..., "weight": ...}.
  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
    os << "{\"word\": \"" << word.word;
    os << "\", \"offset\": " << word.offsets;
    os << ", \"weight\": " << word.weight << "}";
    return os;
  }
 } // namespace cppjieba
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/Trie.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/Trie.hpp
@ -0,0 +1,200 @@
 #ifndef CPPJIEBA_TRIE_HPP
 #define CPPJIEBA_TRIE_HPP
 #include <vector>
 #include <queue>
 #include "limonp/StdExtension.hpp"
 #include "Unicode.hpp"
 namespace cppjieba {
 using namespace std;
 // Default value of the max_word_len parameters: the largest number of runes
 // a dictionary match may span during the DAG lookup.
 const size_t MAX_WORD_LENGTH = 512;
 // One dictionary entry referenced by the trie.
 struct DictUnit {
  // The runes of the entry.
  Unicode word;
  // Score of the entry; MPSegment sums these along a segmentation path and
  // keeps the path with the largest total.
  double weight;
  // Tag string returned by PosTagger::LookupTag; may be empty.
  string tag;
 }; // struct DictUnit
 // for debugging
 // inline ostream & operator << (ostream& os, const DictUnit& unit) {
 //   string s;
 //   s << unit.word;
 //   return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
 // }
 // Per-position record of the match graph built by Trie::Find for a rune range.
 struct Dag {
  // The rune at this position.
  RuneStr runestr;
  // Candidate words starting at this position, as (index of the last rune,
  // dictionary entry). The first candidate is always the single rune itself,
  // with a NULL entry when that rune alone is not in the dictionary.
  limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
  // Entry of the candidate selected by MPSegment::CalcDP (NULL for a single
  // rune without a dictionary entry).
  const DictUnit * pInfo;
  // Best summed weight from this position onward, set by MPSegment::CalcDP.
  double weight;
  // TODO: initialized to 0 and not used by the code shown in this file.
  size_t nextPos; // TODO
  Dag():runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
  }
 }; // struct Dag
 typedef Rune TrieKey;
 // Node of the rune trie. `next` maps a rune to the child node and stays NULL
 // until the first child is added; `ptValue` is the dictionary entry ending
 // at this node, or NULL when no word ends here.
 class TrieNode {
 public:
  typedef unordered_map<TrieKey, TrieNode*> NextMap;

  TrieNode()
    : next(NULL), ptValue(NULL) {
  }

  NextMap *next;
  const DictUnit *ptValue;
 };
 class Trie {
 public:
  Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
   : root_(new TrieNode) {
    CreateTrie(keys, valuePointers);
  }
  ~Trie() {
    DeleteNode(root_);
  }
  const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
    if (begin == end) {
      return NULL;
    }
    const TrieNode* ptNode = root_;
    TrieNode::NextMap::const_iterator citer;
    for (RuneStrArray::const_iterator it = begin; it != end; it++) {
      if (NULL == ptNode->next) {
        return NULL;
      }
      citer = ptNode->next->find(it->rune);
      if (ptNode->next->end() == citer) {
        return NULL;
      }
      ptNode = citer->second;
    }
    return ptNode->ptValue;
  }
  void Find(RuneStrArray::const_iterator begin, 
        RuneStrArray::const_iterator end, 
        vector<struct Dag>&res, 
        size_t max_word_len = MAX_WORD_LENGTH) const {
    assert(root_ != NULL);
    res.resize(end - begin);
    const TrieNode *ptNode = NULL;
    TrieNode::NextMap::const_iterator citer;
    for (size_t i = 0; i < size_t(end - begin); i++) {
      res[i].runestr = *(begin + i);
      if (root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
        ptNode = citer->second;
      } else {
        ptNode = NULL;
      }
      if (ptNode != NULL) {
        res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));
      } else {
        res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, static_cast<const DictUnit*>(NULL)));
      }
      for (size_t j = i + 1; j < size_t(end - begin) && (j - i + 1) <= max_word_len; j++) {
        if (ptNode == NULL || ptNode->next == NULL) {
          break;
        }
        citer = ptNode->next->find((begin + j)->rune);
        if (ptNode->next->end() == citer) {
          break;
        }
        ptNode = citer->second;
        if (NULL != ptNode->ptValue) {
          res[i].nexts.push_back(pair<size_t, const DictUnit*>(j, ptNode->ptValue));
        }
      }
    }
  }
  void InsertNode(const Unicode& key, const DictUnit* ptValue) {
    if (key.begin() == key.end()) {
      return;
    }
    TrieNode::NextMap::const_iterator kmIter;
    TrieNode *ptNode = root_;
    for (Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
      if (NULL == ptNode->next) {
        ptNode->next = new TrieNode::NextMap;
      }
      kmIter = ptNode->next->find(*citer);
      if (ptNode->next->end() == kmIter) {
        TrieNode *nextNode = new TrieNode;
        ptNode->next->insert(make_pair(*citer, nextNode));
        ptNode = nextNode;
      } else {
        ptNode = kmIter->second;
      }
    }
    assert(ptNode != NULL);
    ptNode->ptValue = ptValue;
  }
  void DeleteNode(const Unicode& key, const DictUnit* ptValue) {
      if (key.begin() == key.end()) {
        return;
      }
      //定义一个NextMap迭代器
      TrieNode::NextMap::const_iterator kmIter;
      //定义一个指向root的TrieNode指针
      TrieNode *ptNode = root_;
      for (Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
        //链表不存在元素
        if (NULL == ptNode->next) {
          return;
        }
        kmIter = ptNode->next->find(*citer);
        //如果map中不存在,跳出循环
        if (ptNode->next->end() == kmIter) {
              break;
        }
        //从unordered_map中擦除该项
        ptNode->next->erase(*citer);
        //删除该node
        ptNode = kmIter->second;
        delete ptNode;
        break;
      }
      return;
 }
 private:
  void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
    if (valuePointers.empty() || keys.empty()) {
      return;
    }
    assert(keys.size() == valuePointers.size());
    for (size_t i = 0; i < keys.size(); i++) {
      InsertNode(keys[i], valuePointers[i]);
    }
  }
  void DeleteNode(TrieNode* node) {
    if (NULL == node) {
      return;
    }
    if (NULL != node->next) {
      for (TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) {
        DeleteNode(it->second);
      }
      delete node->next;
    }
    delete node;
  }
  TrieNode* root_;
 }; // class Trie
 } // namespace cppjieba
 #endif // CPPJIEBA_TRIE_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/Unicode.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/cppjieba/Unicode.hpp
@ -0,0 +1,227 @@
 #ifndef CPPJIEBA_UNICODE_H
 #define CPPJIEBA_UNICODE_H
 #include <stdint.h>
 #include <stdlib.h>
 #include <string>
 #include <vector>
 #include <ostream>
 #include "limonp/LocalVector.hpp"
 namespace cppjieba {
 using std::string;
 using std::vector;
 typedef uint32_t Rune;
 // A segmented word together with its position in the source string.
 struct Word {
  // The word's text.
  string word;
  // Byte offset of the word in the source string.
  uint32_t offset;
  // Index of the word's first rune in the source string.
  uint32_t unicode_offset;
  // Number of runes in the word.
  uint32_t unicode_length;
  // Creates a word with only a byte offset. The rune-based fields are set to
  // 0 so that they never hold indeterminate values.
  Word(const string& w, uint32_t o)
   : word(w), offset(o), unicode_offset(0), unicode_length(0) {
  }
  // Creates a word with both byte-based and rune-based position information.
  Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
          : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
  }
 }; // struct Word
 // Streams a Word as {"word": "...", "offset": N}.
 inline std::ostream& operator << (std::ostream& os, const Word& w) {
  os << "{\"word\": \"" << w.word;
  os << "\", \"offset\": " << w.offset << "}";
  return os;
 }
 // A decoded rune together with where it sits in the source string.
 struct RuneStr {
  Rune rune;
  // Byte offset and byte length of the encoded rune.
  uint32_t offset;
  uint32_t len;
  // Rune-based position and length.
  uint32_t unicode_offset;
  uint32_t unicode_length;

  // All fields zero.
  RuneStr()
    : rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {}

  // Byte position only; the rune-based fields are zero.
  RuneStr(Rune r, uint32_t byte_offset, uint32_t byte_len)
    : rune(r), offset(byte_offset), len(byte_len), unicode_offset(0), unicode_length(0) {}

  // Byte position and rune position.
  RuneStr(Rune r, uint32_t byte_offset, uint32_t byte_len, uint32_t rune_offset, uint32_t rune_len)
    : rune(r), offset(byte_offset), len(byte_len), unicode_offset(rune_offset), unicode_length(rune_len) {}
 }; // struct RuneStr
 // Streams a RuneStr as {"rune": "N", "offset": N, "len": N}.
 inline std::ostream& operator << (std::ostream& os, const RuneStr& r) {
  os << "{\"rune\": \"" << r.rune;
  os << "\", \"offset\": " << r.offset;
  os << ", \"len\": " << r.len << "}";
  return os;
 }
 typedef limonp::LocalVector<Rune> Unicode;
 typedef limonp::LocalVector<struct RuneStr> RuneStrArray;
 // [left, right]
 struct WordRange {
  RuneStrArray::const_iterator left;
  RuneStrArray::const_iterator right;
  WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
   : left(l), right(r) {
  }
  size_t Length() const {
    return right - left + 1;
  }
  bool IsAllAscii() const {
    for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
      if (iter->rune >= 0x80) {
        return false;
      }
    }
    return true;
  }
 }; // struct WordRange
 // Result of decoding one rune: its code point and its encoded byte length.
 // DecodeRuneInString returns len == 0 when decoding fails.
 struct RuneStrLite {
  uint32_t rune;
  uint32_t len;

  RuneStrLite()
    : rune(0), len(0) {}

  RuneStrLite(uint32_t code_point, uint32_t byte_len)
    : rune(code_point), len(byte_len) {}
 }; // struct RuneStrLite
 // Decodes the first UTF-8 encoded rune of the `len` bytes at `str`.
 // Returns the code point and the number of bytes consumed (1 to 4). The
 // result has len == 0 when `str` is NULL, `len` is 0, the lead byte is above
 // 0xf7, or fewer bytes remain than the lead byte calls for.
 // NOTE(review): lead bytes 0x80..0xbf and the high bits of continuation
 // bytes are not validated here.
 inline RuneStrLite DecodeRuneInString(const char* str, size_t len) {
  RuneStrLite decoded(0, 0);
  if (str == NULL || len == 0) {
    return decoded;
  }

  // Pick the sequence width and the payload bits of the lead byte.
  const uint8_t lead = (uint8_t)str[0];
  uint32_t width = 0;
  uint32_t bits = 0;
  if (lead < 0x80) {                     // 0xxxxxxx: 7 payload bits
    width = 1;
    bits = lead & 0x7f;
  } else if (lead <= 0xdf && 1 < len) {  // 110xxxxx: 5 payload bits
    width = 2;
    bits = lead & 0x1f;
  } else if (lead <= 0xef && 2 < len) {  // 1110xxxx: 4 payload bits
    width = 3;
    bits = lead & 0x0f;
  } else if (lead <= 0xf7 && 3 < len) {  // 11110xxx: 3 payload bits
    width = 4;
    bits = lead & 0x07;
  } else {
    return decoded;
  }

  // Each following byte contributes its low 6 bits.
  for (uint32_t k = 1; k < width; ++k) {
    bits <<= 6;
    bits |= (uint8_t)(str[k]) & 0x3f;
  }

  decoded.rune = bits;
  decoded.len = width;
  return decoded;
 }
 inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) {
  runes.clear();
  runes.reserve(len / 2);
  for (uint32_t i = 0, j = 0; i < len;) {
    RuneStrLite rp = DecodeRuneInString(s + i, len - i);
    if (rp.len == 0) {
      runes.clear();
      return false;
    }
    RuneStr x(rp.rune, i, rp.len, j, 1);
    runes.push_back(x);
    i += rp.len;
    ++j;
  }
  return true;
 }
 inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
  return DecodeRunesInString(s.c_str(), s.size(), runes);
 }
 inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
  unicode.clear();
  RuneStrArray runes;
  if (!DecodeRunesInString(s, len, runes)) {
    return false;
  }
  unicode.reserve(runes.size());
  for (size_t i = 0; i < runes.size(); i++) {
    unicode.push_back(runes[i].rune);
  }
  return true;
 }
 inline bool IsSingleWord(const string& str) {
  RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
  return rp.len == str.size();
 }
 inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
  return DecodeRunesInString(s.c_str(), s.size(), unicode);
 }
 // Returns the code points of `s`; the decode status is discarded, so the
 // result is simply whatever the decoder left in the output.
 inline Unicode DecodeRunesInString(const string& s) {
  Unicode codePoints;
  DecodeRunesInString(s, codePoints);
  return codePoints;
 }
 // [left, right]
 // Builds the Word covering the inclusive rune range [left, right] of `s`.
 inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
  assert(right->offset >= left->offset);
  const uint32_t byteStart = left->offset;
  const uint32_t byteLen = right->offset - byteStart + right->len;
  const uint32_t runeStart = left->unicode_offset;
  const uint32_t runeLen = right->unicode_offset - runeStart + right->unicode_length;
  return Word(s.substr(byteStart, byteLen), byteStart, runeStart, runeLen);
 }
 // Returns the bytes of `s` covered by the inclusive rune range [left, right].
 inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
  assert(right->offset >= left->offset);
  const uint32_t byteStart = left->offset;
  const uint32_t byteLen = right->offset - byteStart + right->len;
  return s.substr(byteStart, byteLen);
 }
 inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
  for (size_t i = 0; i < wrs.size(); i++) {
    words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
  }
 }
 // Returns the Words of `wrs` as a new vector.
 inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
  vector<Word> collected;
  GetWordsFromWordRanges(s, wrs, collected);
  return collected;
 }
 inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
  strs.resize(words.size());
  for (size_t i = 0; i < words.size(); ++i) {
    strs[i] = words[i].word;
  }
 }
 } // namespace cppjieba
 #endif // CPPJIEBA_UNICODE_H
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/ArgvContext.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/ArgvContext.hpp
@ -0,0 +1,70 @@
 /************************************
 * file enc : ascii
 * author   : wuyanyi09@gmail.com
 ************************************/
 #ifndef LIMONP_ARGV_FUNCTS_H
 #define LIMONP_ARGV_FUNCTS_H
 #include <set>
 #include <sstream>
 #include "StringUtil.hpp"
 namespace limonp {
 using namespace std;
 // Minimal command-line classifier. Every argv entry ends up in exactly one of:
 //   args_ : entries that do not start with "-"
 //   mpss_ : "-key value" pairs (a dash entry followed by a non-dash entry)
 //   sset_ : dash entries with no value (last entry, or followed by another dash entry)
 class ArgvContext {
 public :
  // Walks argv[0 .. argc) once; argv[0] is classified like any other entry.
  ArgvContext(int argc, const char* const * argv) {
    int idx = 0;
    while(idx < argc) {
      const char* const token = argv[idx];
      ++idx;
      if(!StartsWith(token, "-")) {
        args_.push_back(token);
        continue;
      }
      // A dash entry consumes the next entry as its value unless that entry
      // is itself a dash entry (or there is none).
      const bool has_value = idx < argc && !StartsWith(argv[idx], "-");
      if(has_value) {
        mpss_[token] = argv[idx];
        ++idx;
      } else {
        sset_.insert(token);
      }
    }
  }
  ~ArgvContext() {
  }
  friend ostream& operator << (ostream& os, const ArgvContext& args);
  // i-th non-dash entry, or "" when i is out of range.
  string operator [](size_t i) const {
    if(i >= args_.size()) {
      return "";
    }
    return args_[i];
  }
  // Value recorded for a "-key value" pair, or "" when the key has no value.
  string operator [](const string& key) const {
    const map<string, string>::const_iterator found = mpss_.find(key);
    if(found == mpss_.end()) {
      return "";
    }
    return found->second;
  }
  // True when `key` was seen either with a value or as a bare dash entry.
  bool HasKey(const string& key) const {
    const bool as_pair = mpss_.find(key) != mpss_.end();
    const bool as_flag = sset_.find(key) != sset_.end();
    return as_pair || as_flag;
  }
 private:
  vector<string> args_;
  map<string, string> mpss_;
  set<string> sset_;
 }; // class ArgvContext
 // Streams the three containers back to back: args_, then mpss_, then sset_.
 inline ostream& operator << (ostream& os, const ArgvContext& args) {
  ostream& after_args = os << args.args_;
  ostream& after_pairs = after_args << args.mpss_;
  return after_pairs << args.sset_;
 }
 } // namespace limonp
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/BlockingQueue.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/BlockingQueue.hpp
@ -0,0 +1,49 @@
 #ifndef LIMONP_BLOCKINGQUEUE_HPP
 #define LIMONP_BLOCKINGQUEUE_HPP
 #include <queue>
 #include "Condition.hpp"
 namespace limonp {
 // Unbounded FIFO guarded by a MutexLock. Pop() waits on a Condition until an
 // element is available; Push() signals that Condition after enqueueing.
 template<class T>
 class BlockingQueue: NonCopyable {
 private:
  // Declaration order matters: notEmpty_ is constructed from mutex_.
  mutable MutexLock mutex_;
  Condition         notEmpty_;
  std::queue<T>     queue_;
 public:
  BlockingQueue()
    : mutex_(), notEmpty_(mutex_), queue_() {
  }
  // Enqueues a copy of `x` and signals the not-empty condition.
  void Push(const T& x) {
    MutexLockGuard guard(mutex_);
    queue_.push(x);
    notEmpty_.Notify(); // Wait morphing saves us
  }
  // Removes and returns the oldest element, waiting while the queue is empty.
  T Pop() {
    MutexLockGuard guard(mutex_);
    // Re-test after every wake-up: a wake-up can be spurious.
    for(;;) {
      if(!queue_.empty()) {
        break;
      }
      notEmpty_.Wait();
    }
    assert(!queue_.empty());
    T head(queue_.front());
    queue_.pop();
    return head;
  }
  // Element count, read under the lock.
  size_t Size() const {
    MutexLockGuard guard(mutex_);
    const size_t count = queue_.size();
    return count;
  }
  bool Empty() const {
    return 0 == Size();
  }
 }; // class BlockingQueue
 } // namespace limonp
 #endif // LIMONP_BLOCKINGQUEUE_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/BoundedBlockingQueue.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/BoundedBlockingQueue.hpp
@ -0,0 +1,67 @@
 #ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
 #define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
 #include "BoundedQueue.hpp"
 namespace limonp {
 // Fixed-capacity FIFO guarded by a MutexLock. Push() waits while the queue is
 // full, Pop() waits while it is empty; each signals the opposite condition
 // after changing the queue.
 template<typename T>
 class BoundedBlockingQueue : NonCopyable {
 public:
  // maxSize is forwarded to the underlying BoundedQueue<T>.
  explicit BoundedBlockingQueue(size_t maxSize)
    : mutex_(),
      notEmpty_(mutex_),
      notFull_(mutex_),
      queue_(maxSize) {
  }
  // Enqueues a copy of `x`, waiting until there is room.
  void Push(const T& x) {
    MutexLockGuard lock(mutex_);
    // Loop rather than a single wait: the condition is re-tested after every wake-up.
    while (queue_.Full()) {
      notFull_.Wait();
    }
    assert(!queue_.Full());
    queue_.Push(x);
    notEmpty_.Notify();
  }
  // Removes and returns the oldest element, waiting until one exists.
  T Pop() {
    MutexLockGuard lock(mutex_);
    while (queue_.Empty()) {
      notEmpty_.Wait();
    }
    assert(!queue_.Empty());
    T res = queue_.Pop();
    notFull_.Notify();
    return res;
  }
  bool Empty() const {
    MutexLockGuard lock(mutex_);
    return queue_.Empty();
  }
  bool Full() const {
    MutexLockGuard lock(mutex_);
    return queue_.Full();
  }
  // Current element count, read under the lock.
  // BoundedQueue spells its accessors Size()/Capacity(); the previous
  // queue_.size()/queue_.capacity() calls named members that do not exist and
  // failed to compile as soon as these methods were instantiated.
  size_t size() const {
    MutexLockGuard lock(mutex_);
    return queue_.Size();
  }
  // Maximum element count; fixed at construction, so no lock is taken.
  size_t capacity() const {
    return queue_.Capacity();
  }
 private:
  mutable MutexLock          mutex_;
  Condition                  notEmpty_;
  Condition                  notFull_;
  BoundedQueue<T>  queue_;
 }; // class BoundedBlockingQueue
 } // namespace limonp
 #endif // LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/BoundedQueue.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/BoundedQueue.hpp
@ -0,0 +1,65 @@
 #ifndef LIMONP_BOUNDED_QUEUE_HPP
 #define LIMONP_BOUNDED_QUEUE_HPP
 #include <vector>
 #include <fstream>
 #include <cassert>
 namespace limonp {
 using namespace std;
 // Fixed-capacity FIFO implemented as a ring over a vector<T> of `capacity`
 // slots. Not synchronized. Push on a full queue and Pop on an empty queue are
 // guarded only by assert.
 template<class T>
 class BoundedQueue {
 public:
  // Allocates `capacity` default-constructed slots; capacity must be non-zero
  // (assert-checked).
  explicit BoundedQueue(size_t capacity)
    : head_(0), tail_(0), size_(0), capacity_(capacity), circular_buffer_(capacity) {
    assert(capacity_);
  }
  ~BoundedQueue() {
  }
  // Forgets all elements by resetting the indices; slots are not reassigned.
  void Clear() {
    head_ = tail_ = size_ = 0;
  }
  bool Empty() const {
    return size_ == 0;
  }
  bool Full() const {
    return size_ == capacity_;
  }
  size_t Size() const {
    return size_;
  }
  size_t Capacity() const {
    return capacity_;
  }
  // Stores a copy of `t` in the tail slot and advances the tail with wrap-around.
  void Push(const T& t) {
    assert(!Full());
    circular_buffer_[tail_] = t;
    tail_ = Next(tail_);
    ++size_;
  }
  // Returns a copy of the head slot and advances the head with wrap-around.
  T Pop() {
    assert(!Empty());
    const size_t front = head_;
    head_ = Next(head_);
    --size_;
    return circular_buffer_[front];
  }
 private:
  // Index following `pos` in the ring.
  size_t Next(size_t pos) const {
    return (pos + 1) % capacity_;
  }
  size_t head_;
  size_t tail_;
  size_t size_;
  const size_t capacity_;
  vector<T> circular_buffer_;
 }; // class BoundedQueue
 } // namespace limonp
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Closure.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Closure.hpp
@ -0,0 +1,206 @@
 #ifndef LIMONP_CLOSURE_HPP
 #define LIMONP_CLOSURE_HPP
 namespace limonp {
 // Type-erased deferred call: a callable plus its bound arguments, invoked
 // later through Run(). Instances are created on the heap by NewClosure();
 // the caller owns the returned pointer.
 //
 // All constructors use member-initializer lists, so bound argument types only
 // need to be copy-constructible. The earlier default-construct-then-assign
 // form rejected types without a default constructor or assignment operator.
 class ClosureInterface {
 public:
  virtual ~ClosureInterface() {
  }
  virtual void Run() = 0;
 };
 // Free-function closures: Run() evaluates (*fun_)(bound args...).
 template <class Funct>
 class Closure0: public ClosureInterface {
 public:
  Closure0(Funct fun)
    : fun_(fun) {
  }
  virtual ~Closure0() {
  }
  virtual void Run() {
    (*fun_)();
  }
 private:
  Funct fun_;
 };
 template <class Funct, class Arg1>
 class Closure1: public ClosureInterface {
 public:
  Closure1(Funct fun, Arg1 arg1)
    : fun_(fun), arg1_(arg1) {
  }
  virtual ~Closure1() {
  }
  virtual void Run() {
    (*fun_)(arg1_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
 };
 template <class Funct, class Arg1, class Arg2>
 class Closure2: public ClosureInterface {
 public:
  Closure2(Funct fun, Arg1 arg1, Arg2 arg2)
    : fun_(fun), arg1_(arg1), arg2_(arg2) {
  }
  virtual ~Closure2() {
  }
  virtual void Run() {
    (*fun_)(arg1_, arg2_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
 };
 template <class Funct, class Arg1, class Arg2, class Arg3>
 class Closure3: public ClosureInterface {
 public:
  Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
    : fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
  }
  virtual ~Closure3() {
  }
  virtual void Run() {
    (*fun_)(arg1_, arg2_, arg3_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
  Arg3 arg3_;
 };
 // Member-function closures: Run() evaluates (p_->*fun_)(bound args...).
 // p_ is a non-owning pointer; the target object must outlive the closure.
 template <class Obj, class Funct>
 class ObjClosure0: public ClosureInterface {
 public:
  ObjClosure0(Obj* p, Funct fun)
    : p_(p), fun_(fun) {
  }
  virtual ~ObjClosure0() {
  }
  virtual void Run() {
    (p_->*fun_)();
  }
 private:
  Obj* p_;
  Funct fun_;
 };
 template <class Obj, class Funct, class Arg1>
 class ObjClosure1: public ClosureInterface {
 public:
  ObjClosure1(Obj* p, Funct fun, Arg1 arg1)
    : p_(p), fun_(fun), arg1_(arg1) {
  }
  virtual ~ObjClosure1() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
 };
 template <class Obj, class Funct, class Arg1, class Arg2>
 class ObjClosure2: public ClosureInterface {
 public:
  ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2)
    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2) {
  }
  virtual ~ObjClosure2() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_, arg2_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
 };
 template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
 class ObjClosure3: public ClosureInterface {
 public:
  ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
  }
  virtual ~ObjClosure3() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_, arg2_, arg3_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
  Arg3 arg3_;
 };
 // NewClosure overloads: heap-allocate the closure matching the callable's
 // arity. The returned pointer comes from `new`; the caller must delete it.
 template<class R>
 ClosureInterface* NewClosure(R (*fun)()) {
  return new Closure0<R (*)()>(fun);
 }
 template<class R, class Arg1>
 ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
  return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
 }
 template<class R, class Arg1, class Arg2>
 ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
  return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
 }
 template<class R, class Arg1, class Arg2, class Arg3>
 ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
  return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
 }
 template<class R, class Obj>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
  return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun);
 }
 template<class R, class Obj, class Arg1>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
  return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
 }
 template<class R, class Obj, class Arg1, class Arg2>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
  return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
 }
 template<class R, class Obj, class Arg1, class Arg2, class Arg3>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
  return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
 }
 } // namespace limonp
 #endif // LIMONP_CLOSURE_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Colors.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Colors.hpp
@ -0,0 +1,31 @@
 #ifndef LIMONP_COLOR_PRINT_HPP
 #define LIMONP_COLOR_PRINT_HPP
 #include <string>
 #include <stdarg.h>
 namespace limonp {
 using std::string;
 // Foreground colour numbers; ColorPrintln prints the value as the %d of an
 // "\033[0;%dm" sequence.
 enum Color {
  BLACK = 30,
  RED = 31,
  GREEN = 32,
  YELLOW = 33,
  BLUE = 34,
  PURPLE = 35
 }; // enum Color
 // printf-style helper: emits the colour prefix, the formatted text, then the
 // reset sequence followed by a newline, all on stdout.
 static void ColorPrintln(enum Color color, const char * fmt, ...) {
  va_list args;
  va_start(args, fmt);
  printf("\033[0;%dm", color);
  vprintf(fmt, args);
  va_end(args);
  // The newline is deliberate: without it, later lines can unexpectedly keep the colour.
  printf("\033[0m\n");
 }
 } // namespace limonp
 #endif // LIMONP_COLOR_PRINT_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Condition.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Condition.hpp
@ -0,0 +1,38 @@
 #ifndef LIMONP_CONDITION_HPP
 #define LIMONP_CONDITION_HPP
 #include "MutexLock.hpp"
 namespace limonp {
 // Thin wrapper around a pthread_cond_t that is permanently paired with one
 // MutexLock. Every pthread call is wrapped in XCHECK, which treats a non-zero
 // return code as a failed check.
 class Condition : NonCopyable {
 public:
  // Stores a reference to `mutex` (it must outlive this object) and
  // initialises the condition variable with default attributes.
  explicit Condition(MutexLock& mutex)
    : mutex_(mutex) {
    XCHECK(!pthread_cond_init(&pcond_, NULL));
  }
  ~Condition() {
    XCHECK(!pthread_cond_destroy(&pcond_));
  }
  // Waits on the condition using the paired mutex.
  // NOTE(review): POSIX requires the caller to hold that mutex when calling
  // pthread_cond_wait, and wake-ups may be spurious, so callers should re-test
  // their predicate in a loop.
  void Wait() {
    XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
  }
  // Signals the condition via pthread_cond_signal.
  void Notify() {
    XCHECK(!pthread_cond_signal(&pcond_));
  }
  // Signals the condition via pthread_cond_broadcast.
  void NotifyAll() {
    XCHECK(!pthread_cond_broadcast(&pcond_));
  }
 private:
  MutexLock& mutex_;      // not owned
  pthread_cond_t pcond_;
 }; // class Condition
 } // namespace limonp
 #endif // LIMONP_CONDITION_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Config.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Config.hpp
@ -0,0 +1,103 @@
 /************************************
 * file enc : utf8
 * author   : wuyanyi09@gmail.com
 ************************************/
 #ifndef LIMONP_CONFIG_H
 #define LIMONP_CONFIG_H
 #include <map>
 #include <fstream>
 #include <iostream>
 #include <assert.h>
 #include "StringUtil.hpp"
 namespace limonp {
 using namespace std;
 // Read-only key/value store loaded from a text file of "key = value" lines.
 // Blank lines and lines starting with "#" are ignored. Keys and values are
 // trimmed. Malformed lines and duplicate keys are reported on stderr and
 // assert-checked; when asserts are disabled they are skipped.
 class Config {
 public:
  explicit Config(const string& filePath) {
    LoadFile(filePath);
  }
  // True when at least one key was loaded.
  operator bool () {
    return !map_.empty();
  }
  // Value stored for `key`, or `defaultvalue` when the key is absent.
  string Get(const string& key, const string& defaultvalue) const {
    const map<string, string>::const_iterator found = map_.find(key);
    if(found == map_.end()) {
      return defaultvalue;
    }
    return found->second;
  }
  // Integer form: atoi of the stored text, or `defaultvalue` when the key is
  // absent or its value is empty.
  int Get(const string& key, int defaultvalue) const {
    const string raw = Get(key, "");
    if(raw == "") {
      return defaultvalue;
    }
    return atoi(raw.c_str());
  }
  // C-string view of the stored value; NULL for a NULL or unknown key.
  // The pointer refers to storage owned by this Config.
  const char* operator [] (const char* key) const {
    if(key == NULL) {
      return NULL;
    }
    const map<string, string>::const_iterator found = map_.find(key);
    if(found == map_.end()) {
      return NULL;
    }
    return found->second.c_str();
  }
  // Textual dump of the whole map, produced by the string << overload.
  string GetConfigInfo() const {
    string dump;
    dump << *this;
    return dump;
  }
 private:
  // Parses `filePath` line by line into map_.
  void LoadFile(const string& filePath) {
    ifstream ifs(filePath.c_str());
    assert(ifs);
    string line;
    vector<string> parts;
    for(size_t lineno = 1; getline(ifs, line); ++lineno) {
      Trim(line);
      const bool skippable = line.empty() || StartsWith(line, "#");
      if(skippable) {
        continue;
      }
      parts.clear();
      Split(line, parts, "=");
      if(parts.size() != 2) {
        fprintf(stderr, "line[%s] illegal.\n", line.c_str());
        assert(false);
        continue;
      }
      string& key = parts[0];
      string& value = parts[1];
      Trim(key);
      Trim(value);
      const bool inserted = map_.insert(make_pair(key, value)).second;
      if(inserted) {
        continue;
      }
      fprintf(stderr, "key[%s] already exits.\n", key.c_str());
      assert(false);
    }
    ifs.close();
  }
  friend ostream& operator << (ostream& os, const Config& config);
  map<string, string> map_;
 }; // class Config
 // Streams the underlying map using the map << overload.
 inline ostream& operator << (ostream& os, const Config& config) {
  return os << config.map_;
 }
 } // namespace limonp
 #endif // LIMONP_CONFIG_H
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/FileLock.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/FileLock.hpp
@ -0,0 +1,74 @@
 #ifndef LIMONP_FILELOCK_HPP
 #define LIMONP_FILELOCK_HPP
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <string>
 #include <string.h>
 #include <assert.h>
 namespace limonp {
 using std::string;
 // Whole-file write lock built on fcntl(F_SETLK).
 //
 // Usage: Open(path), then Lock()/UnLock(), then Close() (or let the
 // destructor close). Failures do not throw: they clear Ok() and store the
 // strerror text, readable through Error().
 //
 // NOTE: the class is copyable but a copy shares the same descriptor; avoid
 // copying an open FileLock.
 class FileLock {
 public:
  FileLock() : fd_(-1), ok_(true) {
  }
  ~FileLock() {
    Close();
  }
  // Opens (creating if needed, mode 0644) the lock file. Must not be called
  // while a descriptor is already open.
  void Open(const string& fname) {
    assert(fd_ == -1);
    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
    if(fd_ < 0) {
      ok_ = false;
      err_ = strerror(errno);
      fd_ = -1;
    }
  }
  // Closes the descriptor if one is open and marks the object as closed.
  // Resetting fd_ makes Close() idempotent, stops the destructor from closing
  // the same descriptor a second time, and allows a later Open().
  void Close() {
    if(fd_ >= 0) {
      ::close(fd_);
      fd_ = -1;
    }
  }
  // Non-blocking attempt to take a write lock on the whole file.
  void Lock() {
    if(LockOrUnlock(fd_, true) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }
  // Releases the lock on the whole file.
  void UnLock() {
    if(LockOrUnlock(fd_, false) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }
  // False once any operation has failed; never reset to true.
  bool Ok() const {
    return ok_;
  }
  // strerror text of the most recent failure (empty if none).
  string Error() const {
    return err_;
  }
 private:
  // Issues fcntl(F_SETLK) for a write lock or an unlock covering the whole
  // file; returns fcntl's result (negative on failure, errno set).
  static int LockOrUnlock(int fd, bool lock) {
    errno = 0;
    struct flock f;
    memset(&f, 0, sizeof(f));
    f.l_type = (lock ? F_WRLCK : F_UNLCK);
    f.l_whence = SEEK_SET;
    f.l_start = 0;
    f.l_len = 0;        // Lock/unlock entire file
    return fcntl(fd, F_SETLK, &f);
  }
  int fd_;      // -1 when no descriptor is open
  bool ok_;
  string err_;
 }; // class FileLock
 }// namespace limonp
 #endif // LIMONP_FILELOCK_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/ForcePublic.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/ForcePublic.hpp
@ -0,0 +1,7 @@
 #ifndef LIMONP_FORCE_PUBLIC_H
 #define LIMONP_FORCE_PUBLIC_H
 // WARNING: rewrites the access-specifier keywords for everything that is
 // included or declared after this header, exposing private/protected members.
 // Presumably meant for white-box tests only -- TODO confirm. The C++ standard
 // forbids defining macros named like keywords in translation units that
 // include standard headers, so keep this out of production builds.
 #define private public
 #define protected public
 #endif // LIMONP_FORCE_PUBLIC_H
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/LocalVector.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/LocalVector.hpp
@ -0,0 +1,139 @@
 #ifndef LIMONP_LOCAL_VECTOR_HPP
 #define LIMONP_LOCAL_VECTOR_HPP
 #include <iostream>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 namespace limonp {
 using namespace std;
 /*
 * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
 * LocalVector<T> is simple and not well-tested.
 *
 * Small-buffer vector: the first LOCAL_VECTOR_BUFFER_SIZE elements live in an
 * in-object array; larger sizes move to malloc'ed storage. Elements are copied
 * with memcpy and never destroyed, hence the primitive-type restriction.
 */
 const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
 template <class T>
 class LocalVector {
 public:
  typedef const T* const_iterator ;
  typedef T value_type;
  typedef size_t size_type;
 private:
  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
  T * ptr_;          // == buffer_ while the inline storage is in use
  size_t size_;
  size_t capacity_;
 public:
  LocalVector() {
    init_();
  }
  LocalVector(const LocalVector<T>& vec) {
    init_();
    *this = vec;
  }
  LocalVector(const_iterator  begin, const_iterator end) { // TODO: make it faster
    init_();
    while(begin != end) {
      push_back(*begin++);
    }
  }
  LocalVector(size_t size, const T& t) { // TODO: make it faster
    init_();
    while(size--) {
      push_back(t);
    }
  }
  ~LocalVector() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
  }
 public:
  // Copy-assignment. Self-assignment is a no-op: without the guard, clear()
  // would release/reset this object's storage before it is read back as the
  // source, silently emptying the vector.
  LocalVector<T>& operator = (const LocalVector<T>& vec) {
    if(this == &vec) {
      return *this;
    }
    clear();
    size_ = vec.size();
    capacity_ = vec.capacity();
    if(vec.buffer_ == vec.ptr_) {
      // Source uses its inline buffer: copy into ours.
      memcpy(static_cast<void*>(buffer_), vec.buffer_, sizeof(T) * size_);
      ptr_ = buffer_;
    } else {
      // Source is heap-backed: allocate the same capacity, copy the live elements.
      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
      assert(ptr_);
      memcpy(static_cast<void*>(ptr_), vec.ptr_, vec.size() * sizeof(T));
    }
    return *this;
  }
 private:
  // Resets to the empty, inline-buffer state without freeing anything.
  void init_() {
    ptr_ = buffer_;
    size_ = 0;
    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
  }
 public:
  // Unchecked element access.
  T& operator [] (size_t i) {
    return ptr_[i];
  }
  const T& operator [] (size_t i) const {
    return ptr_[i];
  }
  // Appends a copy of `t`, doubling the capacity when full.
  void push_back(const T& t) {
    if(size_ == capacity_) {
      assert(capacity_);
      reserve(capacity_ * 2);
    }
    ptr_[size_ ++ ] = t;
  }
  // Grows the storage to at least `size` elements; never shrinks.
  void reserve(size_t size) {
    if(size <= capacity_) {
      return;
    }
    T * next =  (T*)malloc(sizeof(T) * size);
    assert(next);
    T * old = ptr_;
    ptr_ = next;
    // Only the first size_ slots hold live elements; copying capacity_ slots
    // would also read storage that was never written.
    memcpy(static_cast<void*>(ptr_), old, sizeof(T) * size_);
    capacity_ = size;
    if(old != buffer_) {
      free(old);
    }
  }
  bool empty() const {
    return 0 == size();
  }
  size_t size() const {
    return size_;
  }
  size_t capacity() const {
    return capacity_;
  }
  const_iterator begin() const {
    return ptr_;
  }
  const_iterator end() const {
    return ptr_ + size_;
  }
  // Releases heap storage (if any) and returns to the empty inline state.
  void clear() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
    init_();
  }
 };
 // Prints the elements as ["a", "b", ...]; an empty vector prints as [].
 template <class T>
 ostream & operator << (ostream& os, const LocalVector<T>& vec) {
  if(vec.empty()) {
    return os << "[]";
  }
  os<<"[\""<<vec[0];
  for(size_t i = 1; i < vec.size(); i++) {
    os<<"\", \""<<vec[i];
  }
  os<<"\"]";
  return os;
 }
 }
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Logging.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Logging.hpp
@ -0,0 +1,90 @@
 #ifndef LIMONP_LOGGING_HPP
 #define LIMONP_LOGGING_HPP
 #include <sstream>
 #include <iostream>
 #include <cassert>
 #include <cstdlib>
 #include <ctime>
 #ifdef XLOG
 #error "XLOG has been defined already"
 #endif // XLOG
 #ifdef XCHECK
 #error "XCHECK has been defined already"
 #endif // XCHECK
 // XLOG(LEVEL) << ...; builds a temporary limonp::Logger for the given level
 // (DEBUG, INFO, WARNING, ERROR, FATAL) and yields its stream. The record is
 // written when the temporary is destroyed at the end of the statement.
 #define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream() 
 // XCHECK(exp) << ...; evaluates exp in every build type and logs at FATAL
 // (which aborts) when it is false. Written as if/else so that an `else`
 // following `if (c) XCHECK(x);` still binds to the caller's `if` rather than
 // to the macro's internal one.
 #define XCHECK(exp) if(exp) {} else XLOG(FATAL) << "exp: ["#exp << "] false. "
 namespace limonp {
 // Severity levels; the values index LOG_LEVEL_ARRAY.
 enum {
  LL_DEBUG = 0, 
  LL_INFO = 1, 
  LL_WARNING = 2, 
  LL_ERROR = 3, 
  LL_FATAL = 4,
 }; // enum
 static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
 static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
 // One log record. The constructor writes the prefix
 // "<local time> <file>:<line> <LEVEL> " into an internal buffer; the
 // destructor writes the buffer plus a newline to std::cerr and calls abort()
 // for LL_FATAL. When LOGGING_LEVEL is defined, records below that level
 // produce no output.
 class Logger {
 public:
  Logger(size_t level, const char* filename, int lineno)
   : level_(level) {
 #ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
 #endif
    // Strictly less-than: level_ indexes LOG_LEVEL_ARRAY below.
    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
    char buf[32];
    time_t timeNow;
    time(&timeNow);
    struct tm tmNow;
 #if defined(_WIN32) || defined(_WIN64)
    errno_t e = localtime_s(&tmNow, &timeNow);
    assert(e == 0);  // comparison, not assignment: `e = 0` always failed the assert
    (void)e;         // keeps NDEBUG builds free of unused-variable warnings
 #else
    struct tm * tm_tmp = localtime_r(&timeNow, &tmNow);
    assert(tm_tmp != nullptr);
    (void)tm_tmp;
 #endif
    strftime(buf, sizeof(buf), LOG_TIME_FORMAT, &tmNow);
    stream_ << buf 
      << " " << filename 
      << ":" << lineno 
      << " " << LOG_LEVEL_ARRAY[level_] 
      << " ";
  }
  ~Logger() {
 #ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
 #endif
    std::cerr << stream_.str() << std::endl;
    if (level_ == LL_FATAL) {
      abort();
    }
  }
  // Stream that receives the message text of this record.
  std::ostream& Stream() {
    return stream_;
  }
 private:
  std::ostringstream stream_;
  size_t level_;
 }; // class Logger
 } // namespace limonp
 #endif // LIMONP_LOGGING_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Md5.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Md5.hpp
@ -0,0 +1,411 @@
 #ifndef __MD5_H__
 #define __MD5_H__
 // Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
 // rights reserved.
 // License to copy and use this software is granted provided that it
 // is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 // Algorithm" in all material mentioning or referencing this software
 // or this function.
 //
 // License is also granted to make and use derivative works provided
 // that such works are identified as "derived from the RSA Data
 // Security, Inc. MD5 Message-Digest Algorithm" in all material
 // mentioning or referencing the derived work.
 //
 // RSA Data Security, Inc. makes no representations concerning either
 // the merchantability of this software or the suitability of this
 // software for any particular purpose. It is provided "as is"
 // without express or implied warranty of any kind.
 //
 // These notices must be retained in any copies of any part of this
 // documentation and/or software.
 // The original md5 implementation avoids external libraries.
 // This version has dependency on stdio.h for file input and
 // string.h for memcpy.
 #include <cstdio>
 #include <cstring>
 #include <iostream>
 namespace limonp {
 //#pragma region MD5 defines
 // NOTE: everything below is a preprocessor macro and therefore not scoped by
 // namespace limonp; the short names (F, G, H, I, S11..S44, FF..II,
 // ROTATE_LEFT) stay defined for any code that follows this header.
 // Constants for MD5Transform routine.
 // Sxy = left-rotation amount used by the y-th step pattern of round x.
 #define S11 7
 #define S12 12
 #define S13 17
 #define S14 22
 #define S21 5
 #define S22 9
 #define S23 14
 #define S24 20
 #define S31 4
 #define S32 11
 #define S33 16
 #define S34 23
 #define S41 6
 #define S42 10
 #define S43 15
 #define S44 21
 // F, G, H and I are basic MD5 functions.
 #define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
 #define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
 #define H(x, y, z) ((x) ^ (y) ^ (z))
 #define I(x, y, z) ((y) ^ ((x) | (~z)))
 // ROTATE_LEFT rotates x left n bits.
 // The `32-(n)` term assumes x is a 32-bit unsigned value.
 #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
 // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
 // Rotation is separate from addition to prevent recomputation.
 // Each macro updates its first argument (a) in place.
 #define FF(a, b, c, d, x, s, ac) { \
  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 #define GG(a, b, c, d, x, s, ac) { \
  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 #define HH(a, b, c, d, x, s, ac) { \
  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 #define II(a, b, c, d, x, s, ac) { \
  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 //#pragma endregion
 typedef unsigned char BYTE ;
 // POINTER defines a generic pointer type
 typedef unsigned char *POINTER;
 // UINT2 defines a two byte word
 typedef unsigned short int UINT2;
 // UINT4 defines a four byte word
 // (assumes `unsigned int` is 32 bits wide on the target platform)
 typedef unsigned int UINT4;
 // Padding block fed to Update() by MD5::Final(): a single 0x80 byte followed
 // by zeros.
 static unsigned char PADDING[64] = {
  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 // convenient object that wraps
 // the C-functions for use in C++ only
 class MD5 {
 private:
  struct __context_t {
    UINT4 state[4];                                   /* state (ABCD) */
    UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
    unsigned char buffer[64];                         /* input buffer */
  } context ;
  //#pragma region static helper functions
  // The core of the MD5 algorithm is here.
  // MD5 basic transformation. Transforms state based on block.
  static void MD5Transform( UINT4 state[4], unsigned char block[64] ) {
    UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
    Decode (x, block, 64);
    /* Round 1 */
    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
    FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
    FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
    FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
    FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
    FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
    FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
    FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
    FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
    FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
    FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
    /* Round 2 */
    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
    GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
    GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
    GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
    GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
    GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
    GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
    /* Round 3 */
    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
    HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
    HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
    HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
    HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
    HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
    HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
    HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
    /* Round 4 */
    II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
    II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
    II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
    II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
    II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
    II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
    II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
    II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
    II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
    II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
    II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
    II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    // Zeroize sensitive information.
    memset((POINTER)x, 0, sizeof (x));
  }
  // Encodes input (UINT4) into output (unsigned char). Assumes len is
  // a multiple of 4.
  static void Encode( unsigned char *output, UINT4 *input, unsigned int len ) {
    unsigned int i, j;
    for (i = 0, j = 0; j < len; i++, j += 4) {
      output[j] = (unsigned char)(input[i] & 0xff);
      output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
      output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
      output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
    }
  }
  // Decodes input (unsigned char) into output (UINT4). Assumes len is
  // a multiple of 4.
  static void Decode( UINT4 *output, unsigned char *input, unsigned int len ) {
    unsigned int i, j;
    for (i = 0, j = 0; j < len; i++, j += 4)
      output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
                  (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
  }
  //#pragma endregion
 public:
  // MAIN FUNCTIONS
  MD5() {
    Init() ;
  }
  // MD5 initialization. Begins an MD5 operation, writing a new context.
  void Init() {
    context.count[0] = context.count[1] = 0;
    // Load magic initialization constants.
    context.state[0] = 0x67452301;
    context.state[1] = 0xefcdab89;
    context.state[2] = 0x98badcfe;
    context.state[3] = 0x10325476;
  }
  // MD5 block update operation. Continues an MD5 message-digest
  // operation, processing another message block, and updating the
  // context.
  void Update(
    unsigned char *input,   // input block
    unsigned int inputLen ) { // length of input block
    unsigned int i, index, partLen;
    // Compute number of bytes mod 64
    index = (unsigned int)((context.count[0] >> 3) & 0x3F);
    // Update number of bits
    if ((context.count[0] += ((UINT4)inputLen << 3))
        < ((UINT4)inputLen << 3))
      context.count[1]++;
    context.count[1] += ((UINT4)inputLen >> 29);
    partLen = 64 - index;
    // Transform as many times as possible.
    if (inputLen >= partLen) {
      memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
      MD5Transform (context.state, context.buffer);
      for (i = partLen; i + 63 < inputLen; i += 64)
        MD5Transform (context.state, &input[i]);
      index = 0;
    } else
      i = 0;
    /* Buffer remaining input */
    memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen-i);
  }
  // MD5 finalization. Ends an MD5 message-digest operation, writing the
  // the message digest and zeroizing the context.
  // Writes to digestRaw
  void Final() {
    unsigned char bits[8];
    unsigned int index, padLen;
    // Save number of bits
    Encode( bits, context.count, 8 );
    // Pad out to 56 mod 64.
    index = (unsigned int)((context.count[0] >> 3) & 0x3f);
    padLen = (index < 56) ? (56 - index) : (120 - index);
    Update( PADDING, padLen );
    // Append length (before padding)
    Update( bits, 8 );
    // Store state in digest
    Encode( digestRaw, context.state, 16);
    // Zeroize sensitive information.
    memset((POINTER)&context, 0, sizeof (context));
    writeToString() ;
  }
  /// Buffer must be 32+1 (nul) = 33 chars long at least
  void writeToString() {
    int pos ;
    for( pos = 0 ; pos < 16 ; pos++ )
      sprintf( digestChars+(pos*2), "%02x", digestRaw[pos] ) ;
  }
 public:
  // an MD5 digest is a 16-byte number (32 hex digits)
  BYTE digestRaw[ 16 ] ;
  // This version of the digest is actually
  // a "printf'd" version of the digest.
  char digestChars[ 33 ] ;
  /// Load a file from disk and digest it
  // Digests a file and returns the result.
  const char* digestFile( const char *filename ) {
    if (NULL == filename || strcmp(filename, "") == 0)
      return NULL;
    Init() ;
    FILE *file;
    unsigned char buffer[1024] ;
    if((file = fopen (filename, "rb")) == NULL) {
      return NULL;
    }
    int len;
    while( (len = fread( buffer, 1, 1024, file )) )
      Update( buffer, len ) ;
    Final();
    fclose( file );
    return digestChars ;
  }
  /// Digests a byte-array already in memory
  const char* digestMemory( BYTE *memchunk, int len ) {
    if (NULL == memchunk)
      return NULL;
    Init() ;
    Update( memchunk, len ) ;
    Final() ;
    return digestChars ;
  }
  // Digests a string and prints the result.
  const char* digestString(const char *string ) {
    if (string == NULL)
      return NULL;
    Init() ;
    Update( (unsigned char*)string, strlen(string) ) ;
    Final() ;
    return digestChars ;
  }
 };
 inline bool md5String(const char* str, std::string& res) {
  if (NULL == str) {
    res = "";
    return false;
  }
  MD5 md5;
  const char *pRes = md5.digestString(str);
  if (NULL == pRes) {
    res = "";
    return false;
  }
  res = pRes;
  return true;
 }
 inline bool md5File(const char* filepath, std::string& res) {
  if (NULL == filepath || strcmp(filepath, "") == 0) {
    res = "";
    return false;
  }
  MD5 md5;
  const char *pRes = md5.digestFile(filepath);
  if (NULL == pRes) {
    res = "";
    return false;
  }
  res = pRes;
  return true;
 }
 }
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/MutexLock.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/MutexLock.hpp
@ -0,0 +1,51 @@
 #ifndef LIMONP_MUTEX_LOCK_HPP
 #define LIMONP_MUTEX_LOCK_HPP
 #include <pthread.h>
 #include "NonCopyable.hpp"
 #include "Logging.hpp"
 namespace limonp {
 // Owns one pthread_mutex_t: initialised in the constructor and destroyed in
 // the destructor. Lock()/Unlock() are private; the only declared friend,
 // MutexLockGuard, is what is meant to call them.
 // NOTE(review): XCHECK is defined outside this file (presumably in
 // Logging.hpp) and presumably reports a failure when its argument is false.
 class MutexLock: NonCopyable {
 public:
  // Initialises the mutex with default attributes (NULL attribute pointer).
  MutexLock() {
    XCHECK(!pthread_mutex_init(&mutex_, NULL));
  }
  // Destroys the mutex.
  ~MutexLock() {
    XCHECK(!pthread_mutex_destroy(&mutex_));
  }
  // Exposes the raw handle for pthread APIs that take a pthread_mutex_t*.
  pthread_mutex_t* GetPthreadMutex() {
    return &mutex_;
  }
 private:
  // Acquires the mutex.
  void Lock() {
    XCHECK(!pthread_mutex_lock(&mutex_));
  }
  // Releases the mutex.
  void Unlock() {
    XCHECK(!pthread_mutex_unlock(&mutex_));
  }
  friend class MutexLockGuard;
  pthread_mutex_t mutex_;
 }; // class MutexLock
 // Scoped lock: the constructor calls Lock() on the referenced MutexLock and
 // the destructor calls Unlock(). Only a reference is stored, so the
 // MutexLock must outlive the guard.
 class MutexLockGuard: NonCopyable {
 public:
  // Locks `mutex` for the lifetime of this guard.
  explicit MutexLockGuard(MutexLock & mutex)
    : mutex_(mutex) {
    mutex_.Lock();
  }
  // Unlocks the mutex taken in the constructor.
  ~MutexLockGuard() {
    mutex_.Unlock();
  }
 private:
  MutexLock & mutex_;
 }; // class MutexLockGuard
 // From here on, any text of the form `MutexLockGuard(x)` expands to
 // `XCHECK(false);`. A declaration such as `MutexLockGuard guard(m);` is not
 // affected, because the function-like macro only fires when the name is
 // directly followed by '('.
 // NOTE(review): presumably meant to catch an unnamed temporary guard, which
 // would unlock again at the end of the statement - confirm.
 #define MutexLockGuard(x) XCHECK(false);
 } // namespace limonp
 #endif // LIMONP_MUTEX_LOCK_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/NonCopyable.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/NonCopyable.hpp
@ -0,0 +1,21 @@
 /************************************
 ************************************/
 #ifndef LIMONP_NONCOPYABLE_H
 #define LIMONP_NONCOPYABLE_H
 namespace limonp {
 // Base class for types that must not be copied: the copy constructor and
 // copy assignment are private and only declared, so copying a derived
 // object does not compile. Construction and destruction are protected, so
 // the class is usable only as a base.
 class NonCopyable {
  // Class-default (private) access; intentionally left without a definition.
  NonCopyable(const NonCopyable& other);
  const NonCopyable& operator=(const NonCopyable& other);
 protected:
  NonCopyable() {}
  ~NonCopyable() {}
 }; // class NonCopyable
 } // namespace limonp
 #endif // LIMONP_NONCOPYABLE_H
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/StdExtension.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/StdExtension.hpp
@ -0,0 +1,157 @@
 #ifndef LIMONP_STD_EXTEMSION_HPP
 #define LIMONP_STD_EXTEMSION_HPP
 #include <map>
 #ifdef __APPLE__
 #include <unordered_map>
 #include <unordered_set>
 #elif(__cplusplus >= 201103L)
 #include <unordered_map>
 #include <unordered_set>
 #elif defined _MSC_VER
 #include <unordered_map>
 #include <unordered_set>
 #else
 #include <tr1/unordered_map>
 #include <tr1/unordered_set>
 namespace std {
 using std::tr1::unordered_map;
 using std::tr1::unordered_set;
 }
 #endif
 #include <set>
 #include <string>
 #include <vector>
 #include <deque>
 #include <fstream>
 #include <sstream>
 namespace std {
 // Streams a vector as "[a, b, c]"; an empty vector is written as "[]".
 template<typename T>
 std::ostream& operator << (std::ostream& os, const std::vector<T>& v) {
  if (v.empty()) {
    return os << "[]";
  }
  typename std::vector<T>::const_iterator cur = v.begin();
  os << "[" << *cur;
  for (++cur; cur != v.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << "]";
 }
 // Specialisation for vectors of strings: every element is wrapped in double
 // quotes, e.g. ["a", "b"]; an empty vector is written as "[]".
 template<>
 inline std::ostream& operator << (std::ostream& os, const std::vector<std::string>& v) {
  if (v.empty()) {
    return os << "[]";
  }
  std::vector<std::string>::const_iterator cur = v.begin();
  os << "[\"" << *cur;
  for (++cur; cur != v.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  return os << "\"]";
 }
 // Streams a deque with every element wrapped in double quotes, e.g.
 // ["a", "b"] (the quotes are emitted for any element type); an empty deque
 // is written as "[]".
 template<typename T>
 std::ostream& operator << (std::ostream& os, const std::deque<T>& dq) {
  if (dq.empty()) {
    return os << "[]";
  }
  typename std::deque<T>::const_iterator cur = dq.begin();
  os << "[\"" << *cur;
  for (++cur; cur != dq.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  return os << "\"]";
 }
 // Streams a pair as "first:second".
 template<class T1, class T2>
 std::ostream& operator << (std::ostream& os, const std::pair<T1, T2>& pr) {
  return os << pr.first << ":" << pr.second;
 }
 // Replaces the contents of `str` with the streamed text of `obj` (whatever
 // the ostream operator << for T produces) and returns `str`.
 template<class T>
 std::string& operator << (std::string& str, const T& obj) {
  std::stringstream buffer;
  buffer << obj;
  str = buffer.str();
  return str;
 }
 // Streams a map as "{k1:v1, k2:v2}" in iteration order; an empty map is
 // written as "{}". Each entry goes through the pair operator <<.
 template<class T1, class T2>
 std::ostream& operator << (std::ostream& os, const std::map<T1, T2>& mp) {
  if (mp.empty()) {
    return os << "{}";
  }
  typename std::map<T1, T2>::const_iterator entry = mp.begin();
  os << '{';
  os << *entry;
  for (++entry; entry != mp.end(); ++entry) {
    os << ", " << *entry;
  }
  return os << '}';
 }
 // Streams an unordered_map as "{k1:v1, k2:v2}" in the container's iteration
 // order; an empty map is written as "{}". Each entry goes through the pair
 // operator <<.
 template<class T1, class T2>
 std::ostream& operator << (std::ostream& os, const std::unordered_map<T1, T2>& mp) {
  if (mp.empty()) {
    os << "{}";
    return os;
  }
  typename std::unordered_map<T1, T2>::const_iterator entry = mp.begin();
  os << '{';
  os << *entry;
  for (++entry; entry != mp.end(); ++entry) {
    os << ", " << *entry;
  }
  os << '}';
  return os;
 }
 // Streams a set as "{a, b, c}" in iteration order; an empty set is written
 // as "{}".
 template<class T>
 std::ostream& operator << (std::ostream& os, const std::set<T>& st) {
  if (st.empty()) {
    return os << "{}";
  }
  typename std::set<T>::const_iterator cur = st.begin();
  os << '{';
  os << *cur;
  for (++cur; cur != st.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << '}';
 }
 // True when `contain.find(key)` locates an element, i.e. it does not return
 // `contain.end()`.
 template<class KeyType, class ContainType>
 bool IsIn(const ContainType& contain, const KeyType& key) {
  const bool missing = (contain.find(key) == contain.end());
  return !missing;
 }
 // Replaces `s` with everything that can still be read from `ifs` (from the
 // stream buffer's current position to end of input) and returns `s`.
 template<class T>
 std::basic_string<T> & operator << (std::basic_string<T> & s, std::ifstream & ifs) {
  std::istreambuf_iterator<T> first(ifs);
  std::istreambuf_iterator<T> last;
  return s.assign(first, last);
 }
 // Writes the characters of `s` straight into the stream buffer of `ofs`
 // (through an ostreambuf_iterator) and returns `ofs`.
 template<class T>
 std::ofstream & operator << (std::ofstream & ofs, const std::basic_string<T>& s) {
  std::copy(s.begin(), s.end(), std::ostreambuf_iterator<T>(ofs));
  return ofs;
 }
 } // namespace std
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/StringUtil.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/StringUtil.hpp
@ -0,0 +1,405 @@
 /************************************
 * file enc : ascii
 * author   : wuyanyi09@gmail.com
 ************************************/
 #ifndef LIMONP_STR_FUNCTS_H
 #define LIMONP_STR_FUNCTS_H
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
 #include <algorithm>
 #include <cctype>
 #include <map>
 #include <cassert>
 #include <ctime>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <memory.h>
 #include <functional>
 #include <locale>
 #include <sstream>
 #include <sys/types.h>
 #include <iterator>
 #include <algorithm>
 #include "StdExtension.hpp"
 namespace limonp {
 using namespace std;
 // printf-style formatting into a std::string.
 //
 // Starts with a 256-byte buffer. When vsnprintf reports the required length
 // the buffer is resized to exactly that and the call is repeated. When
 // vsnprintf reports an error (negative return) the buffer is doubled and the
 // call retried, but only up to kMaxGuess bytes: after that an empty string
 // is returned instead of growing without bound.
 //
 // `fmt` and the variadic arguments follow the usual printf contract.
 inline std::string StringFormat(const char* fmt, ...) {
  const int kMaxGuess = 1 << 24;
  int size = 256;
  std::string str;
  for (;;) {
    str.resize(size);
    va_list ap;
    va_start(ap, fmt);
    // Write through &str[0] (mutable storage) rather than casting away the
    // constness of c_str().
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && n < size) {
      str.resize(n);
      return str;
    }
    if (n > -1) {
      size = n + 1;          // exact size is known: one more pass suffices
    } else if (size < kMaxGuess) {
      size *= 2;             // length unknown: grow and retry
    } else {
      return std::string();  // persistent failure: give up instead of looping
    }
  }
 }
 // Streams the elements of [begin, end) into `res`, separated by `connector`.
 // For an empty range `res` is left exactly as it was.
 template<class T>
 void Join(T begin, T end, std::string& res, const std::string& connector) {
  if (begin != end) {
    std::stringstream joined;
    joined << *begin;
    for (begin++; begin != end; begin++) {
      joined << connector << *begin;
    }
    res = joined.str();
  }
 }
 // Convenience form of Join() that returns the joined text; an empty range
 // yields an empty string.
 template<class T>
 std::string Join(T begin, T end, const std::string& connector) {
  std::string joined;
  Join(begin, end, joined, connector);
  return joined;
 }
 // Upper-cases `str` in place, byte by byte, with toupper() and returns it.
 // Each byte is converted to unsigned char first: passing a negative char
 // (any non-ASCII byte where char is signed) to toupper() is undefined.
 inline std::string& Upper(std::string& str) {
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    *it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
  }
  return str;
 }
 // Lower-cases `str` in place, byte by byte, with tolower() and returns it.
 // Each byte is converted to unsigned char first: passing a negative char
 // (any non-ASCII byte where char is signed) to tolower() is undefined.
 inline std::string& Lower(std::string& str) {
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
  }
  return str;
 }
 // True when `c` is at most 0xff and std::isspace() accepts it. Larger
 // values are rejected up front because isspace() must only be given values
 // representable as unsigned char.
 inline bool IsSpace(unsigned c) {
  return c > 0xff ? false : std::isspace(c & 0xff) != 0;
 }
 // Removes leading whitespace (as classified by IsSpace) from `s` in place
 // and returns `s`.
 // A lambda replaces std::not1/std::ptr_fun, which were removed in C++17, so
 // one code path serves every compiler. The char -> unsigned conversion at
 // the IsSpace call is the same one the old ptr_fun<unsigned, bool> adaptor
 // performed.
 inline std::string& LTrim(std::string &s) {
  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](char ch) {
    return !IsSpace(ch);
  }));
  return s;
 }
 // Removes trailing whitespace (as classified by IsSpace) from `s` in place
 // and returns `s`.
 inline std::string& RTrim(std::string &s) {
  s.erase(std::find_if(s.rbegin(), s.rend(), [](char ch) {
    return !IsSpace(ch);
  }).base(), s.end());
  return s;
 }
 // Removes whitespace from both ends of `s` in place and returns `s`.
 inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
 }
 // Removes every leading occurrence of the character `x` from `s` in place
 // and returns `s`. Only `x` is stripped; whitespace is left alone.
 // A lambda replaces std::not1/std::bind2nd, which were removed in C++17, so
 // every compiler takes the same path and strips the same characters.
 inline std::string& LTrim(std::string& s, char x) {
  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [x](char c) {
    return c != x;
  }));
  return s;
 }
 // Removes every trailing occurrence of the character `x` from `s` in place
 // and returns `s`. Only `x` is stripped; whitespace is left alone.
 inline std::string& RTrim(std::string& s, char x) {
  s.erase(std::find_if(s.rbegin(), s.rend(), [x](char c) {
    return c != x;
  }).base(), s.end());
  return s;
 }
 // Removes the character `x` from both ends of `s` in place and returns `s`.
 inline std::string& Trim(std::string &s, char x) {
  return LTrim(RTrim(s, x), x);
 }
 // Splits `src` at every character that appears in `pattern` and stores the
 // pieces in `res` (cleared first).
 //  - Once `maxsplit` pieces have been produced, the rest of `src` becomes
 //    the final piece.
 //  - A separator at the very end of `src` does not produce a trailing empty
 //    piece; an empty `src` yields no pieces at all.
 inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
  res.clear();
  for (size_t cursor = 0; cursor < src.size(); ) {
    const size_t hit = src.find_first_of(pattern, cursor);
    const bool last_piece = (hit == std::string::npos) || (res.size() >= maxsplit);
    if (last_piece) {
      res.push_back(src.substr(cursor));
      break;
    }
    res.push_back(src.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
 }
 // Convenience form of Split() that returns the pieces instead of filling an
 // output parameter.
 inline std::vector<std::string> Split(const std::string& src, const std::string& pattern, size_t maxsplit = std::string::npos) {
  std::vector<std::string> pieces;
  Split(src, pieces, pattern, maxsplit);
  return pieces;
 }
 // True when `str` begins with `prefix`; an empty prefix always matches.
 inline bool StartsWith(const std::string& str, const std::string& prefix) {
  const size_t n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
 }
 // True when `str` ends with `suffix`; an empty suffix always matches.
 inline bool EndsWith(const std::string& str, const std::string& suffix) {
  const size_t n = suffix.length();
  return n <= str.length() && str.compare(str.length() - n, n, suffix) == 0;
 }
 // True when the character `ch` occurs anywhere in `str`.
 inline bool IsInStr(const std::string& str, char ch) {
  const std::string::size_type where = str.find(ch);
  return where != std::string::npos;
 }
 // Packs two bytes into one 16-bit value: `high` becomes the upper 8 bits
 // and `low` the lower 8 bits.
 inline uint16_t TwocharToUint16(char high, char low) {
  const uint16_t upper = static_cast<unsigned char>(high);
  const uint16_t lower = static_cast<unsigned char>(low);
  return static_cast<uint16_t>((upper << 8) | lower);
 }
 // Decodes `len` bytes of UTF-8 at `str` into 16-bit values appended to
 // `vec` (which is cleared first).
 // Returns false when `str` is null (`vec` is then left untouched), when a
 // multi-byte sequence is cut off by the end of the input, or when the lead
 // byte is above 0xef (sequences longer than three bytes are not handled).
 // On a false return `vec` keeps whatever was decoded before the failure.
 // Trailing bytes are not validated; only their low six bits are used.
 template <class Uint16Container>
 bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if (!str) {
    return false;
  }
  vec.clear();
  size_t pos = 0;
  while (pos < len) {
    const uint8_t lead = static_cast<uint8_t>(str[pos]);
    if (lead < 0x80) {
      // single byte: 0xxxxxxx
      vec.push_back(str[pos]);
      pos += 1;
    } else if (lead <= 0xdf && pos + 1 < len) {
      // two bytes: 5 payload bits from the lead, 6 from the trail byte
      const uint8_t t1 = static_cast<uint8_t>(str[pos + 1]);
      const uint16_t unit = static_cast<uint16_t>(((lead & 0x1f) << 6) | (t1 & 0x3f));
      vec.push_back(unit);
      pos += 2;
    } else if (lead <= 0xef && pos + 2 < len) {
      // three bytes: 4 payload bits from the lead, 6 from each trail byte
      const uint8_t t1 = static_cast<uint8_t>(str[pos + 1]);
      const uint8_t t2 = static_cast<uint8_t>(str[pos + 2]);
      const uint16_t unit = static_cast<uint16_t>(((lead & 0x0f) << 12) | ((t1 & 0x3f) << 6) | (t2 & 0x3f));
      vec.push_back(unit);
      pos += 3;
    } else {
      return false;
    }
  }
  return true;
 }
 // std::string convenience form: decodes all of `str` via the
 // (pointer, length) overload and returns its result.
 template <class Uint16Container>
 bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  return Utf8ToUnicode(bytes, str.size(), vec);
 }
 // Decodes UTF-8 text into 32-bit code points appended to `vec` (which is
 // cleared first). Sequences of one to four bytes are handled.
 // Returns false when a sequence is cut off by the end of `str` or the lead
 // byte is above 0xf7; `vec` then keeps whatever was decoded before the
 // failure. Trailing bytes are not validated; only their low six bits are
 // used.
 template <class Uint32Container>
 bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
  vec.clear();
  size_t pos = 0;
  while (pos < str.size()) {
    const uint8_t lead = static_cast<uint8_t>(str[pos]);
    size_t width;    // total bytes in this sequence
    uint32_t cp;     // payload bits taken from the lead byte
    if (lead < 0x80) {          // 0xxxxxxx
      width = 1;
      cp = lead & 0x7f;
    } else if (lead <= 0xdf) {  // 110xxxxx
      width = 2;
      cp = lead & 0x1f;
    } else if (lead <= 0xef) {  // 1110xxxx
      width = 3;
      cp = lead & 0x0f;
    } else if (lead <= 0xf7) {  // 11110xxx
      width = 4;
      cp = lead & 0x07;
    } else {
      return false;
    }
    if (pos + width > str.size()) {
      return false;  // sequence runs past the end of the input
    }
    // Every trailing byte contributes its low six bits.
    for (size_t k = 1; k < width; k++) {
      cp <<= 6;
      cp |= static_cast<uint8_t>(str[pos + k]) & 0x3f;
    }
    vec.push_back(cp);
    pos += width;
  }
  return true;
 }
 // Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
 // (cleared first), using one to four bytes per code point.
 // Values above 0xffff always take the four-byte form; the input is not
 // checked against the Unicode range.
 template <class Uint32ContainerConIter>
 void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if(ui <= 0x7f) {
      res += char(ui);
    } else if(ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if(ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      // The four-byte lead byte carries three payload bits (11110xxx), so
      // the mask must be 0x07; 0x03 dropped bit 20 and mis-encoded every
      // code point from U+100000 upwards.
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
 }
 // Encodes the 16-bit values in [begin, end) as UTF-8 into `res` (cleared
 // first), using one, two or three bytes per value.
 template <class Uint16ContainerConIter>
 void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; begin++) {
    const uint16_t unit = *begin;
    if (unit <= 0x7f) {
      res += char(unit);
      continue;
    }
    if (unit <= 0x7ff) {
      res += char(((unit >> 6) & 0x1f) | 0xc0);
    } else {
      res += char(((unit >> 12) & 0x0f) | 0xe0);
      res += char(((unit >> 6) & 0x3f) | 0x80);
    }
    // The last trail byte is the same for the two- and three-byte forms.
    res += char((unit & 0x3f) | 0x80);
  }
 }
 // Converts `len` bytes at `str` into 16-bit values appended to `vec`
 // (cleared first): a byte with the top bit clear becomes one value on its
 // own; a byte with the top bit set is combined with the following byte
 // (first byte in the upper 8 bits).
 // Returns true for a null `str` (leaving `vec` empty) and false when a
 // top-bit byte is the last byte of the input.
 template <class Uint16Container>
 bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if (!str) {
    return true;
  }
  for (size_t pos = 0; pos < len; ) {
    if ((str[pos] & 0x80) == 0) {
      vec.push_back(uint16_t(str[pos]));
      pos += 1;
      continue;
    }
    if (pos + 1 >= len) {
      return false;  // lead byte without a partner
    }
    const uint16_t hi = uint16_t(str[pos]) & 0x00ff;
    const uint16_t lo = uint16_t(str[pos + 1]) & 0x00ff;
    const uint16_t pair = uint16_t((hi << 8) | lo);
    vec.push_back(pair);
    pos += 2;
  }
  return true;
 }
 // std::string convenience form: converts all of `str` via the
 // (pointer, length) overload and returns its result.
 template <class Uint16Container>
 bool GBKTrans(const std::string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  return GBKTrans(bytes, str.size(), vec);
 }
 // Turns the 16-bit values in [begin, end) back into bytes in `res` (cleared
 // first): when the upper byte has its top bit set, both bytes are emitted
 // (upper first); otherwise only the lower byte is emitted.
 template <class Uint16ContainerConIter>
 void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
  res.clear();
  for (; begin != end; begin++) {
    const char hi = ((*begin)>>8) & 0x00ff;
    const char lo = (*begin) & 0x00ff;
    if (hi & 0x80) {
      res += hi;
    }
    res += lo;
  }
 }
 /*
 * Writes the current local time, rendered by strftime() with `format`, into
 * `timeStr`.
 * format example: "%Y-%m-%d %H:%M:%S"
 * The work buffer is 64 bytes; when strftime() reports 0 (for instance
 * because the rendered text does not fit) `timeStr` ends up empty.
 */
 inline void GetTime(const std::string& format, std::string&  timeStr) {
  time_t timeNow;
  time(&timeNow);
  struct tm tmNow;
  #if defined(_WIN32) || defined(_WIN64)
  errno_t e = localtime_s(&tmNow, &timeNow);
  // Compare, do not assign: `assert(e = 0)` evaluated to 0 and fired on
  // every call in debug builds.
  assert(e == 0);
  (void)e;  // silence the unused-variable warning when NDEBUG strips the assert
  #else
  struct tm * tm_tmp = localtime_r(&timeNow, &tmNow);
  assert(tm_tmp != nullptr);
  (void)tm_tmp;  // silence the unused-variable warning when NDEBUG strips the assert
  #endif
  timeStr.resize(64);
  // Write through &timeStr[0] (mutable storage) rather than casting away the
  // constness of c_str().
  size_t len = strftime(&timeStr[0], timeStr.size(), format.c_str(), &tmNow);
  timeStr.resize(len);
 }
 // Concatenates two path fragments, inserting a "/" between them unless
 // `path1` already ends with one.
 inline std::string PathJoin(const std::string& path1, const std::string& path2) {
  const bool has_separator = EndsWith(path1, "/");
  return has_separator ? path1 + path2 : path1 + "/" + path2;
 }
 }
 #endif
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Thread.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/Thread.hpp
@ -0,0 +1,44 @@
 #ifndef LIMONP_THREAD_HPP
 #define LIMONP_THREAD_HPP
 #include "Logging.hpp"
 #include "NonCopyable.hpp"
 namespace limonp {
 // Abstract wrapper around one pthread. A subclass implements Run(); Start()
 // creates the thread, which executes Run() on this object, and Join() waits
 // for it. isStarted/isJoined record which of the two calls have been made.
 // NOTE(review): XCHECK is defined outside this file (presumably in
 // Logging.hpp) and presumably reports a failure when its argument is false.
 class IThread: NonCopyable {
 public:
  IThread(): isStarted(false), isJoined(false) {
  }
  // A thread that was started but never joined is detached here.
  virtual ~IThread() {
    if(isStarted && !isJoined) {
      XCHECK(!pthread_detach(thread_));
    }
  };
  // Body of the thread; invoked from Worker() on the new thread.
  virtual void Run() = 0;
  // Creates the thread. Checked to be called at most once.
  void Start() {
    XCHECK(!isStarted);
    XCHECK(!pthread_create(&thread_, NULL, Worker, this));
    isStarted = true;
  }
  // Waits for the thread to finish. Checked to be called at most once.
  // thread_ is only assigned by pthread_create() in Start(), and nothing
  // here checks isStarted before joining.
  void Join() {
    XCHECK(!isJoined);
    XCHECK(!pthread_join(thread_, NULL));
    isJoined = true;
  }
 private:
  // pthread entry point: `data` is the IThread* passed to pthread_create().
  static void * Worker(void * data) {
    IThread * ptr = (IThread* ) data;
    ptr->Run();
    return NULL;
  }
  pthread_t thread_;
  bool isStarted;
  bool isJoined;
 }; // class IThread
 } // namespace limonp
 #endif // LIMONP_THREAD_HPP
--- a/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/ThreadPool.hpp
+++ b/funasr/runtime/onnxruntime/third_party/jieba/include/limonp/ThreadPool.hpp
@ -0,0 +1,86 @@
 #ifndef LIMONP_THREAD_POOL_HPP
 #define LIMONP_THREAD_POOL_HPP
 #include "Thread.hpp"
 #include "BlockingQueue.hpp"
 #include "BoundedBlockingQueue.hpp"
 #include "Closure.hpp"
 namespace limonp {
 using namespace std;
 //class ThreadPool;
 // Fixed-size pool of worker threads fed from one bounded queue of
 // ClosureInterface* tasks.
 //
 // Lifecycle: construct -> Start() -> Add(task)... -> Stop() (also run by the
 // destructor). A NULL entry in the queue is the shutdown signal for exactly
 // one worker. The pool deletes every task after running it.
 class ThreadPool: NonCopyable {
 public:
  // One pool thread: pops tasks until it receives the NULL shutdown signal.
  class Worker: public IThread {
   public:
    Worker(ThreadPool* pool): ptThreadPool_(pool) {
      assert(ptThreadPool_);
    }
    virtual ~Worker() {
    }
    virtual void Run() {
      while (true) {
        ClosureInterface* closure = ptThreadPool_->queue_.Pop();
        if (closure == NULL) {
          break;
        }
        // A throwing task is logged and dropped so the worker keeps running.
        try {
          closure->Run();
        } catch(const std::exception& e) {
          XLOG(ERROR) << e.what();
        } catch(...) {
          XLOG(ERROR) << " unknown exception.";
        }
        delete closure;
      }
    }
   private:
    ThreadPool * ptThreadPool_;
  }; // class Worker
  // Creates `thread_num` workers (not yet running) and a queue constructed
  // with `thread_num` as its argument.
  ThreadPool(size_t thread_num)
    : threads_(thread_num),
      queue_(thread_num),
      started_(false) {
    assert(thread_num);
    for(size_t i = 0; i < threads_.size(); i ++) {
      threads_[i] = new Worker(this);
    }
  }
  ~ThreadPool() {
    Stop();
  }
  // Launches every worker thread.
  void Start() {
    for(size_t i = 0; i < threads_.size(); i++) {
      threads_[i]->Start();
    }
    started_ = true;
  }
  // Shuts the pool down and releases the workers. Safe to call on a pool
  // that was never started, and safe to call more than once.
  void Stop() {
    if (started_) {
      // One NULL per worker, then wait for each of them to exit.
      for(size_t i = 0; i < threads_.size(); i ++) {
        queue_.Push(NULL);
      }
      for(size_t i = 0; i < threads_.size(); i ++) {
        threads_[i]->Join();
      }
      started_ = false;
    }
    // Workers that never ran must not be joined (their pthread_t was never
    // set), and pushing shutdown signals into a queue nobody drains could
    // block; they are simply deleted.
    // NOTE(review): tasks added to a pool that is never started stay in the
    // queue and are not deleted here - confirm whether that can happen.
    for(size_t i = 0; i < threads_.size(); i ++) {
      delete threads_[i];
    }
    threads_.clear();
  }
  // Queues `task`; the pool deletes it after running it.
  void Add(ClosureInterface* task) {
    assert(task);
    queue_.Push(task);
  }
 private:
  friend class Worker;
  vector<IThread*> threads_;
  BoundedBlockingQueue<ClosureInterface*> queue_;
  bool started_;  // true between Start() and the matching Stop()
 }; // class ThreadPool
 } // namespace limonp
 #endif // LIMONP_THREAD_POOL_HPP
--- a/funasr/runtime/websocket/CMakeLists.txt
+++ b/funasr/runtime/websocket/CMakeLists.txt
@ -111,6 +111,8 @@ endif()
 include_directories(${PROJECT_SOURCE_DIR}/../onnxruntime/include/)
 include_directories(${PROJECT_SOURCE_DIR}/../onnxruntime/third_party/yaml-cpp/include/)
 include_directories(${PROJECT_SOURCE_DIR}/../onnxruntime/third_party/kaldi-native-fbank)
 include_directories(${PROJECT_SOURCE_DIR}/../onnxruntime/third_party/jieba/include)
 include_directories(${PROJECT_SOURCE_DIR}/../onnxruntime/third_party/jieba/include/limonp/include)
 add_subdirectory(${PROJECT_SOURCE_DIR}/../onnxruntime/third_party/yaml-cpp yaml-cpp)
 add_subdirectory(${PROJECT_SOURCE_DIR}/../onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc csrc)