mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
* c++ runtime adapt to 1.0 (#1724) * adapt vad runtime to 1.0 * add json * change yml name * add func LoadVocabFromJson * add token file for InitAsr * add token path for OfflineStream * add funcOpenYaml * add token file for InitPunc * add token file for stream * update punc-model * update funasr-wss-server * update runtime_sdk_download_tool.py * update docker list * Delete docs/images/wechat.png * Add files via upload * Emo2Vec限定选择的情感类别 (#1730) * 限定选择的情感类别 * 使用none来禁用情感标签输出 * 修改输出接口 * 使用unuse来禁用token --------- Co-authored-by: 常材 <gaochangfeng.gcf@alibaba-inc.com> * bugfix * v1.0.27 * update docs * hf hub * Fix incorrect assignment of 'end' attribute to 'start' in sentences list comprehension (#1680) --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> Co-authored-by: gaochangfeng <54253717+gaochangfeng@users.noreply.github.com> Co-authored-by: 常材 <gaochangfeng.gcf@alibaba-inc.com> Co-authored-by: nsdou <168500039+nsdou@users.noreply.github.com>
50 lines
1.5 KiB
C++
50 lines
1.5 KiB
C++
/**
 * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
 * MIT License  (https://opensource.org/licenses/MIT)
 */

#pragma once

#include <map>
#include <string>
#include <vector>

#include <yaml-cpp/yaml.h>

#include "cppjieba/DictTrie.hpp"
#include "cppjieba/HMMModel.hpp"
#include "cppjieba/Jieba.hpp"
#include "nlohmann/json.hpp"

namespace funasr {
|
|
class CTokenizer {
|
|
private:
|
|
|
|
bool m_ready = false;
|
|
vector<string> m_id2token,m_id2punc;
|
|
map<string, int> m_token2id,m_punc2id;
|
|
|
|
cppjieba::DictTrie *jieba_dict_trie_=nullptr;
|
|
cppjieba::HMMModel *jieba_model_=nullptr;
|
|
cppjieba::Jieba jieba_processor_;
|
|
|
|
public:
|
|
|
|
CTokenizer(const char* sz_yamlfile);
|
|
CTokenizer();
|
|
~CTokenizer();
|
|
bool OpenYaml(const char* sz_yamlfile);
|
|
bool OpenYaml(const char* sz_yamlfile, const char* token_file);
|
|
void ReadYaml(const YAML::Node& node);
|
|
vector<string> Id2String(vector<int> input);
|
|
vector<int> String2Ids(vector<string> input);
|
|
int String2Id(string input);
|
|
vector<string> Id2Punc(vector<int> input);
|
|
string Id2Punc(int n_punc_id);
|
|
vector<int> Punc2Ids(vector<string> input);
|
|
vector<string> SplitChineseString(const string& str_info);
|
|
vector<string> SplitChineseJieba(const string& str_info);
|
|
void StrSplit(const string& str, const char split, vector<string>& res);
|
|
void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out);
|
|
bool IsPunc(string& Punc);
|
|
bool seg_jieba = false;
|
|
void SetJiebaRes(cppjieba::DictTrie *dict, cppjieba::HMMModel *hmm);
|
|
void JiebaInit(std::string punc_config);
|
|
};
|
|
|
|
} // namespace funasr
|