mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
* add hotword for deploy_tools * Support wfst decoder and contextual biasing (#1039) * Support wfst decoder and contextual biasing * Turn on fstbin compilation --------- Co-authored-by: gongbo.gb <gongbo.gb@alibaba-inc.com> * mv funasr/runtime runtime * Fix crash caused by OOV in hotwords list * funasr infer * funasr infer * funasr infer * funasr infer * funasr infer * fix some bugs about fst hotword; support wfst for websocket server and clients; mv runtime out of funasr; modify relative docs * del onnxruntime/include/gflags * update tensor.h * update run_server.sh * update deploy tools * update deploy tools * update websocket-server * update funasr-wss-server * Remove self loop propagation * Update websocket_protocol_zh.md * Update websocket_protocol_zh.md * update hotword protocol * author zhaomingwork: change hotwords for h5 and java * update hotword protocol * catch exception for json_fst_hws * update hotword on message * update onnx benchmark for ngram&hotword * update docs * update funasr-wss-serve * add NONE for LM_DIR * update docs * update run_server.sh * add whats-new * modify whats-new * update whats-new * update whats-new * Support decoder option for beam searching * update benchmark_onnx_cpp * Support decoder option for websocket * fix bug of CompileHotwordEmbedding * update html client * update docs --------- Co-authored-by: gongbo.gb <35997837+aibulamusi@users.noreply.github.com> Co-authored-by: gongbo.gb <gongbo.gb@alibaba-inc.com> Co-authored-by: 游雁 <zhifu.gzf@alibaba-inc.com>
89 lines
3.2 KiB
C++
89 lines
3.2 KiB
C++
// lmbin/arpa-to-const-arpa.cc
|
|
|
|
// Copyright 2014 Guoguo Chen
|
|
|
|
// See ../../COPYING for clarification regarding multiple authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
|
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
|
// MERCHANTABILITY OR NON-INFRINGEMENT.
|
|
// See the Apache 2 License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include <string>
|
|
|
|
#include "lm/const-arpa-lm.h"
|
|
#include "util/parse-options.h"
|
|
|
|
int main(int argc, char *argv[]) {
|
|
using namespace kaldi;
|
|
typedef kaldi::int32 int32;
|
|
try {
|
|
const char *usage =
|
|
"Converts an Arpa format language model into ConstArpaLm format,\n"
|
|
"which is an in-memory representation of the pre-built Arpa language\n"
|
|
"model. The output language model can then be read in by a program\n"
|
|
"that wants to rescore lattices. We assume that the words in the\n"
|
|
"input arpa language model has been converted to integers.\n"
|
|
"\n"
|
|
"The program is used jointly with utils/map_arpa_lm.pl to build\n"
|
|
"ConstArpaLm format language model. We first map the words in an Arpa\n"
|
|
"format language model to integers using utils/map_arpa_m.pl, and\n"
|
|
"then use this program to build a ConstArpaLm format language model.\n"
|
|
"\n"
|
|
"Usage: arpa-to-const-arpa [opts] <input-arpa> <const-arpa>\n"
|
|
" e.g.: arpa-to-const-arpa --bos-symbol=1 --eos-symbol=2 \\\n"
|
|
" arpa.txt const_arpa";
|
|
|
|
kaldi::ParseOptions po(usage);
|
|
|
|
ArpaParseOptions options;
|
|
options.Register(&po);
|
|
|
|
// Ideally, these registrations would be in ArpaParseOptions, but some
|
|
// programs want integers and other want symbols, so we register them
|
|
// outside instead.
|
|
po.Register("unk-symbol", &options.unk_symbol,
|
|
"Integer corresponds to unknown-word in language model. -1 if "
|
|
"no such word is provided.");
|
|
po.Register("bos-symbol", &options.bos_symbol,
|
|
"Integer corresponds to <s>. You must set this to your actual "
|
|
"BOS integer.");
|
|
po.Register("eos-symbol", &options.eos_symbol,
|
|
"Integer corresponds to </s>. You must set this to your actual "
|
|
"EOS integer.");
|
|
|
|
po.Read(argc, argv);
|
|
|
|
if (po.NumArgs() != 2) {
|
|
po.PrintUsage();
|
|
exit(1);
|
|
}
|
|
|
|
if (options.bos_symbol == -1 || options.eos_symbol == -1) {
|
|
KALDI_ERR << "Please set --bos-symbol and --eos-symbol.";
|
|
exit(1);
|
|
}
|
|
|
|
std::string arpa_rxfilename = po.GetArg(1),
|
|
const_arpa_wxfilename = po.GetOptArg(2);
|
|
|
|
bool ans = BuildConstArpaLm(options, arpa_rxfilename,
|
|
const_arpa_wxfilename);
|
|
if (ans)
|
|
return 0;
|
|
else
|
|
return 1;
|
|
} catch(const std::exception &e) {
|
|
std::cerr << e.what() << '\n';
|
|
return -1;
|
|
}
|
|
}
|