mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
* add hotword for deploy_tools * Support wfst decoder and contextual biasing (#1039) * Support wfst decoder and contextual biasing * Turn on fstbin compilation --------- Co-authored-by: gongbo.gb <gongbo.gb@alibaba-inc.com> * mv funasr/runtime runtime * Fix crash caused by OOV in hotwords list * funasr infer * funasr infer * funasr infer * funasr infer * funasr infer * fix some bugs about fst hotword; support wfst for websocket server and clients; mv runtime out of funasr; modify relative docs * del onnxruntime/include/gflags * update tensor.h * update run_server.sh * update deploy tools * update deploy tools * update websocket-server * update funasr-wss-server * Remove self loop propagation * Update websocket_protocol_zh.md * Update websocket_protocol_zh.md * update hotword protocol * author zhaomingwork: change hotwords for h5 and java * update hotword protocol * catch exception for json_fst_hws * update hotword on message * update onnx benchmark for ngram&hotword * update docs * update funasr-wss-serve * add NONE for LM_DIR * update docs * update run_server.sh * add whats-new * modify whats-new * update whats-new * update whats-new * Support decoder option for beam searching * update benchmark_onnx_cpp * Support decoder option for websocket * fix bug of CompileHotwordEmbedding * update html client * update docs --------- Co-authored-by: gongbo.gb <35997837+aibulamusi@users.noreply.github.com> Co-authored-by: gongbo.gb <gongbo.gb@alibaba-inc.com> Co-authored-by: 游雁 <zhifu.gzf@alibaba-inc.com>
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
import numpy as np
|
|
|
|
|
|
def _levenshtein_distance(ref, hyp):
|
|
"""Levenshtein distance is a string metric for measuring the difference
|
|
between two sequences. Informally, the levenshtein disctance is defined as
|
|
the minimum number of single-character edits (substitutions, insertions or
|
|
deletions) required to change one word into the other. We can naturally
|
|
extend the edits to word level when calculate levenshtein disctance for
|
|
two sentences.
|
|
"""
|
|
m = len(ref)
|
|
n = len(hyp)
|
|
|
|
# special case
|
|
if ref == hyp:
|
|
return 0
|
|
if m == 0:
|
|
return n
|
|
if n == 0:
|
|
return m
|
|
|
|
if m < n:
|
|
ref, hyp = hyp, ref
|
|
m, n = n, m
|
|
|
|
# use O(min(m, n)) space
|
|
distance = np.zeros((2, n + 1), dtype=np.int32)
|
|
|
|
# initialize distance matrix
|
|
for j in range(n + 1):
|
|
distance[0][j] = j
|
|
|
|
# calculate levenshtein distance
|
|
for i in range(1, m + 1):
|
|
prev_row_idx = (i - 1) % 2
|
|
cur_row_idx = i % 2
|
|
distance[cur_row_idx][0] = i
|
|
for j in range(1, n + 1):
|
|
if ref[i - 1] == hyp[j - 1]:
|
|
distance[cur_row_idx][j] = distance[prev_row_idx][j - 1]
|
|
else:
|
|
s_num = distance[prev_row_idx][j - 1] + 1
|
|
i_num = distance[cur_row_idx][j - 1] + 1
|
|
d_num = distance[prev_row_idx][j] + 1
|
|
distance[cur_row_idx][j] = min(s_num, i_num, d_num)
|
|
|
|
return distance[m % 2][n]
|
|
|
|
|
|
def cal_cer(references, predictions):
    """Compute the corpus-level character error rate (CER).

    Each reference/prediction pair is split into its elements with ``list``
    (characters, when the items are strings) and scored with
    ``_levenshtein_distance``. The summed edit distance is then divided by
    the summed reference length.

    Args:
        references: Iterable of reference sequences (typically strings).
        predictions: Iterable of predicted sequences, paired positionally
            with ``references``. Pairing uses ``zip``, so surplus items in
            the longer iterable are ignored.

    Returns:
        The error rate as a ``float``. When the total reference length is
        zero the ratio is undefined: ``0.0`` is returned if there were no
        errors either (for example on empty input), otherwise
        ``float("inf")``.
    """
    errors = 0
    lengths = 0
    for ref, pred in zip(references, predictions):
        cur_ref = list(ref)
        cur_hyp = list(pred)
        # int() keeps the running total an arbitrary-precision Python int
        # even if the helper hands back a fixed-width NumPy scalar.
        errors += int(_levenshtein_distance(cur_ref, cur_hyp))
        lengths += len(cur_ref)

    if lengths == 0:
        # Nothing to normalise by: report a defined value instead of
        # raising ZeroDivisionError.
        return 0.0 if errors == 0 else float("inf")
    return float(errors) / lengths
|