mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
commit
b78de3a56d
@ -41,8 +41,6 @@ from funasr.utils.types import str_or_none
|
||||
from funasr.utils import asr_utils, wav_utils, postprocess_utils
|
||||
import pdb
|
||||
|
||||
header_colors = '\033[95m'
|
||||
end_colors = '\033[0m'
|
||||
|
||||
global_asr_language: str = 'zh-cn'
|
||||
global_sample_rate: Union[int, Dict[Any, int]] = {
|
||||
|
||||
@ -47,15 +47,11 @@ def forward_segment(text, dic):
|
||||
|
||||
def seg_tokenize(txt, seg_dict):
    """Map each token of *txt* through *seg_dict*, emitting "<unk>" for
    tokens absent from the dictionary.

    Args:
        txt: iterable of tokens (e.g. the characters/words of a transcript).
        seg_dict: mapping from token to its segmented replacement string;
            a single entry may expand to several space-separated pieces.

    Returns:
        list[str]: all replacement pieces, re-split on whitespace so that
        multi-piece dictionary entries contribute one element per piece.
    """
    # Build the pieces in a list and join once — avoids the quadratic
    # cost of repeated string "+=" concatenation in the original.
    pieces = [seg_dict.get(word, "<unk>") for word in txt]
    return " ".join(pieces).strip().split()
|
||||
|
||||
def seg_tokenize_wo_pattern(txt, seg_dict):
|
||||
|
||||
@ -452,7 +452,7 @@ class TestUniasrInferencePipelines(unittest.TestCase):
|
||||
def test_uniasr_2pass_zhcn_16k_common_vocab8358_offline(self):
    """Smoke-test the offline UniASR 2-pass zh-CN 16k pipeline on a sample wav.

    Fixes the corrupted model id ("speech_UniASauto_speech_recognitionR_asr_...")
    that the diff shows was accidentally pasted into the string.
    """
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline')
    # Run recognition on a hosted sample; "decoding_model": "offline" selects
    # the offline (non-streaming) pass of the 2-pass model.
    rec_result = inference_pipeline(
        audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
        param_dict={"decoding_model": "offline"})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user