mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Merge pull request #167 from alibaba-damo-academy/dev_lhn
fix text postprocess bug
This commit is contained in:
commit
5d4b0c3994
@ -6,7 +6,7 @@ from typing import Any, List, Union
|
|||||||
|
|
||||||
|
|
||||||
def isChinese(ch: str):
|
def isChinese(ch: str):
|
||||||
if '\u4e00' <= ch <= '\u9fff' or '\u0030' <= ch <= '\u0039':
|
if '\u4e00' <= ch <= '\u9fff' or '\u0030' <= ch <= '\u0039' or ch == '@':
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -17,6 +17,8 @@ def isAllChinese(word: Union[List[Any], str]):
|
|||||||
cur = i.replace(' ', '')
|
cur = i.replace(' ', '')
|
||||||
cur = cur.replace('</s>', '')
|
cur = cur.replace('</s>', '')
|
||||||
cur = cur.replace('<s>', '')
|
cur = cur.replace('<s>', '')
|
||||||
|
cur = cur.replace('<unk>', '')
|
||||||
|
cur = cur.replace('<OOV>', '')
|
||||||
word_lists.append(cur)
|
word_lists.append(cur)
|
||||||
|
|
||||||
if len(word_lists) == 0:
|
if len(word_lists) == 0:
|
||||||
@ -34,6 +36,8 @@ def isAllAlpha(word: Union[List[Any], str]):
|
|||||||
cur = i.replace(' ', '')
|
cur = i.replace(' ', '')
|
||||||
cur = cur.replace('</s>', '')
|
cur = cur.replace('</s>', '')
|
||||||
cur = cur.replace('<s>', '')
|
cur = cur.replace('<s>', '')
|
||||||
|
cur = cur.replace('<unk>', '')
|
||||||
|
cur = cur.replace('<OOV>', '')
|
||||||
word_lists.append(cur)
|
word_lists.append(cur)
|
||||||
|
|
||||||
if len(word_lists) == 0:
|
if len(word_lists) == 0:
|
||||||
@ -144,7 +148,7 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
|
|||||||
else:
|
else:
|
||||||
word = i.decode('utf-8')
|
word = i.decode('utf-8')
|
||||||
|
|
||||||
if word in ['<s>', '</s>', '<unk>']:
|
if word in ['<s>', '</s>', '<unk>', '<OOV>']:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
middle_lists.append(word)
|
middle_lists.append(word)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user