diff --git a/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py b/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py
index e8ae25307..4ceb80615 100755
--- a/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py
+++ b/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py
@@ -28,6 +28,7 @@ from fun_text_processing.inverse_text_normalization.zh.graph_utils import (
     GraphFst,
     delete_extra_space,
     delete_space,
+    insert_space,
     generator_main,
 )
 from pynini.lib import pynutil
@@ -94,10 +95,10 @@ class ClassifyFst(GraphFst):
         punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=1.1) + pynutil.insert(" }")
         token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }")
         token_plus_punct = (
-            pynini.closure(punct + pynutil.insert(" ")) + token + pynini.closure(pynutil.insert(" ") + punct)
+            pynini.closure(punct + insert_space) + token + pynini.closure(insert_space + punct)
         )
 
-        graph = token_plus_punct + pynini.closure(delete_extra_space + token_plus_punct)
+        graph = token_plus_punct + pynini.closure(insert_space + token_plus_punct)
         graph = delete_space + graph + delete_space
 
         self.fst = graph.optimize()
diff --git a/fun_text_processing/inverse_text_normalization/zh/taggers/word.py b/fun_text_processing/inverse_text_normalization/zh/taggers/word.py
index 315b2d959..51a83db89 100755
--- a/fun_text_processing/inverse_text_normalization/zh/taggers/word.py
+++ b/fun_text_processing/inverse_text_normalization/zh/taggers/word.py
@@ -11,7 +11,7 @@
 # limitations under the License.
 
 import pynini
-from fun_text_processing.inverse_text_normalization.zh.graph_utils import DAMO_NOT_SPACE, GraphFst
+from fun_text_processing.inverse_text_normalization.zh.graph_utils import DAMO_NOT_SPACE, DAMO_CHAR, GraphFst
 from pynini.lib import pynutil
 
 
@@ -23,5 +23,5 @@ class WordFst(GraphFst):
 
     def __init__(self):
         super().__init__(name="word", kind="classify")
-        word = pynutil.insert("name: \"") + pynini.closure(DAMO_NOT_SPACE, 1) + pynutil.insert("\"")
+        word = pynutil.insert("name: \"") + DAMO_NOT_SPACE + pynutil.insert("\"")
         self.fst = word.optimize()
diff --git a/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py b/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py
index 005e148fa..25133e9c4 100755
--- a/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py
+++ b/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py
@@ -37,5 +37,5 @@ class VerbalizeFinalFst(GraphFst):
             + delete_space
             + pynutil.delete("}")
         )
-        graph = delete_space + pynini.closure(graph + delete_extra_space) + graph + delete_space
+        graph = delete_space + pynini.closure(graph + delete_space) + graph + delete_space
         self.fst = graph
diff --git a/setup.py b/setup.py
index e17c6ae27..05db7793d 100644
--- a/setup.py
+++ b/setup.py
@@ -124,7 +124,7 @@ setup(
     long_description=open(os.path.join(dirname, "README.md"), encoding="utf-8").read(),
     long_description_content_type="text/markdown",
     license="The MIT License",
-    packages=find_packages(include=["funasr*"]),
+    packages=find_packages(include=["funasr*", "fun_text_processing*"]),
     package_data={"funasr": ["version.txt"]},
     install_requires=install_requires,
     setup_requires=setup_requires,