From 14a3da36432a89be094757677e6e7dc093fdf359 Mon Sep 17 00:00:00 2001
From: Chong Zhang <iriszhangchong@gmail.com>
Date: Mon, 27 Mar 2023 17:23:34 +0800
Subject: [PATCH 1/7] Update postprocess_utils.py

add Burmese characters
---
 funasr/utils/postprocess_utils.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index b607e1da0..e3d8d6b47 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -51,6 +51,26 @@ def isAllAlpha(word: Union[List[Any], str]):
 
     return True
 
+def isMy(word: Union[List[Any], str]):
+    my_char = ['စေ', 'ကို', 'ဖြစ်', 'ဌ', 'ေါ', 'ရင်း', 'w', 'ပုံ', 'ပတ်', 'လာ', 'စီး', 'ဘက်', 'က်', 'ုံ', 'ဏာ', 'ဖော်', 'အတွင်း', 'r', '၀', 'တို', 'ပြန်', 'ီး', 'h', 'ဖြ', 'က္ခ', 'မ္', 'အထိ', 'ဆွ', 'ပေး', 'တူ', '၈', 'မ်း', 'ဠ', 'ထို', 'စွ', 'ထားသည်', 'အခြေ', 'စာ', 'တို့သည်', 'အက', 'ရဲ့', 'ွ', 'င', 'o', 'ုတ်', 'လွ', 'ပင်', 'နိုင်ငံ', 'ပါတယ်', 'ကား', 'အဖွဲ့', 'အခြား', 'ယ့်', 'ပို', 'ည်း', 'ယာ', 'ဆုံး', '၄', 'ကြောင်း', 'တပ်', 'အနေ', 'ဣ', '၏', 'ိုး', 'ချုပ်', 'နေ', 'စ', 'ဟ', 'လွှ', 'အဆ', '၌', 'ဣ', 'နိုင်', 'တည်', 'တွ', 'အချိန်', 'ပဲ', 'ဝင်', 'ဒီ', 'သူ', 'l', 'ဏ', 'ဲ့', 'အထ', 'ပည', 'စိတ်', 'ကြသည်', 'ဩ', 's', 't', 'သေ', 'လျ', 'ိုက်', 'များသည်', 'ငါ', 'ို', 'ထဲ', 'လာ', 'ဝန်', 'ဓ', 'ခဲ့', 'စွာ', 'မ', 'နှင့်', '၎', 'အစိုး', 'ရာ', '၉', 'တယ်', '၎င်း', '၅', 'ပညာ', 'ကြီး', 'သို့မဟုတ်', '၍', 'ို', 'မူ', 'f', 'ခု', 'ိမ', 'c', 'ုပ်', 'l', 'အမ', 'နောက်', 'သော', 'ုန်း', 'ှ', 'ကြ', 'တ', 'ဌာ', 'p', 'ပေါ်', 'h', 'ပင်', 'ဲ', 'ဒီ', 'ဈ', 'လက္ခ', 'r', 'ပြ', 'ဒေ', 'မှ', 'ရှ', 'လျှ', 'လေ့', 'ရောဂါ', 'ော်', 'လည်', 'ဖွဲ့', 'မ်', 'သိ', 'ထုတ်', 'ရိ', 'အား', 'ာ', 'လ', 'ုပ', 'ကျော်', 'အေ', 'g', 'နာ', 'ရီ', 'ရာ', 'v', 'သော', 'လူ', 'တာ', 'ီး', 'j', 'ကာ', 'ရွာ', 'မျက်နှ', 'ယ', 'q', 'ပ်', 'ဌ', 'ဒ', 'ဝ', 'အခြ', 'd', 'ဍ', 'လှ', 'သည်', 'မြန်မာ', 'ယ်', 'ဖ', 'ဦ', 'ါ', 'ဲ့', 'ပျ', 'ရ', 'မိ', 'ပြီး', 'ကို', 'လည်း', 'ဇ', 'မြ', 'နွေး', 'ဘ', 'အသုံးပြု', 'ော', 'ချ', 'မွ', 'လဲ', 'န့်', 'ဂ', 'ည်', 'ကန်', 'က', 'ဗ', 'ေး', 'လု', 'တီ', 'မြို့', 'ိတ်', 'ဘ', 'အရေး', 'ုပ်', 'p', 'ဖ', 'င်', 'သွား', 'တိုင်း', '၃', 'ဿ', 'စေ', 'ဖြတ်', 'ဖွ', 'k', 'သူ', 'တစ်', 'ြ', 'စက်', 'ကြီး', 'ပြည်နယ်', 'ဝါ', 'ဘူး', 'ထ', 'ငြ', 'တော်', 'ကျ', 'ကိ', 'ဈ', 'i', 'အဲ', 'o', 'ေ', 'b', 'င်္', 'ဒါ', 'ညီ', 'w', 'ငွ', 'သ', 'မှတ်', 'ြ', 'ခြား', 'ကြောင့်', 'နာ', 'မှာ', 'f', 'ပွ', 'ကျွန်ုပ်', '၁၀', 'ခေါ', 'ယ်', '၊', 'ှ', 'အဓ', 'နိုင်', 'သက်', 'ပေး', 'a', 'ကျွန်', 'd', 'ထွ', 't', 'n', 'ဠ', 'အရာ', 'ခွ', 'ထ', 'ိုင်', 'ည့်', 'ိမ်', 'သည်', 'တွေ', 'အချ', 'ကား', 'ဗ', 'သုံး', 'အ', 'သူများ', 'ိုက်', 'အမျိုး', '၇', 'စား', 'ဪ', 'တဲ့', 'များ', 's', 'ေ', 'ယ', 'အဓိ', 
'နိုင်သည်', 'ဎ', '္', 'ခ', 'စည်း', '၂', 'န်', 'ရ', 'ခရ', 'နည်း', 'အကြ', 'န်', 'တိ', 'န', 'ပြော', 'မှတ်', 'ောင်း', 'န်း', 'ရေး', 'ဆို', 'ူး', 'ရောက်', 'ထို့', 'ည်', 'ပြန်', 'ဒေ', 'စစ်', 'ဟာ', 'ဏ', 'ပြင်', 'ဆိုင်ရာ', 'z', 'ခုနှစ်', 'နဲ့', 'သ', 'စ္', 'ော', 'c', 'လုပ်', 'မျိုး', 'ကေ', 'ဘာ', 'များ', 'ိတ်', 'စား', 'တို', 'ယား', 'တာ', 'q', 'k', 'ဎ', 'င်း', 'စ်', 'အားလုံး', 'အခ', 'အ', 'အသ', 'ချက်', 'ဆက်', 'ည်း', 'ို့', 'လုပ်', 'ပွဲ', 'ကု', 'စပ်', 'အန', 'ပိုင်း', 'm', 'ဖို့', 'ဃ', 'ု', 'တင်', 'ပ္', 'ပြင်', 'း', 'နယ်', 'm', 'ား', 'အနေ', 'အတွက်', 'င့်', 'ရှိ', 'ခြ', '၄', 'v', 'မဟ', 'က်', 'လေး', 'တိုက်', 'ံ', 'သမ', 'ိုင်', '၏', 'j', 'ကြား', 'ကောင်း', 'ဦး', 'တစ်ခု', 'ထုတ်', 'ကု', 'u', 'မည်', 'ရွ', 'မင်း', 'ပ', 'စ်', 'ဆိုင်', 'ဆက်', 'တွင်', 'မြို့နယ်', 'စု', 'ဟ', 'တစ်ဦး', 'လက်', 'ုတ်', 'သူတို့', '်', 'သာ', 'ဩ', 'မာ', 'ယူ', 'ဤ', '့', 'မန', 'ရောဂ', 'သွ', 'ဝင်', 'အတ', 'ရက်', 'မျက်', 'ထား', '၁', 'တ်', 'တို့', 'ဤ', 'နေ့', 'ရင်', '…', 'ထား', 'ဧ', 'ပါး', 'မာ', 'သား', 'ဆောင်', 'မှု', 'ဂ', 'င', 'အား', 'ဇ', 'ောက်', 'သိ', 'ူ', 'စ', '်', 'အတွ', 'e', 'ဉ', 'ဆို', 'ည', 'သည့်', 'က', 'ဖြစ်', 'တရား', 'ရေ', 'ရပ်', 'ပါ', 'ကူး', 'ကမ္', 'သား', 'ကျ', 'မျိုး', 'ခဲ့', 'ောင်', 'ျ', 'ို့', 'ချ', 'အစိုးရ', 'သတ', 'ပြု', 'ကျွ', 'အရ', 'ိုလ', 'ပြီး', 'လုံး', 'လို', 'z', 'ောက်', 'ဥ', 'တမ်း', 'တရ', 'ကျွန်ုပ်တို့', 'နှစ်', 'ိန်', 'ခံ', 'ကာ', 'ဥပ', 'အသုံး', 'တော်', 'ူး', 'ဘာ', 'ပါ', 'ိပ်', 'ား', 'တ', 'နွ', '္တ', 'ဝ', 'လို့', 'ေ့', 'န္', 'e', 'ေ့', 'စီး', 'y', 'ပြား', 'ပိုး', 'အရ', 'အဖြစ်', 'g', 'ဓာ', 'ပြ', 'တစ်', 'မှ', 'ဖွဲ့', '၍', 'ခြင်း', 'ုံး', 'ဆင်', 'ွန်', 'အလ', 'တော့', 'မို', 'လ', 'စာ', 'ဿ', 'အမြ', 'တင်', 'အကျ', 'ဲ', 'ူ', 'အုပ်', 'y', 'u', 'ဒါ', 'ရော', 'ပို', 'လိုအ', 'a', 'ိ', 'ဆ', '့', 'x', 'လို', '့်', 'ပြည်', 'ယူ', 'ဃ', 'ဆေး', 'ခံ', 'မွ', 'ဘဲ', 'ုံး', 'ော်', 'လိုက်', 'နေ', 'မျ', 'နိုင်င', 'ံ့', 'မှာ', 'နည်း', 'ရန်', 'လက္ခဏာ', 'ဥ', 'င့်', 'ပညာ', 'ပ်', 'အားဖြင့်', 'နှစ်', 'ဆွေး', 'ဖြစ်သည်', 'ဒ', 'ီ', 'နစ်', 'ကျင်', 'ဋ', 'အများ', 'ဉ', 'မ်း', 'န့်', 'ကွ', 'သို့', 'b', '၀', 'ခု', 'ပုံ', 'တော', 'အာ', 'ဖြင့်', 
'ဧ', 'သွား', 'အခါ', 'မ', 'င်း', 'ာ', 'ဆ', 'i', 'ဓ', '၆', 'ကြော', 'ရိ', 'သြ', 'တွေ့', '၌', 'ထိ', 'က္', 'အစ', 'ကြ', 'ရွ', 'ု', 'ေး', 'ွ', 'န်း', 'း', 'ပ', 'ဋ', 'ဆာ', 'အောင်', 'မြို့', 'စိတ်', 'ျ', 'ပြင်ဆင်', 'ါ', 'မဟုတ်', 'ပြု', 'ကိုယ်', 'ရှိ', 'ည', 'ဆောင်', 'ဆွေးနွေး', 'င်', 'n', 'တ်', 'ိုင်း', 'စီ', 'လူ', 'ဍ', 'ဟု', 'ည့်', 'သို့', '္', '႓', 'ိုး', 'န', 'ရေ', 'မယ်', 'ခဲ့သည်', 'ုံ', 'ောင်း', 'ောင်', 'ဦး', 'ထိ', 'တို့', 'ိမ့်', 'x', 'နိုင်ငံ', '၊', 'အပြ', 'ံ', 'ထု', 'ရေး', 'စစ်', 'ီ', 'မှု', 'ရှင်', 'ဦ', 'ရှိသည်', 'ပေါ', 'ဂျ', 'အစား', 'မြန်', 'ခ', 'သာ', 'နှ', 'ပထ', 'ိ', 'သင်', '့်']
+
+    word_lists = []
+    for i in word:
+        cur = i.replace(' ', '')
+        cur = cur.replace('</s>', '')
+        cur = cur.replace('<s>', '')
+        word_lists.append(cur)
+
+    if len(word_lists) == 0:
+        return False
+
+    for ch in word_lists:
+        if ch.isalpha() is False and ch in my_char:
+            return True
+        elif ch.isalpha() is True and isChinese(ch) is True:
+            return False
+
+    return True
 
 # def abbr_dispose(words: List[Any]) -> List[Any]:
 def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
@@ -224,6 +244,17 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
                     end = time_stamp[i][1] 
                     ts_lists.append([begin, end])
                     begin = end
+             elif isMy(ch):
+                word_item += ch
+                word_lists.append(word_item)
+                word_lists.append(' ')
+                word_item = ''
+                alpha_blank = True
+                if time_stamp is not None:
+                    ts_flag = True
+                    end = time_stamp[i][1] 
+                    ts_lists.append([begin, end])
+                    begin = end
             else:
                 word_lists.append(ch)
 

From 3fe6147415a65313fdc856c94fb92a5eb65a63da Mon Sep 17 00:00:00 2001
From: "chong.zhang" <chong.zhang@alibaba-inc.com>
Date: Mon, 27 Mar 2023 17:42:35 +0800
Subject: [PATCH 2/7] update

---
 funasr/utils/postprocess_utils.py | 31 -------------------------------
 1 file changed, 31 deletions(-)

diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index e3d8d6b47..b607e1da0 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -51,26 +51,6 @@ def isAllAlpha(word: Union[List[Any], str]):
 
     return True
 
-def isMy(word: Union[List[Any], str]):
-    my_char = ['စေ', 'ကို', 'ဖြစ်', 'ဌ', 'ေါ', 'ရင်း', 'w', 'ပုံ', 'ပတ်', 'လာ', 'စီး', 'ဘက်', 'က်', 'ုံ', 'ဏာ', 'ဖော်', 'အတွင်း', 'r', '၀', 'တို', 'ပြန်', 'ီး', 'h', 'ဖြ', 'က္ခ', 'မ္', 'အထိ', 'ဆွ', 'ပေး', 'တူ', '၈', 'မ်း', 'ဠ', 'ထို', 'စွ', 'ထားသည်', 'အခြေ', 'စာ', 'တို့သည်', 'အက', 'ရဲ့', 'ွ', 'င', 'o', 'ုတ်', 'လွ', 'ပင်', 'နိုင်ငံ', 'ပါတယ်', 'ကား', 'အဖွဲ့', 'အခြား', 'ယ့်', 'ပို', 'ည်း', 'ယာ', 'ဆုံး', '၄', 'ကြောင်း', 'တပ်', 'အနေ', 'ဣ', '၏', 'ိုး', 'ချုပ်', 'နေ', 'စ', 'ဟ', 'လွှ', 'အဆ', '၌', 'ဣ', 'နိုင်', 'တည်', 'တွ', 'အချိန်', 'ပဲ', 'ဝင်', 'ဒီ', 'သူ', 'l', 'ဏ', 'ဲ့', 'အထ', 'ပည', 'စိတ်', 'ကြသည်', 'ဩ', 's', 't', 'သေ', 'လျ', 'ိုက်', 'များသည်', 'ငါ', 'ို', 'ထဲ', 'လာ', 'ဝန်', 'ဓ', 'ခဲ့', 'စွာ', 'မ', 'နှင့်', '၎', 'အစိုး', 'ရာ', '၉', 'တယ်', '၎င်း', '၅', 'ပညာ', 'ကြီး', 'သို့မဟုတ်', '၍', 'ို', 'မူ', 'f', 'ခု', 'ိမ', 'c', 'ုပ်', 'l', 'အမ', 'နောက်', 'သော', 'ုန်း', 'ှ', 'ကြ', 'တ', 'ဌာ', 'p', 'ပေါ်', 'h', 'ပင်', 'ဲ', 'ဒီ', 'ဈ', 'လက္ခ', 'r', 'ပြ', 'ဒေ', 'မှ', 'ရှ', 'လျှ', 'လေ့', 'ရောဂါ', 'ော်', 'လည်', 'ဖွဲ့', 'မ်', 'သိ', 'ထုတ်', 'ရိ', 'အား', 'ာ', 'လ', 'ုပ', 'ကျော်', 'အေ', 'g', 'နာ', 'ရီ', 'ရာ', 'v', 'သော', 'လူ', 'တာ', 'ီး', 'j', 'ကာ', 'ရွာ', 'မျက်နှ', 'ယ', 'q', 'ပ်', 'ဌ', 'ဒ', 'ဝ', 'အခြ', 'd', 'ဍ', 'လှ', 'သည်', 'မြန်မာ', 'ယ်', 'ဖ', 'ဦ', 'ါ', 'ဲ့', 'ပျ', 'ရ', 'မိ', 'ပြီး', 'ကို', 'လည်း', 'ဇ', 'မြ', 'နွေး', 'ဘ', 'အသုံးပြု', 'ော', 'ချ', 'မွ', 'လဲ', 'န့်', 'ဂ', 'ည်', 'ကန်', 'က', 'ဗ', 'ေး', 'လု', 'တီ', 'မြို့', 'ိတ်', 'ဘ', 'အရေး', 'ုပ်', 'p', 'ဖ', 'င်', 'သွား', 'တိုင်း', '၃', 'ဿ', 'စေ', 'ဖြတ်', 'ဖွ', 'k', 'သူ', 'တစ်', 'ြ', 'စက်', 'ကြီး', 'ပြည်နယ်', 'ဝါ', 'ဘူး', 'ထ', 'ငြ', 'တော်', 'ကျ', 'ကိ', 'ဈ', 'i', 'အဲ', 'o', 'ေ', 'b', 'င်္', 'ဒါ', 'ညီ', 'w', 'ငွ', 'သ', 'မှတ်', 'ြ', 'ခြား', 'ကြောင့်', 'နာ', 'မှာ', 'f', 'ပွ', 'ကျွန်ုပ်', '၁၀', 'ခေါ', 'ယ်', '၊', 'ှ', 'အဓ', 'နိုင်', 'သက်', 'ပေး', 'a', 'ကျွန်', 'd', 'ထွ', 't', 'n', 'ဠ', 'အရာ', 'ခွ', 'ထ', 'ိုင်', 'ည့်', 'ိမ်', 'သည်', 'တွေ', 'အချ', 'ကား', 'ဗ', 'သုံး', 'အ', 'သူများ', 'ိုက်', 'အမျိုး', '၇', 'စား', 'ဪ', 'တဲ့', 'များ', 's', 'ေ', 'ယ', 'အဓိ', 
'နိုင်သည်', 'ဎ', '္', 'ခ', 'စည်း', '၂', 'န်', 'ရ', 'ခရ', 'နည်း', 'အကြ', 'န်', 'တိ', 'န', 'ပြော', 'မှတ်', 'ောင်း', 'န်း', 'ရေး', 'ဆို', 'ူး', 'ရောက်', 'ထို့', 'ည်', 'ပြန်', 'ဒေ', 'စစ်', 'ဟာ', 'ဏ', 'ပြင်', 'ဆိုင်ရာ', 'z', 'ခုနှစ်', 'နဲ့', 'သ', 'စ္', 'ော', 'c', 'လုပ်', 'မျိုး', 'ကေ', 'ဘာ', 'များ', 'ိတ်', 'စား', 'တို', 'ယား', 'တာ', 'q', 'k', 'ဎ', 'င်း', 'စ်', 'အားလုံး', 'အခ', 'အ', 'အသ', 'ချက်', 'ဆက်', 'ည်း', 'ို့', 'လုပ်', 'ပွဲ', 'ကု', 'စပ်', 'အန', 'ပိုင်း', 'm', 'ဖို့', 'ဃ', 'ု', 'တင်', 'ပ္', 'ပြင်', 'း', 'နယ်', 'm', 'ား', 'အနေ', 'အတွက်', 'င့်', 'ရှိ', 'ခြ', '၄', 'v', 'မဟ', 'က်', 'လေး', 'တိုက်', 'ံ', 'သမ', 'ိုင်', '၏', 'j', 'ကြား', 'ကောင်း', 'ဦး', 'တစ်ခု', 'ထုတ်', 'ကု', 'u', 'မည်', 'ရွ', 'မင်း', 'ပ', 'စ်', 'ဆိုင်', 'ဆက်', 'တွင်', 'မြို့နယ်', 'စု', 'ဟ', 'တစ်ဦး', 'လက်', 'ုတ်', 'သူတို့', '်', 'သာ', 'ဩ', 'မာ', 'ယူ', 'ဤ', '့', 'မန', 'ရောဂ', 'သွ', 'ဝင်', 'အတ', 'ရက်', 'မျက်', 'ထား', '၁', 'တ်', 'တို့', 'ဤ', 'နေ့', 'ရင်', '…', 'ထား', 'ဧ', 'ပါး', 'မာ', 'သား', 'ဆောင်', 'မှု', 'ဂ', 'င', 'အား', 'ဇ', 'ောက်', 'သိ', 'ူ', 'စ', '်', 'အတွ', 'e', 'ဉ', 'ဆို', 'ည', 'သည့်', 'က', 'ဖြစ်', 'တရား', 'ရေ', 'ရပ်', 'ပါ', 'ကူး', 'ကမ္', 'သား', 'ကျ', 'မျိုး', 'ခဲ့', 'ောင်', 'ျ', 'ို့', 'ချ', 'အစိုးရ', 'သတ', 'ပြု', 'ကျွ', 'အရ', 'ိုလ', 'ပြီး', 'လုံး', 'လို', 'z', 'ောက်', 'ဥ', 'တမ်း', 'တရ', 'ကျွန်ုပ်တို့', 'နှစ်', 'ိန်', 'ခံ', 'ကာ', 'ဥပ', 'အသုံး', 'တော်', 'ူး', 'ဘာ', 'ပါ', 'ိပ်', 'ား', 'တ', 'နွ', '္တ', 'ဝ', 'လို့', 'ေ့', 'န္', 'e', 'ေ့', 'စီး', 'y', 'ပြား', 'ပိုး', 'အရ', 'အဖြစ်', 'g', 'ဓာ', 'ပြ', 'တစ်', 'မှ', 'ဖွဲ့', '၍', 'ခြင်း', 'ုံး', 'ဆင်', 'ွန်', 'အလ', 'တော့', 'မို', 'လ', 'စာ', 'ဿ', 'အမြ', 'တင်', 'အကျ', 'ဲ', 'ူ', 'အုပ်', 'y', 'u', 'ဒါ', 'ရော', 'ပို', 'လိုအ', 'a', 'ိ', 'ဆ', '့', 'x', 'လို', '့်', 'ပြည်', 'ယူ', 'ဃ', 'ဆေး', 'ခံ', 'မွ', 'ဘဲ', 'ုံး', 'ော်', 'လိုက်', 'နေ', 'မျ', 'နိုင်င', 'ံ့', 'မှာ', 'နည်း', 'ရန်', 'လက္ခဏာ', 'ဥ', 'င့်', 'ပညာ', 'ပ်', 'အားဖြင့်', 'နှစ်', 'ဆွေး', 'ဖြစ်သည်', 'ဒ', 'ီ', 'နစ်', 'ကျင်', 'ဋ', 'အများ', 'ဉ', 'မ်း', 'န့်', 'ကွ', 'သို့', 'b', '၀', 'ခု', 'ပုံ', 'တော', 'အာ', 'ဖြင့်', 
'ဧ', 'သွား', 'အခါ', 'မ', 'င်း', 'ာ', 'ဆ', 'i', 'ဓ', '၆', 'ကြော', 'ရိ', 'သြ', 'တွေ့', '၌', 'ထိ', 'က္', 'အစ', 'ကြ', 'ရွ', 'ု', 'ေး', 'ွ', 'န်း', 'း', 'ပ', 'ဋ', 'ဆာ', 'အောင်', 'မြို့', 'စိတ်', 'ျ', 'ပြင်ဆင်', 'ါ', 'မဟုတ်', 'ပြု', 'ကိုယ်', 'ရှိ', 'ည', 'ဆောင်', 'ဆွေးနွေး', 'င်', 'n', 'တ်', 'ိုင်း', 'စီ', 'လူ', 'ဍ', 'ဟု', 'ည့်', 'သို့', '္', '႓', 'ိုး', 'န', 'ရေ', 'မယ်', 'ခဲ့သည်', 'ုံ', 'ောင်း', 'ောင်', 'ဦး', 'ထိ', 'တို့', 'ိမ့်', 'x', 'နိုင်ငံ', '၊', 'အပြ', 'ံ', 'ထု', 'ရေး', 'စစ်', 'ီ', 'မှု', 'ရှင်', 'ဦ', 'ရှိသည်', 'ပေါ', 'ဂျ', 'အစား', 'မြန်', 'ခ', 'သာ', 'နှ', 'ပထ', 'ိ', 'သင်', '့်']
-
-    word_lists = []
-    for i in word:
-        cur = i.replace(' ', '')
-        cur = cur.replace('</s>', '')
-        cur = cur.replace('<s>', '')
-        word_lists.append(cur)
-
-    if len(word_lists) == 0:
-        return False
-
-    for ch in word_lists:
-        if ch.isalpha() is False and ch in my_char:
-            return True
-        elif ch.isalpha() is True and isChinese(ch) is True:
-            return False
-
-    return True
 
 # def abbr_dispose(words: List[Any]) -> List[Any]:
 def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
@@ -244,17 +224,6 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
                     end = time_stamp[i][1] 
                     ts_lists.append([begin, end])
                     begin = end
-             elif isMy(ch):
-                word_item += ch
-                word_lists.append(word_item)
-                word_lists.append(' ')
-                word_item = ''
-                alpha_blank = True
-                if time_stamp is not None:
-                    ts_flag = True
-                    end = time_stamp[i][1] 
-                    ts_lists.append([begin, end])
-                    begin = end
             else:
                 word_lists.append(ch)
 

From 5d7ccbfc012905b0fd067da337321b06f66ee3bc Mon Sep 17 00:00:00 2001
From: Chong Zhang <iriszhangchong@gmail.com>
Date: Mon, 27 Mar 2023 17:23:34 +0800
Subject: [PATCH 3/7] Update postprocess_utils.py

add Burmese characters
---
 funasr/utils/postprocess_utils.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index b607e1da0..e3d8d6b47 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -51,6 +51,26 @@ def isAllAlpha(word: Union[List[Any], str]):
 
     return True
 
+def isMy(word: Union[List[Any], str]):
+    my_char = ['စေ', 'ကို', 'ဖြစ်', 'ဌ', 'ေါ', 'ရင်း', 'w', 'ပုံ', 'ပတ်', 'လာ', 'စီး', 'ဘက်', 'က်', 'ုံ', 'ဏာ', 'ဖော်', 'အတွင်း', 'r', '၀', 'တို', 'ပြန်', 'ီး', 'h', 'ဖြ', 'က္ခ', 'မ္', 'အထိ', 'ဆွ', 'ပေး', 'တူ', '၈', 'မ်း', 'ဠ', 'ထို', 'စွ', 'ထားသည်', 'အခြေ', 'စာ', 'တို့သည်', 'အက', 'ရဲ့', 'ွ', 'င', 'o', 'ုတ်', 'လွ', 'ပင်', 'နိုင်ငံ', 'ပါတယ်', 'ကား', 'အဖွဲ့', 'အခြား', 'ယ့်', 'ပို', 'ည်း', 'ယာ', 'ဆုံး', '၄', 'ကြောင်း', 'တပ်', 'အနေ', 'ဣ', '၏', 'ိုး', 'ချုပ်', 'နေ', 'စ', 'ဟ', 'လွှ', 'အဆ', '၌', 'ဣ', 'နိုင်', 'တည်', 'တွ', 'အချိန်', 'ပဲ', 'ဝင်', 'ဒီ', 'သူ', 'l', 'ဏ', 'ဲ့', 'အထ', 'ပည', 'စိတ်', 'ကြသည်', 'ဩ', 's', 't', 'သေ', 'လျ', 'ိုက်', 'များသည်', 'ငါ', 'ို', 'ထဲ', 'လာ', 'ဝန်', 'ဓ', 'ခဲ့', 'စွာ', 'မ', 'နှင့်', '၎', 'အစိုး', 'ရာ', '၉', 'တယ်', '၎င်း', '၅', 'ပညာ', 'ကြီး', 'သို့မဟုတ်', '၍', 'ို', 'မူ', 'f', 'ခု', 'ိမ', 'c', 'ုပ်', 'l', 'အမ', 'နောက်', 'သော', 'ုန်း', 'ှ', 'ကြ', 'တ', 'ဌာ', 'p', 'ပေါ်', 'h', 'ပင်', 'ဲ', 'ဒီ', 'ဈ', 'လက္ခ', 'r', 'ပြ', 'ဒေ', 'မှ', 'ရှ', 'လျှ', 'လေ့', 'ရောဂါ', 'ော်', 'လည်', 'ဖွဲ့', 'မ်', 'သိ', 'ထုတ်', 'ရိ', 'အား', 'ာ', 'လ', 'ုပ', 'ကျော်', 'အေ', 'g', 'နာ', 'ရီ', 'ရာ', 'v', 'သော', 'လူ', 'တာ', 'ီး', 'j', 'ကာ', 'ရွာ', 'မျက်နှ', 'ယ', 'q', 'ပ်', 'ဌ', 'ဒ', 'ဝ', 'အခြ', 'd', 'ဍ', 'လှ', 'သည်', 'မြန်မာ', 'ယ်', 'ဖ', 'ဦ', 'ါ', 'ဲ့', 'ပျ', 'ရ', 'မိ', 'ပြီး', 'ကို', 'လည်း', 'ဇ', 'မြ', 'နွေး', 'ဘ', 'အသုံးပြု', 'ော', 'ချ', 'မွ', 'လဲ', 'န့်', 'ဂ', 'ည်', 'ကန်', 'က', 'ဗ', 'ေး', 'လု', 'တီ', 'မြို့', 'ိတ်', 'ဘ', 'အရေး', 'ုပ်', 'p', 'ဖ', 'င်', 'သွား', 'တိုင်း', '၃', 'ဿ', 'စေ', 'ဖြတ်', 'ဖွ', 'k', 'သူ', 'တစ်', 'ြ', 'စက်', 'ကြီး', 'ပြည်နယ်', 'ဝါ', 'ဘူး', 'ထ', 'ငြ', 'တော်', 'ကျ', 'ကိ', 'ဈ', 'i', 'အဲ', 'o', 'ေ', 'b', 'င်္', 'ဒါ', 'ညီ', 'w', 'ငွ', 'သ', 'မှတ်', 'ြ', 'ခြား', 'ကြောင့်', 'နာ', 'မှာ', 'f', 'ပွ', 'ကျွန်ုပ်', '၁၀', 'ခေါ', 'ယ်', '၊', 'ှ', 'အဓ', 'နိုင်', 'သက်', 'ပေး', 'a', 'ကျွန်', 'd', 'ထွ', 't', 'n', 'ဠ', 'အရာ', 'ခွ', 'ထ', 'ိုင်', 'ည့်', 'ိမ်', 'သည်', 'တွေ', 'အချ', 'ကား', 'ဗ', 'သုံး', 'အ', 'သူများ', 'ိုက်', 'အမျိုး', '၇', 'စား', 'ဪ', 'တဲ့', 'များ', 's', 'ေ', 'ယ', 'အဓိ', 
'နိုင်သည်', 'ဎ', '္', 'ခ', 'စည်း', '၂', 'န်', 'ရ', 'ခရ', 'နည်း', 'အကြ', 'န်', 'တိ', 'န', 'ပြော', 'မှတ်', 'ောင်း', 'န်း', 'ရေး', 'ဆို', 'ူး', 'ရောက်', 'ထို့', 'ည်', 'ပြန်', 'ဒေ', 'စစ်', 'ဟာ', 'ဏ', 'ပြင်', 'ဆိုင်ရာ', 'z', 'ခုနှစ်', 'နဲ့', 'သ', 'စ္', 'ော', 'c', 'လုပ်', 'မျိုး', 'ကေ', 'ဘာ', 'များ', 'ိတ်', 'စား', 'တို', 'ယား', 'တာ', 'q', 'k', 'ဎ', 'င်း', 'စ်', 'အားလုံး', 'အခ', 'အ', 'အသ', 'ချက်', 'ဆက်', 'ည်း', 'ို့', 'လုပ်', 'ပွဲ', 'ကု', 'စပ်', 'အန', 'ပိုင်း', 'm', 'ဖို့', 'ဃ', 'ု', 'တင်', 'ပ္', 'ပြင်', 'း', 'နယ်', 'm', 'ား', 'အနေ', 'အတွက်', 'င့်', 'ရှိ', 'ခြ', '၄', 'v', 'မဟ', 'က်', 'လေး', 'တိုက်', 'ံ', 'သမ', 'ိုင်', '၏', 'j', 'ကြား', 'ကောင်း', 'ဦး', 'တစ်ခု', 'ထုတ်', 'ကု', 'u', 'မည်', 'ရွ', 'မင်း', 'ပ', 'စ်', 'ဆိုင်', 'ဆက်', 'တွင်', 'မြို့နယ်', 'စု', 'ဟ', 'တစ်ဦး', 'လက်', 'ုတ်', 'သူတို့', '်', 'သာ', 'ဩ', 'မာ', 'ယူ', 'ဤ', '့', 'မန', 'ရောဂ', 'သွ', 'ဝင်', 'အတ', 'ရက်', 'မျက်', 'ထား', '၁', 'တ်', 'တို့', 'ဤ', 'နေ့', 'ရင်', '…', 'ထား', 'ဧ', 'ပါး', 'မာ', 'သား', 'ဆောင်', 'မှု', 'ဂ', 'င', 'အား', 'ဇ', 'ောက်', 'သိ', 'ူ', 'စ', '်', 'အတွ', 'e', 'ဉ', 'ဆို', 'ည', 'သည့်', 'က', 'ဖြစ်', 'တရား', 'ရေ', 'ရပ်', 'ပါ', 'ကူး', 'ကမ္', 'သား', 'ကျ', 'မျိုး', 'ခဲ့', 'ောင်', 'ျ', 'ို့', 'ချ', 'အစိုးရ', 'သတ', 'ပြု', 'ကျွ', 'အရ', 'ိုလ', 'ပြီး', 'လုံး', 'လို', 'z', 'ောက်', 'ဥ', 'တမ်း', 'တရ', 'ကျွန်ုပ်တို့', 'နှစ်', 'ိန်', 'ခံ', 'ကာ', 'ဥပ', 'အသုံး', 'တော်', 'ူး', 'ဘာ', 'ပါ', 'ိပ်', 'ား', 'တ', 'နွ', '္တ', 'ဝ', 'လို့', 'ေ့', 'န္', 'e', 'ေ့', 'စီး', 'y', 'ပြား', 'ပိုး', 'အရ', 'အဖြစ်', 'g', 'ဓာ', 'ပြ', 'တစ်', 'မှ', 'ဖွဲ့', '၍', 'ခြင်း', 'ုံး', 'ဆင်', 'ွန်', 'အလ', 'တော့', 'မို', 'လ', 'စာ', 'ဿ', 'အမြ', 'တင်', 'အကျ', 'ဲ', 'ူ', 'အုပ်', 'y', 'u', 'ဒါ', 'ရော', 'ပို', 'လိုအ', 'a', 'ိ', 'ဆ', '့', 'x', 'လို', '့်', 'ပြည်', 'ယူ', 'ဃ', 'ဆေး', 'ခံ', 'မွ', 'ဘဲ', 'ုံး', 'ော်', 'လိုက်', 'နေ', 'မျ', 'နိုင်င', 'ံ့', 'မှာ', 'နည်း', 'ရန်', 'လက္ခဏာ', 'ဥ', 'င့်', 'ပညာ', 'ပ်', 'အားဖြင့်', 'နှစ်', 'ဆွေး', 'ဖြစ်သည်', 'ဒ', 'ီ', 'နစ်', 'ကျင်', 'ဋ', 'အများ', 'ဉ', 'မ်း', 'န့်', 'ကွ', 'သို့', 'b', '၀', 'ခု', 'ပုံ', 'တော', 'အာ', 'ဖြင့်', 
'ဧ', 'သွား', 'အခါ', 'မ', 'င်း', 'ာ', 'ဆ', 'i', 'ဓ', '၆', 'ကြော', 'ရိ', 'သြ', 'တွေ့', '၌', 'ထိ', 'က္', 'အစ', 'ကြ', 'ရွ', 'ု', 'ေး', 'ွ', 'န်း', 'း', 'ပ', 'ဋ', 'ဆာ', 'အောင်', 'မြို့', 'စိတ်', 'ျ', 'ပြင်ဆင်', 'ါ', 'မဟုတ်', 'ပြု', 'ကိုယ်', 'ရှိ', 'ည', 'ဆောင်', 'ဆွေးနွေး', 'င်', 'n', 'တ်', 'ိုင်း', 'စီ', 'လူ', 'ဍ', 'ဟု', 'ည့်', 'သို့', '္', '႓', 'ိုး', 'န', 'ရေ', 'မယ်', 'ခဲ့သည်', 'ုံ', 'ောင်း', 'ောင်', 'ဦး', 'ထိ', 'တို့', 'ိမ့်', 'x', 'နိုင်ငံ', '၊', 'အပြ', 'ံ', 'ထု', 'ရေး', 'စစ်', 'ီ', 'မှု', 'ရှင်', 'ဦ', 'ရှိသည်', 'ပေါ', 'ဂျ', 'အစား', 'မြန်', 'ခ', 'သာ', 'နှ', 'ပထ', 'ိ', 'သင်', '့်']
+
+    word_lists = []
+    for i in word:
+        cur = i.replace(' ', '')
+        cur = cur.replace('</s>', '')
+        cur = cur.replace('<s>', '')
+        word_lists.append(cur)
+
+    if len(word_lists) == 0:
+        return False
+
+    for ch in word_lists:
+        if ch.isalpha() is False and ch in my_char:
+            return True
+        elif ch.isalpha() is True and isChinese(ch) is True:
+            return False
+
+    return True
 
 # def abbr_dispose(words: List[Any]) -> List[Any]:
 def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
@@ -224,6 +244,17 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
                     end = time_stamp[i][1] 
                     ts_lists.append([begin, end])
                     begin = end
+             elif isMy(ch):
+                word_item += ch
+                word_lists.append(word_item)
+                word_lists.append(' ')
+                word_item = ''
+                alpha_blank = True
+                if time_stamp is not None:
+                    ts_flag = True
+                    end = time_stamp[i][1] 
+                    ts_lists.append([begin, end])
+                    begin = end
             else:
                 word_lists.append(ch)
 

From 09de0c653ebec3b802de807f9705802adcc46896 Mon Sep 17 00:00:00 2001
From: "chong.zhang" <chong.zhang@alibaba-inc.com>
Date: Mon, 27 Mar 2023 17:42:35 +0800
Subject: [PATCH 4/7] update

---
 funasr/utils/postprocess_utils.py | 31 -------------------------------
 1 file changed, 31 deletions(-)

diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index e3d8d6b47..b607e1da0 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -51,26 +51,6 @@ def isAllAlpha(word: Union[List[Any], str]):
 
     return True
 
-def isMy(word: Union[List[Any], str]):
-    my_char = ['စေ', 'ကို', 'ဖြစ်', 'ဌ', 'ေါ', 'ရင်း', 'w', 'ပုံ', 'ပတ်', 'လာ', 'စီး', 'ဘက်', 'က်', 'ုံ', 'ဏာ', 'ဖော်', 'အတွင်း', 'r', '၀', 'တို', 'ပြန်', 'ီး', 'h', 'ဖြ', 'က္ခ', 'မ္', 'အထိ', 'ဆွ', 'ပေး', 'တူ', '၈', 'မ်း', 'ဠ', 'ထို', 'စွ', 'ထားသည်', 'အခြေ', 'စာ', 'တို့သည်', 'အက', 'ရဲ့', 'ွ', 'င', 'o', 'ုတ်', 'လွ', 'ပင်', 'နိုင်ငံ', 'ပါတယ်', 'ကား', 'အဖွဲ့', 'အခြား', 'ယ့်', 'ပို', 'ည်း', 'ယာ', 'ဆုံး', '၄', 'ကြောင်း', 'တပ်', 'အနေ', 'ဣ', '၏', 'ိုး', 'ချုပ်', 'နေ', 'စ', 'ဟ', 'လွှ', 'အဆ', '၌', 'ဣ', 'နိုင်', 'တည်', 'တွ', 'အချိန်', 'ပဲ', 'ဝင်', 'ဒီ', 'သူ', 'l', 'ဏ', 'ဲ့', 'အထ', 'ပည', 'စိတ်', 'ကြသည်', 'ဩ', 's', 't', 'သေ', 'လျ', 'ိုက်', 'များသည်', 'ငါ', 'ို', 'ထဲ', 'လာ', 'ဝန်', 'ဓ', 'ခဲ့', 'စွာ', 'မ', 'နှင့်', '၎', 'အစိုး', 'ရာ', '၉', 'တယ်', '၎င်း', '၅', 'ပညာ', 'ကြီး', 'သို့မဟုတ်', '၍', 'ို', 'မူ', 'f', 'ခု', 'ိမ', 'c', 'ုပ်', 'l', 'အမ', 'နောက်', 'သော', 'ုန်း', 'ှ', 'ကြ', 'တ', 'ဌာ', 'p', 'ပေါ်', 'h', 'ပင်', 'ဲ', 'ဒီ', 'ဈ', 'လက္ခ', 'r', 'ပြ', 'ဒေ', 'မှ', 'ရှ', 'လျှ', 'လေ့', 'ရောဂါ', 'ော်', 'လည်', 'ဖွဲ့', 'မ်', 'သိ', 'ထုတ်', 'ရိ', 'အား', 'ာ', 'လ', 'ုပ', 'ကျော်', 'အေ', 'g', 'နာ', 'ရီ', 'ရာ', 'v', 'သော', 'လူ', 'တာ', 'ီး', 'j', 'ကာ', 'ရွာ', 'မျက်နှ', 'ယ', 'q', 'ပ်', 'ဌ', 'ဒ', 'ဝ', 'အခြ', 'd', 'ဍ', 'လှ', 'သည်', 'မြန်မာ', 'ယ်', 'ဖ', 'ဦ', 'ါ', 'ဲ့', 'ပျ', 'ရ', 'မိ', 'ပြီး', 'ကို', 'လည်း', 'ဇ', 'မြ', 'နွေး', 'ဘ', 'အသုံးပြု', 'ော', 'ချ', 'မွ', 'လဲ', 'န့်', 'ဂ', 'ည်', 'ကန်', 'က', 'ဗ', 'ေး', 'လု', 'တီ', 'မြို့', 'ိတ်', 'ဘ', 'အရေး', 'ုပ်', 'p', 'ဖ', 'င်', 'သွား', 'တိုင်း', '၃', 'ဿ', 'စေ', 'ဖြတ်', 'ဖွ', 'k', 'သူ', 'တစ်', 'ြ', 'စက်', 'ကြီး', 'ပြည်နယ်', 'ဝါ', 'ဘူး', 'ထ', 'ငြ', 'တော်', 'ကျ', 'ကိ', 'ဈ', 'i', 'အဲ', 'o', 'ေ', 'b', 'င်္', 'ဒါ', 'ညီ', 'w', 'ငွ', 'သ', 'မှတ်', 'ြ', 'ခြား', 'ကြောင့်', 'နာ', 'မှာ', 'f', 'ပွ', 'ကျွန်ုပ်', '၁၀', 'ခေါ', 'ယ်', '၊', 'ှ', 'အဓ', 'နိုင်', 'သက်', 'ပေး', 'a', 'ကျွန်', 'd', 'ထွ', 't', 'n', 'ဠ', 'အရာ', 'ခွ', 'ထ', 'ိုင်', 'ည့်', 'ိမ်', 'သည်', 'တွေ', 'အချ', 'ကား', 'ဗ', 'သုံး', 'အ', 'သူများ', 'ိုက်', 'အမျိုး', '၇', 'စား', 'ဪ', 'တဲ့', 'များ', 's', 'ေ', 'ယ', 'အဓိ', 
'နိုင်သည်', 'ဎ', '္', 'ခ', 'စည်း', '၂', 'န်', 'ရ', 'ခရ', 'နည်း', 'အကြ', 'န်', 'တိ', 'န', 'ပြော', 'မှတ်', 'ောင်း', 'န်း', 'ရေး', 'ဆို', 'ူး', 'ရောက်', 'ထို့', 'ည်', 'ပြန်', 'ဒေ', 'စစ်', 'ဟာ', 'ဏ', 'ပြင်', 'ဆိုင်ရာ', 'z', 'ခုနှစ်', 'နဲ့', 'သ', 'စ္', 'ော', 'c', 'လုပ်', 'မျိုး', 'ကေ', 'ဘာ', 'များ', 'ိတ်', 'စား', 'တို', 'ယား', 'တာ', 'q', 'k', 'ဎ', 'င်း', 'စ်', 'အားလုံး', 'အခ', 'အ', 'အသ', 'ချက်', 'ဆက်', 'ည်း', 'ို့', 'လုပ်', 'ပွဲ', 'ကု', 'စပ်', 'အန', 'ပိုင်း', 'm', 'ဖို့', 'ဃ', 'ု', 'တင်', 'ပ္', 'ပြင်', 'း', 'နယ်', 'm', 'ား', 'အနေ', 'အတွက်', 'င့်', 'ရှိ', 'ခြ', '၄', 'v', 'မဟ', 'က်', 'လေး', 'တိုက်', 'ံ', 'သမ', 'ိုင်', '၏', 'j', 'ကြား', 'ကောင်း', 'ဦး', 'တစ်ခု', 'ထုတ်', 'ကု', 'u', 'မည်', 'ရွ', 'မင်း', 'ပ', 'စ်', 'ဆိုင်', 'ဆက်', 'တွင်', 'မြို့နယ်', 'စု', 'ဟ', 'တစ်ဦး', 'လက်', 'ုတ်', 'သူတို့', '်', 'သာ', 'ဩ', 'မာ', 'ယူ', 'ဤ', '့', 'မန', 'ရောဂ', 'သွ', 'ဝင်', 'အတ', 'ရက်', 'မျက်', 'ထား', '၁', 'တ်', 'တို့', 'ဤ', 'နေ့', 'ရင်', '…', 'ထား', 'ဧ', 'ပါး', 'မာ', 'သား', 'ဆောင်', 'မှု', 'ဂ', 'င', 'အား', 'ဇ', 'ောက်', 'သိ', 'ူ', 'စ', '်', 'အတွ', 'e', 'ဉ', 'ဆို', 'ည', 'သည့်', 'က', 'ဖြစ်', 'တရား', 'ရေ', 'ရပ်', 'ပါ', 'ကူး', 'ကမ္', 'သား', 'ကျ', 'မျိုး', 'ခဲ့', 'ောင်', 'ျ', 'ို့', 'ချ', 'အစိုးရ', 'သတ', 'ပြု', 'ကျွ', 'အရ', 'ိုလ', 'ပြီး', 'လုံး', 'လို', 'z', 'ောက်', 'ဥ', 'တမ်း', 'တရ', 'ကျွန်ုပ်တို့', 'နှစ်', 'ိန်', 'ခံ', 'ကာ', 'ဥပ', 'အသုံး', 'တော်', 'ူး', 'ဘာ', 'ပါ', 'ိပ်', 'ား', 'တ', 'နွ', '္တ', 'ဝ', 'လို့', 'ေ့', 'န္', 'e', 'ေ့', 'စီး', 'y', 'ပြား', 'ပိုး', 'အရ', 'အဖြစ်', 'g', 'ဓာ', 'ပြ', 'တစ်', 'မှ', 'ဖွဲ့', '၍', 'ခြင်း', 'ုံး', 'ဆင်', 'ွန်', 'အလ', 'တော့', 'မို', 'လ', 'စာ', 'ဿ', 'အမြ', 'တင်', 'အကျ', 'ဲ', 'ူ', 'အုပ်', 'y', 'u', 'ဒါ', 'ရော', 'ပို', 'လိုအ', 'a', 'ိ', 'ဆ', '့', 'x', 'လို', '့်', 'ပြည်', 'ယူ', 'ဃ', 'ဆေး', 'ခံ', 'မွ', 'ဘဲ', 'ုံး', 'ော်', 'လိုက်', 'နေ', 'မျ', 'နိုင်င', 'ံ့', 'မှာ', 'နည်း', 'ရန်', 'လက္ခဏာ', 'ဥ', 'င့်', 'ပညာ', 'ပ်', 'အားဖြင့်', 'နှစ်', 'ဆွေး', 'ဖြစ်သည်', 'ဒ', 'ီ', 'နစ်', 'ကျင်', 'ဋ', 'အများ', 'ဉ', 'မ်း', 'န့်', 'ကွ', 'သို့', 'b', '၀', 'ခု', 'ပုံ', 'တော', 'အာ', 'ဖြင့်', 
'ဧ', 'သွား', 'အခါ', 'မ', 'င်း', 'ာ', 'ဆ', 'i', 'ဓ', '၆', 'ကြော', 'ရိ', 'သြ', 'တွေ့', '၌', 'ထိ', 'က္', 'အစ', 'ကြ', 'ရွ', 'ု', 'ေး', 'ွ', 'န်း', 'း', 'ပ', 'ဋ', 'ဆာ', 'အောင်', 'မြို့', 'စိတ်', 'ျ', 'ပြင်ဆင်', 'ါ', 'မဟုတ်', 'ပြု', 'ကိုယ်', 'ရှိ', 'ည', 'ဆောင်', 'ဆွေးနွေး', 'င်', 'n', 'တ်', 'ိုင်း', 'စီ', 'လူ', 'ဍ', 'ဟု', 'ည့်', 'သို့', '္', '႓', 'ိုး', 'န', 'ရေ', 'မယ်', 'ခဲ့သည်', 'ုံ', 'ောင်း', 'ောင်', 'ဦး', 'ထိ', 'တို့', 'ိမ့်', 'x', 'နိုင်ငံ', '၊', 'အပြ', 'ံ', 'ထု', 'ရေး', 'စစ်', 'ီ', 'မှု', 'ရှင်', 'ဦ', 'ရှိသည်', 'ပေါ', 'ဂျ', 'အစား', 'မြန်', 'ခ', 'သာ', 'နှ', 'ပထ', 'ိ', 'သင်', '့်']
-
-    word_lists = []
-    for i in word:
-        cur = i.replace(' ', '')
-        cur = cur.replace('</s>', '')
-        cur = cur.replace('<s>', '')
-        word_lists.append(cur)
-
-    if len(word_lists) == 0:
-        return False
-
-    for ch in word_lists:
-        if ch.isalpha() is False and ch in my_char:
-            return True
-        elif ch.isalpha() is True and isChinese(ch) is True:
-            return False
-
-    return True
 
 # def abbr_dispose(words: List[Any]) -> List[Any]:
 def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
@@ -244,17 +224,6 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
                     end = time_stamp[i][1] 
                     ts_lists.append([begin, end])
                     begin = end
-             elif isMy(ch):
-                word_item += ch
-                word_lists.append(word_item)
-                word_lists.append(' ')
-                word_item = ''
-                alpha_blank = True
-                if time_stamp is not None:
-                    ts_flag = True
-                    end = time_stamp[i][1] 
-                    ts_lists.append([begin, end])
-                    begin = end
             else:
                 word_lists.append(ch)
 

From 6be52a387938e40961194dfb79d079ab24137b32 Mon Sep 17 00:00:00 2001
From: "chong.zhang" <chong.zhang@alibaba-inc.com>
Date: Wed, 12 Apr 2023 17:37:57 +0800
Subject: [PATCH 5/7] update

---
 funasr/utils/postprocess_utils.py | 245 ------------------------------
 1 file changed, 245 deletions(-)
 delete mode 100644 funasr/utils/postprocess_utils.py

diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
deleted file mode 100644
index b607e1da0..000000000
--- a/funasr/utils/postprocess_utils.py
+++ /dev/null
@@ -1,245 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-
-import string
-import logging
-from typing import Any, List, Union
-
-
-def isChinese(ch: str):
-    if '\u4e00' <= ch <= '\u9fff' or '\u0030' <= ch <= '\u0039' or ch == '@':
-        return True
-    return False
-
-
-def isAllChinese(word: Union[List[Any], str]):
-    word_lists = []
-    for i in word:
-        cur = i.replace(' ', '')
-        cur = cur.replace('</s>', '')
-        cur = cur.replace('<s>', '')
-        cur = cur.replace('<unk>', '')
-        cur = cur.replace('<OOV>', '')
-        word_lists.append(cur)
-
-    if len(word_lists) == 0:
-        return False
-
-    for ch in word_lists:
-        if isChinese(ch) is False:
-            return False
-    return True
-
-
-def isAllAlpha(word: Union[List[Any], str]):
-    word_lists = []
-    for i in word:
-        cur = i.replace(' ', '')
-        cur = cur.replace('</s>', '')
-        cur = cur.replace('<s>', '')
-        cur = cur.replace('<unk>', '')
-        cur = cur.replace('<OOV>', '')
-        word_lists.append(cur)
-
-    if len(word_lists) == 0:
-        return False
-
-    for ch in word_lists:
-        if ch.isalpha() is False and ch != "'":
-            return False
-        elif ch.isalpha() is True and isChinese(ch) is True:
-            return False
-
-    return True
-
-
-# def abbr_dispose(words: List[Any]) -> List[Any]:
-def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
-    words_size = len(words)
-    word_lists = []
-    abbr_begin = []
-    abbr_end = []
-    last_num = -1
-    ts_lists = []
-    ts_nums = []
-    ts_index = 0
-    for num in range(words_size):
-        if num <= last_num:
-            continue
-
-        if len(words[num]) == 1 and words[num].encode('utf-8').isalpha():
-            if num + 1 < words_size and words[
-                    num + 1] == ' ' and num + 2 < words_size and len(
-                        words[num +
-                              2]) == 1 and words[num +
-                                                 2].encode('utf-8').isalpha():
-                # found the begin of abbr
-                abbr_begin.append(num)
-                num += 2
-                abbr_end.append(num)
-                # to find the end of abbr
-                while True:
-                    num += 1
-                    if num < words_size and words[num] == ' ':
-                        num += 1
-                        if num < words_size and len(
-                                words[num]) == 1 and words[num].encode(
-                                    'utf-8').isalpha():
-                            abbr_end.pop()
-                            abbr_end.append(num)
-                            last_num = num
-                        else:
-                            break
-                    else:
-                        break
-
-    for num in range(words_size):
-        if words[num] == ' ':
-            ts_nums.append(ts_index)
-        else:
-            ts_nums.append(ts_index)
-            ts_index += 1 
-    last_num = -1
-    for num in range(words_size):
-        if num <= last_num:
-            continue
-
-        if num in abbr_begin:
-            if time_stamp is not None:
-                begin = time_stamp[ts_nums[num]][0]
-            abbr_word = words[num].upper()
-            num += 1
-            while num < words_size:
-                if num in abbr_end:
-                    abbr_word += words[num].upper()
-                    last_num = num
-                    break
-                else:
-                    if words[num].encode('utf-8').isalpha():
-                        abbr_word += words[num].upper()
-                num += 1
-            word_lists.append(abbr_word)
-            if time_stamp is not None:
-                end = time_stamp[ts_nums[num]][1]
-                ts_lists.append([begin, end])
-        else:
-            word_lists.append(words[num])
-            if time_stamp is not None and words[num] != ' ':
-                begin = time_stamp[ts_nums[num]][0]
-                end = time_stamp[ts_nums[num]][1]
-                ts_lists.append([begin, end])
-                begin = end
-
-    if time_stamp is not None:
-        return word_lists, ts_lists
-    else:
-        return word_lists
-
-
-def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
-    middle_lists = []
-    word_lists = []
-    word_item = ''
-    ts_lists = []
-
-    # wash words lists
-    for i in words:
-        word = ''
-        if isinstance(i, str):
-            word = i
-        else:
-            word = i.decode('utf-8')
-
-        if word in ['<s>', '</s>', '<unk>', '<OOV>']:
-            continue
-        else:
-            middle_lists.append(word)
-
-    # all chinese characters
-    if isAllChinese(middle_lists):
-        for i, ch in enumerate(middle_lists):
-            word_lists.append(ch.replace(' ', ''))
-        if time_stamp is not None:
-            ts_lists = time_stamp
-
-    # all alpha characters
-    elif isAllAlpha(middle_lists):
-        ts_flag = True
-        for i, ch in enumerate(middle_lists):
-            if ts_flag and time_stamp is not None:
-                begin = time_stamp[i][0]
-                end = time_stamp[i][1]
-            word = ''
-            if '@@' in ch:
-                word = ch.replace('@@', '')
-                word_item += word
-                if time_stamp is not None:
-                    ts_flag = False
-                    end = time_stamp[i][1]
-            else:
-                word_item += ch
-                word_lists.append(word_item)
-                word_lists.append(' ')
-                word_item = ''
-                if time_stamp is not None:
-                    ts_flag = True
-                    end = time_stamp[i][1]
-                    ts_lists.append([begin, end])
-                    begin = end
-
-    # mix characters
-    else:
-        alpha_blank = False
-        ts_flag = True
-        begin = -1
-        end = -1
-        for i, ch in enumerate(middle_lists):
-            if ts_flag and time_stamp is not None:
-                begin = time_stamp[i][0]
-                end = time_stamp[i][1]
-            word = ''
-            if isAllChinese(ch):
-                if alpha_blank is True:
-                    word_lists.pop()
-                word_lists.append(ch)
-                alpha_blank = False
-                if time_stamp is not None:
-                    ts_flag = True
-                    ts_lists.append([begin, end])
-                    begin = end
-            elif '@@' in ch:
-                word = ch.replace('@@', '')
-                word_item += word
-                alpha_blank = False
-                if time_stamp is not None:
-                    ts_flag = False
-                    end = time_stamp[i][1]
-            elif isAllAlpha(ch):
-                word_item += ch
-                word_lists.append(word_item)
-                word_lists.append(' ')
-                word_item = ''
-                alpha_blank = True
-                if time_stamp is not None:
-                    ts_flag = True
-                    end = time_stamp[i][1] 
-                    ts_lists.append([begin, end])
-                    begin = end
-            else:
-                word_lists.append(ch)
-
-    if time_stamp is not None: 
-        word_lists, ts_lists = abbr_dispose(word_lists, ts_lists)
-        real_word_lists = []
-        for ch in word_lists:
-            if ch != ' ':
-                real_word_lists.append(ch)
-        sentence = ' '.join(real_word_lists).strip()
-        return sentence, ts_lists, real_word_lists
-    else:
-        word_lists = abbr_dispose(word_lists)
-        real_word_lists = []
-        for ch in word_lists:
-            if ch != ' ':
-                real_word_lists.append(ch)
-        sentence = ''.join(word_lists).strip()
-        return sentence, real_word_lists

From 8e202636ac9d0621f26645c58e19bf1416ffa077 Mon Sep 17 00:00:00 2001
From: "chong.zhang" <chong.zhang@alibaba-inc.com>
Date: Wed, 12 Apr 2023 17:40:08 +0800
Subject: [PATCH 6/7] update

---
 funasr/utils/postprocess_utils.py | 245 ++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 funasr/utils/postprocess_utils.py

diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
new file mode 100644
index 000000000..b607e1da0
--- /dev/null
+++ b/funasr/utils/postprocess_utils.py
@@ -0,0 +1,245 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+
+import string
+import logging
+from typing import Any, List, Union
+
+
+def isChinese(ch: str):
+    if '\u4e00' <= ch <= '\u9fff' or '\u0030' <= ch <= '\u0039' or ch == '@':
+        return True
+    return False
+
+
+def isAllChinese(word: Union[List[Any], str]):
+    word_lists = []
+    for i in word:
+        cur = i.replace(' ', '')
+        cur = cur.replace('</s>', '')
+        cur = cur.replace('<s>', '')
+        cur = cur.replace('<unk>', '')
+        cur = cur.replace('<OOV>', '')
+        word_lists.append(cur)
+
+    if len(word_lists) == 0:
+        return False
+
+    for ch in word_lists:
+        if isChinese(ch) is False:
+            return False
+    return True
+
+
+def isAllAlpha(word: Union[List[Any], str]):
+    word_lists = []
+    for i in word:
+        cur = i.replace(' ', '')
+        cur = cur.replace('</s>', '')
+        cur = cur.replace('<s>', '')
+        cur = cur.replace('<unk>', '')
+        cur = cur.replace('<OOV>', '')
+        word_lists.append(cur)
+
+    if len(word_lists) == 0:
+        return False
+
+    for ch in word_lists:
+        if ch.isalpha() is False and ch != "'":
+            return False
+        elif ch.isalpha() is True and isChinese(ch) is True:
+            return False
+
+    return True
+
+
+# def abbr_dispose(words: List[Any]) -> List[Any]:
+def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]:
+    words_size = len(words)
+    word_lists = []
+    abbr_begin = []
+    abbr_end = []
+    last_num = -1
+    ts_lists = []
+    ts_nums = []
+    ts_index = 0
+    for num in range(words_size):
+        if num <= last_num:
+            continue
+
+        if len(words[num]) == 1 and words[num].encode('utf-8').isalpha():
+            if num + 1 < words_size and words[
+                    num + 1] == ' ' and num + 2 < words_size and len(
+                        words[num +
+                              2]) == 1 and words[num +
+                                                 2].encode('utf-8').isalpha():
+                # found the begin of abbr
+                abbr_begin.append(num)
+                num += 2
+                abbr_end.append(num)
+                # to find the end of abbr
+                while True:
+                    num += 1
+                    if num < words_size and words[num] == ' ':
+                        num += 1
+                        if num < words_size and len(
+                                words[num]) == 1 and words[num].encode(
+                                    'utf-8').isalpha():
+                            abbr_end.pop()
+                            abbr_end.append(num)
+                            last_num = num
+                        else:
+                            break
+                    else:
+                        break
+
+    for num in range(words_size):
+        if words[num] == ' ':
+            ts_nums.append(ts_index)
+        else:
+            ts_nums.append(ts_index)
+            ts_index += 1 
+    last_num = -1
+    for num in range(words_size):
+        if num <= last_num:
+            continue
+
+        if num in abbr_begin:
+            if time_stamp is not None:
+                begin = time_stamp[ts_nums[num]][0]
+            abbr_word = words[num].upper()
+            num += 1
+            while num < words_size:
+                if num in abbr_end:
+                    abbr_word += words[num].upper()
+                    last_num = num
+                    break
+                else:
+                    if words[num].encode('utf-8').isalpha():
+                        abbr_word += words[num].upper()
+                num += 1
+            word_lists.append(abbr_word)
+            if time_stamp is not None:
+                end = time_stamp[ts_nums[num]][1]
+                ts_lists.append([begin, end])
+        else:
+            word_lists.append(words[num])
+            if time_stamp is not None and words[num] != ' ':
+                begin = time_stamp[ts_nums[num]][0]
+                end = time_stamp[ts_nums[num]][1]
+                ts_lists.append([begin, end])
+                begin = end
+
+    if time_stamp is not None:
+        return word_lists, ts_lists
+    else:
+        return word_lists
+
+
+def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
+    middle_lists = []
+    word_lists = []
+    word_item = ''
+    ts_lists = []
+
+    # wash words lists
+    for i in words:
+        word = ''
+        if isinstance(i, str):
+            word = i
+        else:
+            word = i.decode('utf-8')
+
+        if word in ['<s>', '</s>', '<unk>', '<OOV>']:
+            continue
+        else:
+            middle_lists.append(word)
+
+    # all chinese characters
+    if isAllChinese(middle_lists):
+        for i, ch in enumerate(middle_lists):
+            word_lists.append(ch.replace(' ', ''))
+        if time_stamp is not None:
+            ts_lists = time_stamp
+
+    # all alpha characters
+    elif isAllAlpha(middle_lists):
+        ts_flag = True
+        for i, ch in enumerate(middle_lists):
+            if ts_flag and time_stamp is not None:
+                begin = time_stamp[i][0]
+                end = time_stamp[i][1]
+            word = ''
+            if '@@' in ch:
+                word = ch.replace('@@', '')
+                word_item += word
+                if time_stamp is not None:
+                    ts_flag = False
+                    end = time_stamp[i][1]
+            else:
+                word_item += ch
+                word_lists.append(word_item)
+                word_lists.append(' ')
+                word_item = ''
+                if time_stamp is not None:
+                    ts_flag = True
+                    end = time_stamp[i][1]
+                    ts_lists.append([begin, end])
+                    begin = end
+
+    # mix characters
+    else:
+        alpha_blank = False
+        ts_flag = True
+        begin = -1
+        end = -1
+        for i, ch in enumerate(middle_lists):
+            if ts_flag and time_stamp is not None:
+                begin = time_stamp[i][0]
+                end = time_stamp[i][1]
+            word = ''
+            if isAllChinese(ch):
+                if alpha_blank is True:
+                    word_lists.pop()
+                word_lists.append(ch)
+                alpha_blank = False
+                if time_stamp is not None:
+                    ts_flag = True
+                    ts_lists.append([begin, end])
+                    begin = end
+            elif '@@' in ch:
+                word = ch.replace('@@', '')
+                word_item += word
+                alpha_blank = False
+                if time_stamp is not None:
+                    ts_flag = False
+                    end = time_stamp[i][1]
+            elif isAllAlpha(ch):
+                word_item += ch
+                word_lists.append(word_item)
+                word_lists.append(' ')
+                word_item = ''
+                alpha_blank = True
+                if time_stamp is not None:
+                    ts_flag = True
+                    end = time_stamp[i][1] 
+                    ts_lists.append([begin, end])
+                    begin = end
+            else:
+                word_lists.append(ch)
+
+    if time_stamp is not None: 
+        word_lists, ts_lists = abbr_dispose(word_lists, ts_lists)
+        real_word_lists = []
+        for ch in word_lists:
+            if ch != ' ':
+                real_word_lists.append(ch)
+        sentence = ' '.join(real_word_lists).strip()
+        return sentence, ts_lists, real_word_lists
+    else:
+        word_lists = abbr_dispose(word_lists)
+        real_word_lists = []
+        for ch in word_lists:
+            if ch != ' ':
+                real_word_lists.append(ch)
+        sentence = ''.join(word_lists).strip()
+        return sentence, real_word_lists

From fc95f1b35e3bc65c070a96a673f7099d5f255d38 Mon Sep 17 00:00:00 2001
From: "chong.zhang" <chong.zhang@alibaba-inc.com>
Date: Fri, 5 May 2023 13:31:45 +0800
Subject: [PATCH 7/7] update docs/modelscope_models.md

---
 docs/modelscope_models.md | 43 ++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/docs/modelscope_models.md b/docs/modelscope_models.md
index 5f94a09e4..e7c754c8a 100644
--- a/docs/modelscope_models.md
+++ b/docs/modelscope_models.md
@@ -25,13 +25,27 @@ Here we provided several pretrained models on different datasets. The details of
 
 #### UniASR Models
 
-|                                                               Model Name                                                               | Language |          Training Data           | Vocab Size | Parameter | Offline/Online | Notes                                                                                                                           |
-|:--------------------------------------------------------------------------------------------------------------------------------------:|:--------:|:--------------------------------:|:----------:|:---------:|:--------------:|:--------------------------------------------------------------------------------------------------------------------------------|
-|       [UniASR](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online/summary)        | CN & EN  | Alibaba Speech Data (60000hours) |    8358    |   100M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
-| [UniASR-large](https://modelscope.cn/models/damo/speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline/summary) | CN & EN  | Alibaba Speech Data (60000hours) |    8358    |   220M    |    Offline     | UniASR streaming offline unifying models                                                                                                    |
-|           [UniASR Burmese](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-my-16k-common-vocab696-pytorch/summary)           | Burmese  |  Alibaba Speech Data (? hours)   |    696     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
-|           [UniASR Hebrew](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-he-16k-common-vocab1085-pytorch/summary)           |  Hebrew  |  Alibaba Speech Data (? hours)   |    1085    |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
-|       [UniASR Urdu](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-ur-16k-common-vocab877-pytorch/summary)                  |   Urdu   |  Alibaba Speech Data (? hours)   |    877     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|                                                                    Model Name                                                                     |    Language     |           Training Data           | Vocab Size | Parameter | Offline/Online | Notes                                                                                                                           |
+|:-------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------:|:---------------------------------:|:----------:|:---------:|:--------------:|:--------------------------------------------------------------------------------------------------------------------------------|
+|             [UniASR](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online/summary)             |     CN & EN     | Alibaba Speech Data (60000 hours) |    8358    |   100M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|      [UniASR-large](https://modelscope.cn/models/damo/speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline/summary)       |     CN & EN     | Alibaba Speech Data (60000 hours) |    8358    |   220M    |    Offline     | UniASR streaming offline unifying models                                                                                                    |
+|          [UniASR English](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-online/summary)           |       EN        | Alibaba Speech Data (10000 hours) |    1080     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+|          [UniASR Russian](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-online/summary)           |       RU        | Alibaba Speech Data (5000 hours)  |    1664     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+|           [UniASR Japanese](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-online/summary)           |       JA        | Alibaba Speech Data (5000 hours)  |    5977     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|           [UniASR Korean](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-online/summary)           |       KO        | Alibaba Speech Data (2000 hours)  |    6400     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+| [UniASR Cantonese (CHS)](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online/summary) | Cantonese (CHS) | Alibaba Speech Data (5000 hours)  |    1468     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+|         [UniASR Indonesian](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-online/summary)         |       ID        | Alibaba Speech Data (1000 hours)  |    1067     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|           [UniASR Vietnamese](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online/summary)           |       VI        | Alibaba Speech Data (1000 hours)  |    1001     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|          [UniASR Spanish](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-online/summary)           |       ES        | Alibaba Speech Data (1000 hours)  |    3445     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+|         [UniASR Portuguese](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-online/summary)         |       PT        | Alibaba Speech Data (1000 hours)  |    1617     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|          [UniASR French](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online/summary)           |       FR        | Alibaba Speech Data (1000 hours)  |    3472     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+|          [UniASR German](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online/summary)           |       DE        | Alibaba Speech Data (1000 hours)  |    3690     |    95M    |     Online     | UniASR streaming online unifying models                                                                                                    |
+|            [UniASR Persian](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online/summary)             |       FA        | Alibaba Speech Data (1000 hours)  |    1257     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|                [UniASR Burmese](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-my-16k-common-vocab696-pytorch/summary)                 |       MY        | Alibaba Speech Data (1000 hours)  |    696     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|                [UniASR Hebrew](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-he-16k-common-vocab1085-pytorch/summary)                 |       HE        | Alibaba Speech Data (1000 hours)  |    1085    |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+|                  [UniASR Urdu](https://modelscope.cn/models/damo/speech_UniASR_asr_2pass-ur-16k-common-vocab877-pytorch/summary)                  |       UR        | Alibaba Speech Data (1000 hours)  |    877     |    95M    |     Online     | UniASR streaming offline unifying models                                                                                                    |
+
+
 
 #### Conformer Models
 
@@ -92,3 +106,18 @@ Here we provided several pretrained models on different datasets. The details of
 |                                                    Model Name                                     |  Language  |    Training Data    | Parameters | Notes |
 |:--------------------------------------------------------------------------------------------------:|:--------------:|:-------------------:|:----------:|:------|
 | [TP-Aligner](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) | CN | Alibaba Speech Data (50000hours) |   37.8M    |    Timestamp prediction, Mandarin, middle size |
+
+### Inverse Text Normalization (ITN) Models
+|                                                    Model Name                                                    | Language | Parameters | Notes |
+|:----------------------------------------------------------------------------------------------------------------:|:--------:|:----------:|:------|
+| [English](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-en/summary) |    EN    | 1.54M | ITN, ASR post processing |
+| [Russian](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-ru/summary) |    RU    | 1.28M | ITN, ASR post processing |
+| [Japanese](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-ja/summary) |    JA    | 6.8M | ITN, ASR post processing |
+| [Korean](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-ko/summary) |    KO    | 1.28M | ITN, ASR post processing |
+| [Indonesian](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-id/summary) |    ID    | 2.06M | ITN, ASR post processing |
+| [Vietnamese](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-vi/summary) |    VI    | 0.92M | ITN, ASR post processing |
+| [Tagalog](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-tl/summary) |    TL    | 1.28M | ITN, ASR post processing |
+| [Spanish](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-es/summary) |    ES    | 1.28M | ITN, ASR post processing |
+| [Portuguese](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-pt/summary) |    PT    | 1.28M | ITN, ASR post processing |
+| [French](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-fr/summary) |    FR    | 1.28M | ITN, ASR post processing |
+| [German](https://modelscope.cn/models/damo/speech_inverse_text_processing_fun-text-processing-itn-de/summary) |    DE    | 1.28M | ITN, ASR post processing |