From 2330e58f5ffa6d78cbdc099ac1a7e0aa1bd6a09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B8=B8=E9=9B=81?= Date: Thu, 10 Oct 2024 15:32:08 +0800 Subject: [PATCH] bugfix v1.1.11 --- funasr/utils/postprocess_utils.py | 186 ++++++++++++++++-------------- funasr/version.txt | 2 +- 2 files changed, 99 insertions(+), 89 deletions(-) diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py index 10aa29f7d..dcc713f36 100644 --- a/funasr/utils/postprocess_utils.py +++ b/funasr/utils/postprocess_utils.py @@ -125,11 +125,11 @@ def abbr_dispose(words: List[Any], time_stamp: List[List] = None) -> List[Any]: if time_stamp is not None: end = time_stamp[ts_nums[num]][1] ts_lists.append([begin, end]) - else: + else: + word_lists.append(words[num]) # length of time_stamp may not equal to length of words because of the (somehow improper) threshold set in timestamp_tools.py line 46, e.g., length of time_stamp can be zero but length of words is not. # Moreover, move "word_lists.append(words[num])" into if clause, to keep length of word_lists and length of ts_lists equal. - if time_stamp is not None and ts_nums[num]": "😊", - "<|SAD|>": "😔", - "<|ANGRY|>": "😡", - "<|NEUTRAL|>": "", - "<|FEARFUL|>": "😰", - "<|DISGUSTED|>": "🤢", - "<|SURPRISED|>": "😮", + "<|HAPPY|>": "😊", + "<|SAD|>": "😔", + "<|ANGRY|>": "😡", + "<|NEUTRAL|>": "", + "<|FEARFUL|>": "😰", + "<|DISGUSTED|>": "🤢", + "<|SURPRISED|>": "😮", } event_dict = { - "<|BGM|>": "🎼", - "<|Speech|>": "", - "<|Applause|>": "👏", - "<|Laughter|>": "😀", - "<|Cry|>": "😭", - "<|Sneeze|>": "🤧", - "<|Breath|>": "", - "<|Cough|>": "🤧", + "<|BGM|>": "🎼", + "<|Speech|>": "", + "<|Applause|>": "👏", + "<|Laughter|>": "😀", + "<|Cry|>": "😭", + "<|Sneeze|>": "🤧", + "<|Breath|>": "", + "<|Cough|>": "🤧", } -lang_dict = { +lang_dict = { "<|zh|>": "<|lang|>", "<|en|>": "<|lang|>", "<|yue|>": "<|lang|>", @@ -333,81 +334,90 @@ lang_dict = { } emoji_dict = { - "<|nospeech|><|Event_UNK|>": "❓", - "<|zh|>": "", - "<|en|>": "", - "<|yue|>": "", - "<|ja|>": "", - "<|ko|>": "", - "<|nospeech|>": "", - "<|HAPPY|>": "😊", - "<|SAD|>": "😔", - "<|ANGRY|>": "😡", - "<|NEUTRAL|>": "", - "<|BGM|>": "🎼", - "<|Speech|>": "", - "<|Applause|>": "👏", - "<|Laughter|>": "😀", - "<|FEARFUL|>": "😰", - "<|DISGUSTED|>": "🤢", - "<|SURPRISED|>": "😮", - "<|Cry|>": "😭", - "<|EMO_UNKNOWN|>": "", - "<|Sneeze|>": "🤧", - "<|Breath|>": "", - "<|Cough|>": "😷", - "<|Sing|>": "", - "<|Speech_Noise|>": "", - "<|withitn|>": "", - "<|woitn|>": "", - "<|GBG|>": "", - "<|Event_UNK|>": "", + "<|nospeech|><|Event_UNK|>": "❓", + "<|zh|>": "", + "<|en|>": "", + "<|yue|>": "", + "<|ja|>": "", + "<|ko|>": "", + "<|nospeech|>": "", + "<|HAPPY|>": "😊", + "<|SAD|>": "😔", + "<|ANGRY|>": "😡", + "<|NEUTRAL|>": "", + "<|BGM|>": "🎼", + "<|Speech|>": "", + "<|Applause|>": "👏", + "<|Laughter|>": "😀", + "<|FEARFUL|>": "😰", + "<|DISGUSTED|>": "🤢", + "<|SURPRISED|>": "😮", + "<|Cry|>": "😭", + "<|EMO_UNKNOWN|>": "", + "<|Sneeze|>": "🤧", + "<|Breath|>": "", + "<|Cough|>": "😷", + "<|Sing|>": "", + "<|Speech_Noise|>": "", + "<|withitn|>": "", + "<|woitn|>": "", + "<|GBG|>": "", + "<|Event_UNK|>": "", } emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"} -event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷",} +event_set = { + "🎼", + "👏", + "😀", + "😭", + "🤧", + "😷", +} + def format_str_v2(s): - sptk_dict = {} - for sptk in emoji_dict: - sptk_dict[sptk] = s.count(sptk) - s = s.replace(sptk, "") - emo = "<|NEUTRAL|>" - for e in emo_dict: - if sptk_dict[e] > sptk_dict[emo]: - emo = e - for e in event_dict: - if sptk_dict[e] > 0: - s = event_dict[e] + s - s = s + emo_dict[emo] + sptk_dict = {} + for sptk in emoji_dict: + sptk_dict[sptk] = s.count(sptk) + s = s.replace(sptk, "") + emo = "<|NEUTRAL|>" + for e in emo_dict: + if sptk_dict[e] > sptk_dict[emo]: + emo = e + for e in event_dict: + if sptk_dict[e] > 0: + s = event_dict[e] + s + s = s + emo_dict[emo] + + for emoji in emo_set.union(event_set): + s = s.replace(" " + emoji, emoji) + s = s.replace(emoji + " ", emoji) + return s.strip() - for emoji in emo_set.union(event_set): - s = s.replace(" " + emoji, emoji) - s = s.replace(emoji + " ", emoji) - return s.strip() def rich_transcription_postprocess(s): - def get_emo(s): - return s[-1] if s[-1] in emo_set else None - def get_event(s): - return s[0] if s[0] in event_set else None + def get_emo(s): + return s[-1] if s[-1] in emo_set else None - s = s.replace("<|nospeech|><|Event_UNK|>", "❓") - for lang in lang_dict: - s = s.replace(lang, "<|lang|>") - s_list = [format_str_v2(s_i).strip(" ") for s_i in s.split("<|lang|>")] - new_s = " " + s_list[0] - cur_ent_event = get_event(new_s) - for i in range(1, len(s_list)): - if len(s_list[i]) == 0: - continue - if get_event(s_list[i]) == cur_ent_event and get_event(s_list[i]) != None: - s_list[i] = s_list[i][1:] - #else: - cur_ent_event = get_event(s_list[i]) - if get_emo(s_list[i]) != None and get_emo(s_list[i]) == get_emo(new_s): - new_s = new_s[:-1] - new_s += s_list[i].strip().lstrip() - new_s = new_s.replace("The.", " ") - return new_s.strip() - + def get_event(s): + return s[0] if s[0] in event_set else None + + s = s.replace("<|nospeech|><|Event_UNK|>", "❓") + for lang in lang_dict: + s = s.replace(lang, "<|lang|>") + s_list = [format_str_v2(s_i).strip(" ") for s_i in s.split("<|lang|>")] + new_s = " " + s_list[0] + cur_ent_event = get_event(new_s) + for i in range(1, len(s_list)): + if len(s_list[i]) == 0: + continue + if get_event(s_list[i]) == cur_ent_event and get_event(s_list[i]) != None: + s_list[i] = s_list[i][1:] + # else: + cur_ent_event = get_event(s_list[i]) + if get_emo(s_list[i]) != None and get_emo(s_list[i]) == get_emo(new_s): + new_s = new_s[:-1] + new_s += s_list[i].strip().lstrip() + new_s = new_s.replace("The.", " ") + return new_s.strip() diff --git a/funasr/version.txt b/funasr/version.txt index 9695327b3..516530316 100644 --- a/funasr/version.txt +++ b/funasr/version.txt @@ -1 +1 @@ -1.1.10 \ No newline at end of file +1.1.11 \ No newline at end of file