feat(mn6): Update model and multinet_g2p.py

2025-09-15 15:28:44 +08:00 · 2023-01-11 17:17:27 +08:00 · 2023-01-11 17:17:27 +08:00 · 6242e861c1
commit 6242e861c1
parent b4cb56808e
16 changed files with 1098 additions and 13 deletions
--- a/Kconfig.projbuild
+++ b/Kconfig.projbuild
--- a/include/esp32s3/esp_mn_iface.h
+++ b/include/esp32s3/esp_mn_iface.h
@@ -18,9 +18,9 @@ typedef enum {
 } esp_mn_state_t;

 typedef enum {
-	ESP_MN_GREEDY_SEARCH = 0,        // greedy search
-	ESP_MN_BEAM_SEARCH = 1,          // beam search
-    ESP_MN_BEAM_SEARCH_WIRH_LM = 2,  // beam search with language model
+	ESP_MN_GREEDY_SEARCH = 0,          // greedy search
+	ESP_MN_BEAM_SEARCH = 1,            // beam search
+    ESP_MN_BEAM_SEARCH_WITH_TRIE = 2,  // beam search with trie language model
 } esp_mn_search_method_t;

 typedef enum {
--- a/lib/esp32s3/libc_speech_features.a
+++ b/lib/esp32s3/libc_speech_features.a
--- a/lib/esp32s3/libdl_lib.a
+++ b/lib/esp32s3/libdl_lib.a
--- a/lib/esp32s3/libesp_audio_front_end.a
+++ b/lib/esp32s3/libesp_audio_front_end.a
--- a/lib/esp32s3/libhufzip.a
+++ b/lib/esp32s3/libhufzip.a
--- a/lib/esp32s3/libmultinet.a
+++ b/lib/esp32s3/libmultinet.a
--- a/lib/esp32s3/libwakenet.a
+++ b/lib/esp32s3/libwakenet.a
--- a/model/wakenet_model/wn9_customword/_MODEL_INFO_
+++ b/model/wakenet_model/wn9_customword/_MODEL_INFO_
@@ -1,2 +1,2 @@
 # (neural network type)_(model data version)_(label1_detection window length_threshold for 90%_threshold for 95%)_(label2 ...)_...
-wakeNet9l_v2h8_bangwotiaocheng_3_0.98_0.99_bangwodakai_3_0.98_0.99_bangwoguanbi_3_0.98_0.99
+wakenet9l_v5h8_jiuming_3_0.630_0.634
--- a/model/wakenet_model/wn9_customword/wn9_data
+++ b/model/wakenet_model/wn9_customword/wn9_data
--- a/model/wakenet_model/wn9_customword/wn9_index
+++ b/model/wakenet_model/wn9_customword/wn9_index
--- a/src/esp_process_sdkconfig.c
+++ b/src/esp_process_sdkconfig.c
@@ -464,7 +464,7 @@ char *get_id_name_cn(int i)

 char *get_id_name_en(int i)
 {
-#if defined CONFIG_USE_MULTINET && defined CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
+#if defined CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8 || defined CONFIG_SR_MN_EN_MULTINET6_QUANT
    if (i == 0)
        return CONFIG_EN_SPEECH_COMMAND_ID0;
    else if (i == 1)
--- a/tool/README.md
+++ b/tool/README.md
@@ -2,14 +2,17 @@



-####  1. Install g2p_en, please refer to https://pypi.org/project/g2p-en/ 
+####  1. Install g2p_en and sentencepiece

 ```
 pip install g2p_en
+pip install sentencepiece
 ```

 #### 2. Run multinet_g2p.py

+##### 2.1 multinet5 (units:phoneme)
+
 ```
 python multinet_g2p.py -t "hello world,hi ESP;turn on the light;turn off the light"

@@ -18,6 +21,13 @@ in: hello world,hi ESP;turn on the light;turn off the light
 out: hcLb WkLD,hi fST;TkN nN jc LiT;TkN eF jc LiT;
 ```

+##### 2.2 multinet6 (units:subword)
+```
+python multinet_g2p.py -m bpe.model -t "hello world,hi ESP;turn on the light;turn off the light"
+in: hello world,hi ESP;turn on the light;turn off the light
+out: _HE LL O _WORLD , H I _E S P ; T UR N _ON _THE _ L IGHT ; T UR N _OF F _THE _ L IGHT
+```
+
 #### 3. Add speech commands 

 ##### 3.1 add speech commands by menuconfig
--- a/tool/bpe.model
+++ b/tool/bpe.model
--- a/tool/multinet_g2p.py
+++ b/tool/multinet_g2p.py
@@ -1,7 +1,8 @@
 from g2p_en import G2p
 import argparse
 import numpy as np
-import pandas
+import pandas as pd
+import sentencepiece as spm

 def english_g2p(text, alphabet=None):
    g2p = G2p()
@@ -16,7 +17,7 @@ def english_g2p(text, alphabet=None):
        for phrase in item:
            labels = g2p(phrase)
            for char in labels:
-                if char not in alphabet:
+                if item not in alphabet:
                    print("skip %s, not found in alphabet")
                    continue
                else:
@@ -30,16 +31,65 @@ def english_g2p(text, alphabet=None):
    
    return out

+
+def spm_encode_text(text, sp, enable_sampling=False):
+    text = text.upper()
+    def text_norm(str_list):
+        out_str = ''
+        for i in range(len(out)):
+            out_str += out[i].replace("▁", "_")
+            out_str += ' '
+        return out_str[:-1]
+    out_str = ''
+    if enable_sampling:
+        for i in range(5):
+            out = sp.encode(text, out_type=str, nbest_size=-1, enable_sampling=True, alpha=0.01)
+            out_str += text_norm(out) + ","
+    else:
+        out = sp.encode(text, out_type=str, enable_sampling=enable_sampling)
+        out_str += text_norm(out)
+
+    return out_str
+
+
+def encode_text(text, bpe_model_file, enable_sampling=False):
+    sp = spm.SentencePieceProcessor()
+    sp.load(bpe_model_file)
+    text = text
+    out_str = spm_encode_text(text, sp, enable_sampling)
+    print("in:", text)
+    print("out:", out_str)
+
+    return out_str
+
+def encode_csv(csv_file, bpe_model_file, enable_sampling=False):
+    text_list = pd.read_csv(csv_file).values
+    sp = spm.SentencePieceProcessor()
+    sp.load(bpe_model_file)
+    out_str = ""
+
+    for text in text_list:
+        in_text = text[1]
+        out_str += spm_encode_text(in_text, sp, enable_sampling)
+        out_str += "\n"
+
+    print(out_str)
+
+
 if __name__ == "__main__":

    parser = argparse.ArgumentParser(prog="English Speech Commands G2P")

    parser.add_argument("--text", "-t", type=str,  default=None, help="input text")
-    parser.add_argument("--alphabet_map", "-a", type=str,  default=None, help="the json file to map label into classes of model")
-    parser.add_argument('-c', '--c_file', help="name of .c files")
-    parser.add_argument('-head', '--h_file', help="name of .h files")
+    parser.add_argument("--bpe_model", "-m", type=str,  default=None, help="bpe model path")
+    parser.add_argument("--enable_sampling", "-s", action="store_true", help="enable encode sampling, output more segmentation")
+    parser.add_argument("--csv_file", "-c", type=str,  default=None, help="text csv file")
    args = parser.parse_args()
    
-    if args.text is not None:
+    if args.bpe_model is not None:
+        if args.text is not None:
+            encode_text(args.text, args.bpe_model, args.enable_sampling)
+        else:
+            encode_csv(args.csv_file, args.bpe_model, args.enable_sampling)
+    else:
        english_g2p(args.text, args.alphabet_map)
-
--- a/tool/requirements
+++ b/tool/requirements
@@ -1 +1,2 @@
 g2p-en
+sentencepiece