diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 3611383..b21c262 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -58,7 +58,8 @@ build_esp_sr_pdf:
   script:
     - cd $DOCS_DIR
     - ./check_lang_folder_sync.sh
-    - build-docs -bs latex -l $DOCLANG -t $DOCTGT
+    - pip install -r requirements.txt
+    - build-docs --skip-reqs-check -bs latex -l $DOCLANG -t $DOCTGT
   parallel:
     matrix:
       - DOCLANG: ["en", "zh_CN"]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index da313c6..33ef93c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,12 +2,13 @@
 
 ## Known issues: 
 - Available storage is less than the remaining flash space on IDF v5.0.   
-If you can not map model partition successfully, please check the left free storage by `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)`
+If you cannot map the model partition successfully, check the remaining free pages with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)`, or update IDF to v5.1 or later.
 
-## unreleased
+## 1.3.1
 - Bugfix: remove all cxx11:string
-- Bugfix: remove esp-partition require for esp32s2 & esp32c3 on idf v4.4
-- Add more loader option for multinet to blance CPU and memory consumption
+- Bugfix: remove esp-partition for esp32s2 & esp32c3 on idf v4.4
+- Update multinet API to add/modify/check new commands in the code
+- Update documentation to describe how to use the multinet API
 
 ## 1.3.0 
 - Update the partition APIs to keep compatible with both IDF v4.4 and IDF v5.0
diff --git a/Kconfig.projbuild b/Kconfig.projbuild
index b9118d9..dd7af8c 100644
--- a/Kconfig.projbuild
+++ b/Kconfig.projbuild
@@ -153,6 +153,7 @@ choice CHINESE_SR_MN_MODEL_SEL
     config SR_MN_CN_MULTINET6_AC_QUANT
         bool "chinese recognition for air conditioner controller (mn6_cn_ac)"
         depends on IDF_TARGET_ESP32S3
+
 endchoice
 
 choice ENGLISH_SR_MN_MODEL_SEL
diff --git a/docs/en/speech_command_recognition/README.rst b/docs/en/speech_command_recognition/README.rst
index 1bd9250..65e4d1c 100644
--- a/docs/en/speech_command_recognition/README.rst
+++ b/docs/en/speech_command_recognition/README.rst
@@ -68,9 +68,8 @@ MultiNet5 customize speech commands
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 MultiNet5 use phonemes for English speech commands. For simplicity, we use characters to denote different phonemes. Please use :project_file:`tool/multinet_g2p.py` to do the convention.  
-There are two methods to customize speech commands offline:
 
--  Via ``menuconfig``
+- Via ``menuconfig``
 
     1. Navigate to ``idf.py menuconfig`` > ``ESP Speech Recognition`` > ``Add Chinese speech commands/Add English speech commands`` to add speech commands. For details, please refer to the example in ESP-Skainet.
 
@@ -86,19 +85,122 @@ There are two methods to customize speech commands offline:
     ::
 
         /**
-        * @brief Update the speech commands of MultiNet by menuconfig
+            * @brief Update the speech commands of MultiNet by menuconfig
+            *
+            * @param multinet            The multinet handle
+            *
+            * @param model_data          The model object to query
+            *
+            * @param language            The language of MultiNet
+            *
+            * @return
+            *     - ESP_OK                  Success
+            *     - ESP_ERR_INVALID_STATE   Fail
+            */
+            esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data);
+
+Customize Speech Commands Via API calls
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Alternatively, speech commands can be modified via API calls. This method works for both MultiNet5 and MultiNet6.
+
+- Print active speech commands. This function prints out all speech commands that are currently active.
+
+    ::
+
+        /**
+        * @brief Update the speech commands of MultiNet
+        * 
+        * @Warning: Must be used after [add/remove/modify/clear] function, 
+        *           otherwise the language model of multinet can not be updated.
         *
         * @param multinet            The multinet handle
-        *
         * @param model_data          The model object to query
         *
-        * @param langugae            The language of MultiNet
+        * @return
+        *     - NULL                 Success
+        *     - others               The list of error phrase which can not be parsed by multinet.
+        */
+        esp_mn_error_t *esp_mn_commands_update();
+
+    .. note::
+        The modifications will not be applied, thus not printed out, until you call ``esp_mn_commands_update()``.
+
+- Apply new changes. The add/remove/modify/clear actions will not take effect until this function is called.
+
+    ::
+
+        /**
+        * @brief Update the speech commands of MultiNet
+        * 
+        * @Warning: Must be used after [add/remove/modify/clear] function, 
+        *           otherwise the language model of multinet can not be updated.
+        *
+        * @return
+        *     - NULL                 Success
+        *     - others               The list of error phrase which can not be parsed by multinet.
+        */
+        esp_mn_error_t *esp_mn_commands_update();
+
+
+- Add a new speech command. Returns ``ESP_ERR_INVALID_STATE`` if the input string is not in the correct format.
+
+    ::
+
+        /**
+        * @brief Add one speech commands with command string and command ID
+        *
+        * @param command_id      The command ID
+        * @param string  The command string of the speech commands
         *
         * @return
         *     - ESP_OK                  Success
         *     - ESP_ERR_INVALID_STATE   Fail
         */
-        esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data);
+        esp_err_t esp_mn_commands_add(int command_id, char *string);
+
+- Remove a speech command. Returns ``ESP_ERR_INVALID_STATE`` if the command does not exist.
+
+    ::
+
+        /**
+        * @brief Remove one speech commands by command string
+        *
+        * @param string  The command string of the speech commands
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_remove(char *string);
+
+- Modify a speech command. Returns ``ESP_ERR_INVALID_STATE`` if the command does not exist.
+
+    ::
+
+        /**
+        * @brief Modify one speech commands with new command string
+        *
+        * @param old_string  The old command string of the speech commands
+        * @param new_string  The new command string of the speech commands
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_modify(char *old_string, char *new_string);
+
+- Clear all speech commands.
+
+    ::
+
+        /**
+        * @brief Clear all speech commands in linked list
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_clear(void);
 
 Use MultiNet
 ------------
diff --git a/docs/zh_CN/speech_command_recognition/README.rst b/docs/zh_CN/speech_command_recognition/README.rst
index cecafa1..df04735 100644
--- a/docs/zh_CN/speech_command_recognition/README.rst
+++ b/docs/zh_CN/speech_command_recognition/README.rst
@@ -110,6 +110,109 @@ MultiNet5 定义方法：
         esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data);
 
 
+通过调用 API 修改
+~~~~~~~~~~~~~~~~~
+指令还可以通过调用 API 修改，这种方法对于 MultiNet5 和 MultiNet6 都适用。
+
+- 打印现有指令。
+
+    ::
+
+        /**
+        * @brief Update the speech commands of MultiNet
+        * 
+        * @Warning: Must be used after [add/remove/modify/clear] function, 
+        *           otherwise the language model of multinet can not be updated.
+        *
+        * @param multinet            The multinet handle
+        * @param model_data          The model object to query
+        *
+        * @return
+        *     - NULL                 Success
+        *     - others               The list of error phrase which can not be parsed by multinet.
+        */
+        esp_mn_error_t *esp_mn_commands_update();
+
+    .. note::
+        所有修改操作在调用 ``esp_mn_commands_update()`` 后才会被打印出来。
+
+- 应用新的修改操作，所有添加、移除、修改及清空操作在调用后才会被应用。
+
+    ::
+
+        /**
+        * @brief Update the speech commands of MultiNet
+        * 
+        * @Warning: Must be used after [add/remove/modify/clear] function, 
+        *           otherwise the language model of multinet can not be updated.
+        *
+        * @return
+        *     - NULL                 Success
+        *     - others               The list of error phrase which can not be parsed by multinet.
+        */
+        esp_mn_error_t *esp_mn_commands_update();
+
+
+- 添加一条新指令，如果指令格式不正确则返回 ``ESP_ERR_INVALID_STATE``。
+
+    ::
+
+        /**
+        * @brief Add one speech commands with command string and command ID
+        *
+        * @param command_id      The command ID
+        * @param string  The command string of the speech commands
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_add(int command_id, char *string);
+
+- 移除一条指令，如果该指令不存在则返回 ``ESP_ERR_INVALID_STATE``。
+
+    ::
+
+        /**
+        * @brief Remove one speech commands by command string
+        *
+        * @param string  The command string of the speech commands
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_remove(char *string);
+
+- 修改一条指令，如果该指令不存在则返回 ``ESP_ERR_INVALID_STATE``。
+
+    ::
+
+        /**
+        * @brief Modify one speech commands with new command string
+        *
+        * @param old_string  The old command string of the speech commands
+        * @param new_string  The new command string of the speech commands
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_modify(char *old_string, char *new_string);
+
+- 清空所有指令。
+
+    ::
+
+        /**
+        * @brief Clear all speech commands in linked list
+        *
+        * @return
+        *     - ESP_OK                  Success
+        *     - ESP_ERR_INVALID_STATE   Fail
+        */
+        esp_err_t esp_mn_commands_clear(void);
+        
 MultiNet 的使用
 ----------------
 
diff --git a/idf_component.yml b/idf_component.yml
index 4fd65a6..03d8e89 100644
--- a/idf_component.yml
+++ b/idf_component.yml
@@ -1,4 +1,4 @@
-version: "1.3.0"
+version: "1.3.1"
 description: esp_sr provides basic algorithms for Speech Recognition applications
 url: https://github.com/espressif/esp-sr
 dependencies:
diff --git a/include/esp32s3/esp_mn_iface.h b/include/esp32s3/esp_mn_iface.h
index 3c6b186..faccea1 100644
--- a/include/esp32s3/esp_mn_iface.h
+++ b/include/esp32s3/esp_mn_iface.h
@@ -3,7 +3,7 @@
 #include "esp_wn_iface.h"
 
 #define ESP_MN_RESULT_MAX_NUM 5
-#define ESP_MN_MAX_PHRASE_NUM 200
+#define ESP_MN_MAX_PHRASE_NUM 400
 #define ESP_MN_MAX_PHRASE_LEN 63
 #define ESP_MN_MIN_PHRASE_LEN 2
 
@@ -12,8 +12,8 @@
 #define ESP_MN_CHINESE "cn"
 
 typedef enum {
-	ESP_MN_STATE_DETECTING = 0,     // detecting
-	ESP_MN_STATE_DETECTED = 1,      // detected
+    ESP_MN_STATE_DETECTING = 0,     // detecting
+    ESP_MN_STATE_DETECTED = 1,      // detected
     ESP_MN_STATE_TIMEOUT = 2,       // time out
 } esp_mn_state_t;
 
@@ -21,20 +21,20 @@ typedef enum {
 //The memory comsumption is decreased with increasing mode, 
 //As a consequence also the CPU loading rate goes up
 typedef enum {
-	ESP_MN_LOAD_FROM_PSRAM = 0,          // Load all weights from PSRAM. Fastest computation with Maximum memory consumption
-	ESP_MN_LOAD_FROM_PSRAM_FLASH = 1,    // Load some weights from PSRAM and laod the rest from FLASH (default)
+    ESP_MN_LOAD_FROM_PSRAM = 0,          // Load all weights from PSRAM. Fastest computation with Maximum memory consumption
+    ESP_MN_LOAD_FROM_PSRAM_FLASH = 1,    // Load some weights from PSRAM and laod the rest from FLASH (default)
     ESP_MN_LOAD_FROM_FLASH = 2,          // Load more weights from FLASH. Minimum memory consumption with slowest computation
 } esp_mn_loader_mode_t;
 
 typedef enum {
-	ESP_MN_GREEDY_SEARCH = 0,          // greedy search
-	ESP_MN_BEAM_SEARCH = 1,            // beam search
+    ESP_MN_GREEDY_SEARCH = 0,          // greedy search
+    ESP_MN_BEAM_SEARCH = 1,            // beam search
     ESP_MN_BEAM_SEARCH_WITH_FST = 2,  // beam search with trie language model
 } esp_mn_search_method_t;
 
 typedef enum {
-	CHINESE_ID = 1,       // Chinese language
-	ENGLISH_ID = 2,       // English language
+    CHINESE_ID = 1,       // Chinese language
+    ENGLISH_ID = 2,       // English language
 } language_id_t;
 
 // Return all possible recognition results
@@ -47,17 +47,11 @@ typedef struct{
     char string[256];
 } esp_mn_results_t;
 
-
-typedef struct{
-    int16_t num;                                // The number of error phrases, which can not added into model
-    int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM];  // The error phrase index in singly linked list．
-} esp_mn_error_t;
-
 typedef struct {
-    char phoneme_string[ESP_MN_MAX_PHRASE_LEN + 1];  // phoneme string
-    int16_t command_id;                              // the command id
-    float threshold;                                 // trigger threshold, default: 0
-    int16_t *wave;                                   // prompt wave data of the phrase
+    char *string;                               // command string
+    int16_t command_id;                         // the command id
+    float threshold;                            // trigger threshold, default: 0
+    int16_t *wave;                              // prompt wave data of the phrase
 } esp_mn_phrase_t;
 
 typedef struct _mn_node_ {
@@ -65,6 +59,11 @@ typedef struct _mn_node_ {
     struct _mn_node_ *next;
 } esp_mn_node_t;
 
+typedef struct{
+    int16_t num;                                // The number of error phrases, which can not added into model
+    esp_mn_phrase_t **phrases;                  // The array of error phrase pointer
+} esp_mn_error_t;
+
 /**
  * @brief Initialze a model instance with specified model name.
  *
@@ -181,6 +180,22 @@ typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
  */
 typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
 
+
+/**
+ * @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
+ * 
+ * @param model_data     The model object to query
+*/
+typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
+
+/**
+ * @brief Check if input string can be tokenized
+ * 
+ * @param model_data     The model object to query
+ * @param str            The input string
+*/
+typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, char *str);
+
 typedef struct {
     esp_mn_iface_op_create_t create;
     esp_mn_iface_op_get_samp_rate_t get_samp_rate;
@@ -195,4 +210,6 @@ typedef struct {
     esp_mn_iface_op_clean_t clean;
     esp_wn_iface_op_set_speech_commands set_speech_commands;
     esp_mn_iface_op_switch_loader_mode_t switch_loader_mode;
+    esp_mn_iface_op_print_active_speech_commands print_active_speech_commands;
+    esp_mn_iface_op_check_speech_command check_speech_command;
 } esp_mn_iface_t;
diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a
index 73d0429..29dd985 100644
Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ
diff --git a/lib/esp32s3/libfst.a b/lib/esp32s3/libfst.a
index b4b2f99..644c4e6 100644
Binary files a/lib/esp32s3/libfst.a and b/lib/esp32s3/libfst.a differ
diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a
index 545433e..84aa19e 100644
Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ
diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a
index 2cde79b..6dc5573 100644
Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ
diff --git a/model/multinet_model/mn6_cn/_MODEL_INFO_ b/model/multinet_model/mn6_cn/_MODEL_INFO_
index dd9fa1f..626a840 100644
--- a/model/multinet_model/mn6_cn/_MODEL_INFO_
+++ b/model/multinet_model/mn6_cn/_MODEL_INFO_
@@ -1,2 +1,2 @@
 # (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_...
-MN6_v1_chinese_8_0.9_0.90
+MN6_v3_chinese_8_0.9_0.90
diff --git a/model/multinet_model/mn6_cn/mn6_data b/model/multinet_model/mn6_cn/mn6_data
index 88a6e06..b0e33f6 100644
Binary files a/model/multinet_model/mn6_cn/mn6_data and b/model/multinet_model/mn6_cn/mn6_data differ
diff --git a/model/multinet_model/mn6_cn/mn6_index b/model/multinet_model/mn6_cn/mn6_index
index 6a6c504..cecbd2f 100644
Binary files a/model/multinet_model/mn6_cn/mn6_index and b/model/multinet_model/mn6_cn/mn6_index differ
diff --git a/model/multinet_model/mn6_cn_ac/mn6_data b/model/multinet_model/mn6_cn_ac/mn6_data
index b57da4c..da18bd4 100644
Binary files a/model/multinet_model/mn6_cn_ac/mn6_data and b/model/multinet_model/mn6_cn_ac/mn6_data differ
diff --git a/model/multinet_model/mn6_cn_ac/mn6_index b/model/multinet_model/mn6_cn_ac/mn6_index
index fe8da15..1cdf2f5 100644
Binary files a/model/multinet_model/mn6_cn_ac/mn6_index and b/model/multinet_model/mn6_cn_ac/mn6_index differ
diff --git a/src/esp_mn_speech_commands.c b/src/esp_mn_speech_commands.c
index 710bcf0..bcd99b8 100644
--- a/src/esp_mn_speech_commands.c
+++ b/src/esp_mn_speech_commands.c
@@ -4,9 +4,13 @@
 #include "esp_log.h"
 #include "esp_heap_caps.h"
 #include "esp_mn_speech_commands.h"
+#include "esp_mn_iface.h"
 
 static char *TAG = "MN_COMMAND";
 static esp_mn_node_t *esp_mn_root = NULL;
+static esp_mn_iface_t *esp_mn_model_handle = NULL;
+static model_iface_data_t *esp_mn_model_data = NULL;
+
 
 #define ESP_RETURN_ON_FALSE(a, err_code, log_tag, format, ...) do {                             \
         if (!(a)) {                                                                             \
@@ -15,10 +19,14 @@ static esp_mn_node_t *esp_mn_root = NULL;
         }                                                                                       \
     } while(0)
 
-esp_err_t esp_mn_commands_alloc(void)
+esp_err_t esp_mn_commands_alloc(esp_mn_iface_t *multinet, model_iface_data_t *model_data)
 {
-    ESP_RETURN_ON_FALSE(NULL == esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands already initialized");
+    if (esp_mn_root != NULL) {
+        esp_mn_commands_free();
+    }
     esp_mn_root = esp_mn_node_alloc(NULL);
+    esp_mn_model_handle = multinet;
+    esp_mn_model_data = model_data;
     return ESP_OK;
 }
 
@@ -27,6 +35,8 @@ esp_err_t esp_mn_commands_free(void)
     esp_mn_commands_clear();
     esp_mn_node_free(esp_mn_root);
     esp_mn_root = NULL;
+    esp_mn_model_handle = NULL;
+    esp_mn_model_data = NULL;
 
     return ESP_OK;
 }
@@ -57,14 +67,53 @@ esp_err_t esp_mn_commands_clear(void)
     return ESP_OK;
 }
 
-esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string)
-{
+esp_mn_node_t *esp_mn_command_search(char *string) {
+    int command_id;
     esp_mn_node_t *temp = esp_mn_root;
     ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized");
-    int last_node_elem_num = esp_mn_commands_num();
-    ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands phrase must less than 200");
 
-    esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, phoneme_string);
+    while (temp->next) {
+        temp = temp->next;
+        if (strcmp(string, temp->phrase->string) == 0) {
+            return temp;
+        }
+    }
+    return NULL;
+}
+
+esp_err_t esp_mn_commands_add(int command_id, char *string)
+{
+    if (NULL == esp_mn_root || esp_mn_model_handle == NULL || esp_mn_model_data == NULL) {
+        ESP_LOGE(TAG, "Please create mn model first.\n");
+        return ESP_ERR_INVALID_STATE;
+    }
+    esp_mn_node_t *temp = esp_mn_root;
+    int last_node_elem_num = esp_mn_commands_num();
+    ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands exceed ESP_MN_MAX_PHRASE_NUM");
+
+    if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, string) == 0) {
+        // error message is printed inside check_speech_command
+        ESP_LOGE(TAG, "invalid command, please check format, %s.\n", string);
+        return ESP_ERR_INVALID_STATE;
+    }
+
+    temp = esp_mn_command_search(string);
+
+    if (temp != NULL) {
+        // command already exists
+        if (command_id != temp->phrase->command_id) {
+            // change command id
+            temp->phrase->command_id = command_id;
+        } else {
+            // it's exactly the same, do nothing
+            ESP_LOGI(TAG, "command %d: (%s) already exists.", command_id, string);
+        }
+        return ESP_OK;
+    }
+
+    temp = esp_mn_root;
+
+    esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, string);
     if (phrase == NULL) {
         return ESP_ERR_INVALID_STATE;
     }
@@ -78,48 +127,43 @@ esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string)
     return ESP_OK;
 }
 
-esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string)
+esp_err_t esp_mn_commands_modify(char *old_string, char *new_string)
 {
+    if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, new_string) == 0) {
+        // error message is printed inside check_speech_command
+        return ESP_ERR_INVALID_STATE;
+    }
     esp_mn_node_t *temp = esp_mn_root;
     ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized");
 
-    // search old phoneme_string to get command id
-    bool flag = false;
-    int command_id;
-    while (temp->next) {
-        temp = temp->next;
-        if (strcmp(old_phoneme_string, temp->phrase->phoneme_string) == 0) {
-            command_id = temp->phrase->command_id;
-            flag = true;
-            break;
-        }
-    }
+    // search old string to get command id
+    temp = esp_mn_command_search(old_string);
 
     // replace old phrase with new phrase
-    if (flag) {
-        esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, new_phoneme_string);
+    if (temp != NULL) {
+        esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(temp->phrase->command_id, new_string);
         if (phrase == NULL) {
             return ESP_ERR_INVALID_STATE;
         }
         esp_mn_phrase_free(temp->phrase);
         temp->phrase = phrase;
     } else {
-        ESP_LOGE(TAG, "No such speech command: \"%s\"", old_phoneme_string);
+        ESP_LOGE(TAG, "No such speech command: \"%s\"", old_string);
         return ESP_ERR_INVALID_STATE;
     }
 
     return ESP_OK;
 }
 
-esp_err_t esp_mn_commands_remove(char *phoneme_string)
+esp_err_t esp_mn_commands_remove(char *string)
 {
     esp_mn_node_t *temp = esp_mn_root;
     ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized");
 
-    // search phoneme_string to get node point
+    // search string to get node point
     bool flag = false;
     while (temp->next) {
-        if (strcmp(phoneme_string, temp->next->phrase->phoneme_string) == 0) {
+        if (strcmp(string, temp->next->phrase->string) == 0) {
             flag = true;
             break;
         }
@@ -132,7 +176,7 @@ esp_err_t esp_mn_commands_remove(char *phoneme_string)
         esp_mn_node_free(rm_node);
         return ESP_OK;
     } else {
-        ESP_LOGE(TAG, "No such speech command: \"%s\"", phoneme_string);
+        ESP_LOGE(TAG, "No such speech command: \"%s\"", string);
         return ESP_ERR_INVALID_STATE;
     }
 
@@ -155,14 +199,14 @@ esp_mn_phrase_t *esp_mn_commands_get_from_index(int index)
     return temp->phrase;
 }
 
-esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string)
+esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *string)
 {
     ESP_RETURN_ON_FALSE(NULL != esp_mn_root, NULL, TAG, "The mn commands is not initialized");
 
     // phrase index also is phrase id, which is the depth from this phrase node to root node
     esp_mn_node_t *temp = esp_mn_root;
     while (temp->next) {
-        if (strcmp(phoneme_string, temp->next->phrase->phoneme_string) == 0) {
+        if (strcmp(string, temp->next->phrase->string) == 0) {
             return temp->next->phrase;
         }
         temp = temp->next;
@@ -171,10 +215,10 @@ esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string)
     return NULL;
 }
 
-esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data)
+esp_mn_error_t *esp_mn_commands_update()
 {
     ESP_RETURN_ON_FALSE(NULL != esp_mn_root, NULL, TAG, "The mn commands is not initialize");
-    esp_mn_error_t *error = multinet->set_speech_commands(model_data, esp_mn_root);
+    esp_mn_error_t *error = esp_mn_model_handle->set_speech_commands(esp_mn_model_data, esp_mn_root);
 
     if (error->num == 0) {
         return NULL;
@@ -190,7 +234,7 @@ void esp_mn_commands_print(void)
     int phrase_id = 0;
     while (temp->next) {
         temp = temp->next;
-        ESP_LOGI(TAG, "Command ID%d, phrase ID%d: %s", temp->phrase->command_id, phrase_id, temp->phrase->phoneme_string);
+        ESP_LOGI(TAG, "Command ID%d, phrase ID%d: %s", temp->phrase->command_id, phrase_id, temp->phrase->string);
         phrase_id++;
     }
     ESP_LOGI(TAG, "---------------------------------------------------------\n");
@@ -205,21 +249,21 @@ void *_esp_mn_calloc_(int n, int size)
 #endif
 }
 
-esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string)
+esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string)
 {
 
-    int phoneme_string_len = strlen(phoneme_string);
-    if (phoneme_string_len > ESP_MN_MAX_PHRASE_LEN || phoneme_string_len < 1) {
-        ESP_LOGE(TAG, "The Length of \"%s\" > ESP_MN_MAX_PHRASE_LEN", phoneme_string);
-        return NULL;
-    }
+    int string_len = strlen(string);
+    ESP_RETURN_ON_FALSE( string_len > 0, NULL, TAG, "input string is empty");
 
     esp_mn_phrase_t *phrase = _esp_mn_calloc_(1, sizeof(esp_mn_phrase_t));
     ESP_RETURN_ON_FALSE(NULL != phrase, NULL, TAG, "Fail to alloc mn phrase");
+
+    phrase->string = malloc((string_len+1) * sizeof(char));
+    memcpy(phrase->string, string, string_len);
+    phrase->string[string_len] = '\0';
     phrase->command_id = command_id;
     phrase->threshold = 0;
     phrase->wave = NULL;
-    memcpy(phrase->phoneme_string, phoneme_string, phoneme_string_len);
 
     return phrase;
 }
@@ -227,6 +271,12 @@ esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string)
 void esp_mn_phrase_free(esp_mn_phrase_t *phrase)
 {
     if (phrase != NULL) {
+        if (phrase->wave != NULL) {
+            free(phrase->wave);
+        }
+        if (phrase->string != NULL) {
+            free(phrase->string);
+        }
         free(phrase);
     }
 }
diff --git a/src/esp_process_sdkconfig.c b/src/esp_process_sdkconfig.c
index f38aa40..5b19fb1 100644
--- a/src/esp_process_sdkconfig.c
+++ b/src/esp_process_sdkconfig.c
@@ -876,11 +876,11 @@ char *get_id_name_en(int i)
 
 esp_mn_error_t *esp_mn_commands_update_from_sdkconfig(const esp_mn_iface_t *multinet,  model_iface_data_t *model_data)
 {
-#if defined CONFIG_SR_MN_CN_MULTINET6_QUANT || defined CONFIG_SR_MN_EN_MULTINET6_QUANT
+#if defined CONFIG_SR_MN_CN_MULTINET6_QUANT || defined CONFIG_SR_MN_EN_MULTINET6_QUANT || defined CONFIG_SR_MN_CN_MULTINET6_AC_QUANT
     return NULL;
 #endif
 
-    esp_mn_commands_alloc();
+    esp_mn_commands_alloc(multinet, model_data);
     printf("esp_mn_commands_update_from_sdkconfig\n");
     int total_phrase_num = 0;
     int language_id = 1; // 0: Chinese, 1:English
@@ -939,4 +939,4 @@ end:
     esp_mn_commands_print();
 
     return esp_mn_commands_update(multinet, model_data);
-}
\ No newline at end of file
+}
diff --git a/src/include/esp_mn_speech_commands.h b/src/include/esp_mn_speech_commands.h
index c7b2927..ba99c55 100644
--- a/src/include/esp_mn_speech_commands.h
+++ b/src/include/esp_mn_speech_commands.h
@@ -29,7 +29,7 @@ It is easy to add one speech command into linked list and remove one speech comm
  *     - ESP_ERR_NO_MEM          No memory
  *     - ESP_ERR_INVALID_STATE   The Speech Commands link has been initialized
  */
-esp_err_t esp_mn_commands_alloc(void);
+esp_err_t esp_mn_commands_alloc(esp_mn_iface_t *multinet, model_iface_data_t *model_data);
 
 /**
  * @brief Clear the speech commands linked list and free root node.
@@ -41,39 +41,39 @@ esp_err_t esp_mn_commands_alloc(void);
 esp_err_t esp_mn_commands_free(void);
 
 /**
- * @brief Add one speech commands with phoneme string and command ID
+ * @brief Add one speech commands with command string and command ID
  *
  * @param command_id      The command ID
- * @param phoneme_string  The phoneme string of the speech commands
+ * @param string  The command string of the speech commands
  *
  * @return
  *     - ESP_OK                  Success
  *     - ESP_ERR_INVALID_STATE   Fail
  */
-esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string);
+esp_err_t esp_mn_commands_add(int command_id, char *string);
 
 /**
- * @brief Modify one speech commands with new phoneme string
+ * @brief Modify one speech commands with new command string
  *
- * @param old_phoneme_string  The old phoneme string of the speech commands
- * @param new_phoneme_string  The new phoneme string of the speech commands
+ * @param old_string  The old command string of the speech commands
+ * @param new_string  The new command string of the speech commands
  *
  * @return
  *     - ESP_OK                  Success
  *     - ESP_ERR_INVALID_STATE   Fail
  */
-esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string);
+esp_err_t esp_mn_commands_modify(char *old_string, char *new_string);
 
 /**
- * @brief Remove one speech commands by phoneme string
+ * @brief Remove one speech commands by command string
  *
- * @param phoneme_string  The phoneme string of the speech commands
+ * @param string  The command string of the speech commands
  *
  * @return
  *     - ESP_OK                  Success
  *     - ESP_ERR_INVALID_STATE   Fail
  */
-esp_err_t esp_mn_commands_remove(char *phoneme_string);
+esp_err_t esp_mn_commands_remove(char *string);
 
 /**
  * @brief Clear all speech commands in linked list
@@ -96,40 +96,32 @@ esp_err_t esp_mn_commands_clear(void);
 esp_mn_phrase_t *esp_mn_commands_get_from_index(int index);
 
 /**
- * @brief Get phrase from phoneme string
+ * @brief Get phrase from command string
  *
  * @return
  *     - esp_mn_phrase_t*        Success
  *     - NULL                    Fail
  */
-esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string);
+esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *string);
 
 /**
  * @brief Update the speech commands of MultiNet
  * 
  * @Warning: Must be used after [add/remove/modify/clear] function, 
  *           otherwise the language model of multinet can not be updated.
- *
- * @param multinet            The multinet handle
- * @param model_data          The model object to query
- *
+ * 
  * @return
  *     - NULL                 Success
  *     - others               The list of error phrase which can not be parsed by multinet.
  */
-esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data);
+esp_mn_error_t *esp_mn_commands_update();
 
 /**
- * @brief Print the MultiNet Speech Commands.
- */
-void esp_mn_print_commands(void);
-
-/**
- * @brief Initialze the esp_mn_phrase_t struct by command id and phoneme string .
+ * @brief Initialize the esp_mn_phrase_t struct by command id and command string.
  *
  * @return the pointer of esp_mn_phrase_t
  */
-esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string);
+esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string);
 
 /**
  * @brief Free esp_mn_phrase_t pointer.
diff --git a/src/include/model_path.h b/src/include/model_path.h
index 9f4f6de..944d76a 100644
--- a/src/include/model_path.h
+++ b/src/include/model_path.h
@@ -16,7 +16,9 @@ typedef struct {
 
 typedef struct {
     char **model_name;                        // the name of models, like "wn9_hilexin"(wakenet9, hilexin), "mn5_en"(multinet5, english)
+#ifdef ESP_PLATFORM
     esp_partition_t *partition;               // partition label used to save the files of model
+#endif
     void * mmap_handle;                       // mmap_handle if using esp_partition_mmap else NULL; 
     int num;                                  // the number of models
     srmodel_data_t **model_data;              // the model data , NULL if spiffs format
@@ -75,7 +77,9 @@ int esp_srmodel_exists(srmodel_list_t *models, char *model_name);
  *
  * @return all avaliable models in spiffs,save as srmodel_list_t.
  */
+#ifdef ESP_PLATFORM
 srmodel_list_t *srmodel_spiffs_init(const esp_partition_t *part);
+#endif
 
 /**
  * @brief unregister SPIFFS filesystem and free srmodel_list_t.
diff --git a/src/model_path.c b/src/model_path.c
index da9916a..1258e0c 100644
--- a/src/model_path.c
+++ b/src/model_path.c
@@ -32,7 +32,9 @@ static srmodel_list_t *srmodel_list_alloc(void)
     models->model_data = NULL;
     models->model_name = NULL;
     models->num = 0;
+#ifdef ESP_PLATFORM
     models->partition = NULL;
+#endif
     models->mmap_handle = NULL;
 
     return models;
@@ -431,7 +433,9 @@ srmodel_list_t *srmodel_sdcard_init(const char *base_path)
             return models;
         } else {
             models->num = model_num;
+#ifdef ESP_PLATFORM
             models->partition = NULL;
+#endif
             models->model_name = malloc(models->num * sizeof(char *));
             for (int i = 0; i < models->num; i++) {
                 models->model_name[i] = (char *) calloc(MODEL_NAME_MAX_LENGTH, sizeof(char));