diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3611383..b21c262 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -58,7 +58,8 @@ build_esp_sr_pdf: script: - cd $DOCS_DIR - ./check_lang_folder_sync.sh - - build-docs -bs latex -l $DOCLANG -t $DOCTGT + - pip install -r requirements.txt + - build-docs --skip-reqs-check -bs latex -l $DOCLANG -t $DOCTGT parallel: matrix: - DOCLANG: ["en", "zh_CN"] diff --git a/CHANGELOG.md b/CHANGELOG.md index da313c6..33ef93c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,13 @@ ## Known issues: - Available storage is less than the remaining flash space on IDF v5.0. -If you can not map model partition successfully, please check the left free storage by `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` +If you can not map model partition successfully, please check the left free storage by `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later. -## unreleased +## 1.3.1 - Bugfix: remove all cxx11:string -- Bugfix: remove esp-partition require for esp32s2 & esp32c3 on idf v4.4 -- Add more loader option for multinet to blance CPU and memory consumption +- Bugfix: remove esp-partition for esp32s2 & esp32c3 on idf v4.4 +- Update multinet API to add/modify/check new commands in the code +- Update documents to introduce how to use multinet API ## 1.3.0 - Update the partition APIs to keep compatible with both IDF v4.4 and IDF v5.0 diff --git a/Kconfig.projbuild b/Kconfig.projbuild index b9118d9..dd7af8c 100644 --- a/Kconfig.projbuild +++ b/Kconfig.projbuild @@ -153,6 +153,7 @@ choice CHINESE_SR_MN_MODEL_SEL config SR_MN_CN_MULTINET6_AC_QUANT bool "chinese recognition for air conditioner controller (mn6_cn_ac)" depends on IDF_TARGET_ESP32S3 + endchoice choice ENGLISH_SR_MN_MODEL_SEL diff --git a/docs/en/speech_command_recognition/README.rst b/docs/en/speech_command_recognition/README.rst index 1bd9250..65e4d1c 100644 --- a/docs/en/speech_command_recognition/README.rst +++ 
b/docs/en/speech_command_recognition/README.rst @@ -68,9 +68,8 @@ MultiNet5 customize speech commands ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MultiNet5 use phonemes for English speech commands. For simplicity, we use characters to denote different phonemes. Please use :project_file:`tool/multinet_g2p.py` to do the convention. -There are two methods to customize speech commands offline: -- Via ``menuconfig`` +- Via ``menuconfig`` 1. Navigate to ``idf.py menuconfig`` > ``ESP Speech Recognition`` > ``Add Chinese speech commands/Add English speech commands`` to add speech commands. For details, please refer to the example in ESP-Skainet. @@ -86,19 +85,122 @@ There are two methods to customize speech commands offline: :: /** - * @brief Update the speech commands of MultiNet by menuconfig + * @brief Update the speech commands of MultiNet by menuconfig + * + * @param multinet The multinet handle + * + * @param model_data The model object to query + * + * @param language The language of MultiNet + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data); + +Customize Speech Commands Via API calls +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Alternatively, speech commands can be modified via API calls; this method works for both MultiNet5 and MultiNet6. + +- Print active speech commands, this function will print out all speech commands that are active. + + :: + + /** + * @brief Update the speech commands of MultiNet + * + * @Warning: Must be used after [add/remove/modify/clear] function, + * otherwise the language model of multinet can not be updated. * * @param multinet The multinet handle - * * @param model_data The model object to query * - * @param langugae The language of MultiNet + * @return + * - NULL Success + * - others The list of error phrase which can not be parsed by multinet. + */ + esp_mn_error_t *esp_mn_commands_update(); + + .. 
note:: + The modifications will not be applied, thus not printed out, until you call ``esp_mn_commands_update()``. + +- Apply new changes, the add/remove/modify/clear actions will not take effect until this function is called. + + :: + + /** + * @brief Update the speech commands of MultiNet + * + * @Warning: Must be used after [add/remove/modify/clear] function, + * otherwise the language model of multinet can not be updated. + * + * @return + * - NULL Success + * - others The list of error phrase which can not be parsed by multinet. + */ + esp_mn_error_t *esp_mn_commands_update(); + + +- Add a new speech command, will return ``ESP_ERR_INVALID_STATE`` if the input string is not in the correct format. + + :: + + /** + * @brief Add one speech commands with command string and command ID + * + * @param command_id The command ID + * @param string The command string of the speech commands * * @return * - ESP_OK Success * - ESP_ERR_INVALID_STATE Fail */ - esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data); + esp_err_t esp_mn_commands_add(int command_id, char *string); + +- Remove a speech command, will return ``ESP_ERR_INVALID_STATE`` if the command does not exist. + + :: + + /** + * @brief Remove one speech commands by command string + * + * @param string The command string of the speech commands + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_remove(char *string); + +- Modify a speech command, will return ``ESP_ERR_INVALID_STATE`` if the command does not exist. + + :: + + /** + * @brief Modify one speech commands with new command string + * + * @param old_string The old command string of the speech commands + * @param new_string The new command string of the speech commands + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_modify(char *old_string, char *new_string); + +- Clear all speech commands. 
+ + :: + + /** + * @brief Clear all speech commands in linked list + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_clear(void); Use MultiNet ------------ diff --git a/docs/zh_CN/speech_command_recognition/README.rst b/docs/zh_CN/speech_command_recognition/README.rst index cecafa1..df04735 100644 --- a/docs/zh_CN/speech_command_recognition/README.rst +++ b/docs/zh_CN/speech_command_recognition/README.rst @@ -110,6 +110,109 @@ MultiNet5 定义方法: esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data); +通过调用 API 修改 +~~~~~~~~~~~~~~~~~ +指令还可以通过调用 API 修改,这种方法对于 MultiNet5 和 MultiNet6 都适用。 + +- 打印现有指令。 + + :: + + /** + * @brief Update the speech commands of MultiNet + * + * @Warning: Must be used after [add/remove/modify/clear] function, + * otherwise the language model of multinet can not be updated. + * + * @param multinet The multinet handle + * @param model_data The model object to query + * + * @return + * - NULL Success + * - others The list of error phrase which can not be parsed by multinet. + */ + esp_mn_error_t *esp_mn_commands_update(); + + .. note:: + 所有修改操作在调用 ``esp_mn_commands_update()`` 后才会被打印出来。 + +- 应用新的修改操作,所有添加、移除、修改及清空操作在调用后才会被应用。 + + :: + + /** + * @brief Update the speech commands of MultiNet + * + * @Warning: Must be used after [add/remove/modify/clear] function, + * otherwise the language model of multinet can not be updated. + * + * @return + * - NULL Success + * - others The list of error phrase which can not be parsed by multinet. 
+ */ + esp_mn_error_t *esp_mn_commands_update(); + + +- 添加一条新指令,如果指令格式不正确则返回 ``ESP_ERR_INVALID_STATE``。 + + :: + + /** + * @brief Add one speech commands with command string and command ID + * + * @param command_id The command ID + * @param string The command string of the speech commands + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_add(int command_id, char *string); + +- 移除一条指令,如果该指令不存在则返回 ``ESP_ERR_INVALID_STATE``。 + + :: + + /** + * @brief Remove one speech commands by command string + * + * @param string The command string of the speech commands + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_remove(char *string); + +- 修改一条指令,如果该指令不存在则返回 ``ESP_ERR_INVALID_STATE``。 + + :: + + /** + * @brief Modify one speech commands with new command string + * + * @param old_string The old command string of the speech commands + * @param new_string The new command string of the speech commands + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_modify(char *old_string, char *new_string); + +- 清空所有指令。 + + :: + + /** + * @brief Clear all speech commands in linked list + * + * @return + * - ESP_OK Success + * - ESP_ERR_INVALID_STATE Fail + */ + esp_err_t esp_mn_commands_clear(void); + MultiNet 的使用 ---------------- diff --git a/idf_component.yml b/idf_component.yml index 4fd65a6..03d8e89 100644 --- a/idf_component.yml +++ b/idf_component.yml @@ -1,4 +1,4 @@ -version: "1.3.0" +version: "1.3.1" description: esp_sr provides basic algorithms for Speech Recognition applications url: https://github.com/espressif/esp-sr dependencies: diff --git a/include/esp32s3/esp_mn_iface.h b/include/esp32s3/esp_mn_iface.h index 3c6b186..faccea1 100644 --- a/include/esp32s3/esp_mn_iface.h +++ b/include/esp32s3/esp_mn_iface.h @@ -3,7 +3,7 @@ #include "esp_wn_iface.h" #define ESP_MN_RESULT_MAX_NUM 5 -#define ESP_MN_MAX_PHRASE_NUM 200 +#define 
ESP_MN_MAX_PHRASE_NUM 400 #define ESP_MN_MAX_PHRASE_LEN 63 #define ESP_MN_MIN_PHRASE_LEN 2 @@ -12,8 +12,8 @@ #define ESP_MN_CHINESE "cn" typedef enum { - ESP_MN_STATE_DETECTING = 0, // detecting - ESP_MN_STATE_DETECTED = 1, // detected + ESP_MN_STATE_DETECTING = 0, // detecting + ESP_MN_STATE_DETECTED = 1, // detected ESP_MN_STATE_TIMEOUT = 2, // time out } esp_mn_state_t; @@ -21,20 +21,20 @@ typedef enum { //The memory comsumption is decreased with increasing mode, //As a consequence also the CPU loading rate goes up typedef enum { - ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with Maximum memory consumption - ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and laod the rest from FLASH (default) + ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with Maximum memory consumption + ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and laod the rest from FLASH (default) ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation } esp_mn_loader_mode_t; typedef enum { - ESP_MN_GREEDY_SEARCH = 0, // greedy search - ESP_MN_BEAM_SEARCH = 1, // beam search + ESP_MN_GREEDY_SEARCH = 0, // greedy search + ESP_MN_BEAM_SEARCH = 1, // beam search ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model } esp_mn_search_method_t; typedef enum { - CHINESE_ID = 1, // Chinese language - ENGLISH_ID = 2, // English language + CHINESE_ID = 1, // Chinese language + ENGLISH_ID = 2, // English language } language_id_t; // Return all possible recognition results @@ -47,17 +47,11 @@ typedef struct{ char string[256]; } esp_mn_results_t; - -typedef struct{ - int16_t num; // The number of error phrases, which can not added into model - int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM]; // The error phrase index in singly linked list. 
-} esp_mn_error_t; - typedef struct { - char phoneme_string[ESP_MN_MAX_PHRASE_LEN + 1]; // phoneme string - int16_t command_id; // the command id - float threshold; // trigger threshold, default: 0 - int16_t *wave; // prompt wave data of the phrase + char *string; // command string + int16_t command_id; // the command id + float threshold; // trigger threshold, default: 0 + int16_t *wave; // prompt wave data of the phrase } esp_mn_phrase_t; typedef struct _mn_node_ { @@ -65,6 +59,11 @@ typedef struct _mn_node_ { struct _mn_node_ *next; } esp_mn_node_t; +typedef struct{ + int16_t num; // The number of error phrases, which can not added into model + esp_mn_phrase_t **phrases; // The array of error phrase pointer +} esp_mn_error_t; + /** * @brief Initialze a model instance with specified model name. * @@ -181,6 +180,22 @@ typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data); */ typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root); + +/** + * @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here + * + * @param model_data The model object to query +*/ +typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data); + +/** + * @brief Check if input string can be tokenized + * + * @param model_data The model object to query + * @param str The input string +*/ +typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, char *str); + typedef struct { esp_mn_iface_op_create_t create; esp_mn_iface_op_get_samp_rate_t get_samp_rate; @@ -195,4 +210,6 @@ typedef struct { esp_mn_iface_op_clean_t clean; esp_wn_iface_op_set_speech_commands set_speech_commands; esp_mn_iface_op_switch_loader_mode_t switch_loader_mode; + esp_mn_iface_op_print_active_speech_commands print_active_speech_commands; + esp_mn_iface_op_check_speech_command check_speech_command; } esp_mn_iface_t; diff --git 
a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index 73d0429..29dd985 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libfst.a b/lib/esp32s3/libfst.a index b4b2f99..644c4e6 100644 Binary files a/lib/esp32s3/libfst.a and b/lib/esp32s3/libfst.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index 545433e..84aa19e 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index 2cde79b..6dc5573 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ diff --git a/model/multinet_model/mn6_cn/_MODEL_INFO_ b/model/multinet_model/mn6_cn/_MODEL_INFO_ index dd9fa1f..626a840 100644 --- a/model/multinet_model/mn6_cn/_MODEL_INFO_ +++ b/model/multinet_model/mn6_cn/_MODEL_INFO_ @@ -1,2 +1,2 @@ # (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_... 
-MN6_v1_chinese_8_0.9_0.90 +MN6_v3_chinese_8_0.9_0.90 diff --git a/model/multinet_model/mn6_cn/mn6_data b/model/multinet_model/mn6_cn/mn6_data index 88a6e06..b0e33f6 100644 Binary files a/model/multinet_model/mn6_cn/mn6_data and b/model/multinet_model/mn6_cn/mn6_data differ diff --git a/model/multinet_model/mn6_cn/mn6_index b/model/multinet_model/mn6_cn/mn6_index index 6a6c504..cecbd2f 100644 Binary files a/model/multinet_model/mn6_cn/mn6_index and b/model/multinet_model/mn6_cn/mn6_index differ diff --git a/model/multinet_model/mn6_cn_ac/mn6_data b/model/multinet_model/mn6_cn_ac/mn6_data index b57da4c..da18bd4 100644 Binary files a/model/multinet_model/mn6_cn_ac/mn6_data and b/model/multinet_model/mn6_cn_ac/mn6_data differ diff --git a/model/multinet_model/mn6_cn_ac/mn6_index b/model/multinet_model/mn6_cn_ac/mn6_index index fe8da15..1cdf2f5 100644 Binary files a/model/multinet_model/mn6_cn_ac/mn6_index and b/model/multinet_model/mn6_cn_ac/mn6_index differ diff --git a/src/esp_mn_speech_commands.c b/src/esp_mn_speech_commands.c index 710bcf0..bcd99b8 100644 --- a/src/esp_mn_speech_commands.c +++ b/src/esp_mn_speech_commands.c @@ -4,9 +4,13 @@ #include "esp_log.h" #include "esp_heap_caps.h" #include "esp_mn_speech_commands.h" +#include "esp_mn_iface.h" static char *TAG = "MN_COMMAND"; static esp_mn_node_t *esp_mn_root = NULL; +static esp_mn_iface_t *esp_mn_model_handle = NULL; +static model_iface_data_t *esp_mn_model_data = NULL; + #define ESP_RETURN_ON_FALSE(a, err_code, log_tag, format, ...) 
do { \ if (!(a)) { \ @@ -15,10 +19,14 @@ static esp_mn_node_t *esp_mn_root = NULL; } \ } while(0) -esp_err_t esp_mn_commands_alloc(void) +esp_err_t esp_mn_commands_alloc(esp_mn_iface_t *multinet, model_iface_data_t *model_data) { - ESP_RETURN_ON_FALSE(NULL == esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands already initialized"); + if (esp_mn_root != NULL) { + esp_mn_commands_free(); + } esp_mn_root = esp_mn_node_alloc(NULL); + esp_mn_model_handle = multinet; + esp_mn_model_data = model_data; return ESP_OK; } @@ -27,6 +35,8 @@ esp_err_t esp_mn_commands_free(void) esp_mn_commands_clear(); esp_mn_node_free(esp_mn_root); esp_mn_root = NULL; + esp_mn_model_handle = NULL; + esp_mn_model_data = NULL; return ESP_OK; } @@ -57,14 +67,53 @@ esp_err_t esp_mn_commands_clear(void) return ESP_OK; } -esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string) -{ +esp_mn_node_t *esp_mn_command_search(char *string) { + int command_id; esp_mn_node_t *temp = esp_mn_root; ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized"); - int last_node_elem_num = esp_mn_commands_num(); - ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands phrase must less than 200"); - esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, phoneme_string); + while (temp->next) { + temp = temp->next; + if (strcmp(string, temp->phrase->string) == 0) { + return temp; + } + } + return NULL; +} + +esp_err_t esp_mn_commands_add(int command_id, char *string) +{ + if (NULL == esp_mn_root || esp_mn_model_handle == NULL || esp_mn_model_data == NULL) { + ESP_LOGE(TAG, "Please create mn model first.\n"); + return ESP_ERR_INVALID_STATE; + } + esp_mn_node_t *temp = esp_mn_root; + int last_node_elem_num = esp_mn_commands_num(); + ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands exceed ESP_MN_MAX_PHRASE_NUM"); + 
+ if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, string) == 0) { + // error message is printed inside check_speech_command + ESP_LOGE(TAG, "invalid command, please check format, %s.\n", string); + return ESP_ERR_INVALID_STATE; + } + + temp = esp_mn_command_search(string); + + if (temp != NULL) { + // command already exists + if (command_id != temp->phrase->command_id) { + // change command id + temp->phrase->command_id = command_id; + } else { + // it's exactly the same, do nothing + ESP_LOGI(TAG, "command %d: (%s) already exists.", command_id, string); + } + return ESP_OK; + } + + temp = esp_mn_root; + + esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, string); if (phrase == NULL) { return ESP_ERR_INVALID_STATE; } @@ -78,48 +127,43 @@ esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string) return ESP_OK; } -esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string) +esp_err_t esp_mn_commands_modify(char *old_string, char *new_string) { + if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, new_string) == 0) { + // error message is printed inside check_speech_command + return ESP_ERR_INVALID_STATE; + } esp_mn_node_t *temp = esp_mn_root; ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized"); - // search old phoneme_string to get command id - bool flag = false; - int command_id; - while (temp->next) { - temp = temp->next; - if (strcmp(old_phoneme_string, temp->phrase->phoneme_string) == 0) { - command_id = temp->phrase->command_id; - flag = true; - break; - } - } + // search old string to get command id + temp = esp_mn_command_search(old_string); // replace old phrase with new phrase - if (flag) { - esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, new_phoneme_string); + if (temp != NULL) { + esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(temp->phrase->command_id, new_string); if (phrase == NULL) { return ESP_ERR_INVALID_STATE; } 
esp_mn_phrase_free(temp->phrase); temp->phrase = phrase; } else { - ESP_LOGE(TAG, "No such speech command: \"%s\"", old_phoneme_string); + ESP_LOGE(TAG, "No such speech command: \"%s\"", old_string); return ESP_ERR_INVALID_STATE; } return ESP_OK; } -esp_err_t esp_mn_commands_remove(char *phoneme_string) +esp_err_t esp_mn_commands_remove(char *string) { esp_mn_node_t *temp = esp_mn_root; ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized"); - // search phoneme_string to get node point + // search string to get node point bool flag = false; while (temp->next) { - if (strcmp(phoneme_string, temp->next->phrase->phoneme_string) == 0) { + if (strcmp(string, temp->next->phrase->string) == 0) { flag = true; break; } @@ -132,7 +176,7 @@ esp_err_t esp_mn_commands_remove(char *phoneme_string) esp_mn_node_free(rm_node); return ESP_OK; } else { - ESP_LOGE(TAG, "No such speech command: \"%s\"", phoneme_string); + ESP_LOGE(TAG, "No such speech command: \"%s\"", string); return ESP_ERR_INVALID_STATE; } @@ -155,14 +199,14 @@ esp_mn_phrase_t *esp_mn_commands_get_from_index(int index) return temp->phrase; } -esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string) +esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *string) { ESP_RETURN_ON_FALSE(NULL != esp_mn_root, NULL, TAG, "The mn commands is not initialized"); // phrase index also is phrase id, which is the depth from this phrase node to root node esp_mn_node_t *temp = esp_mn_root; while (temp->next) { - if (strcmp(phoneme_string, temp->next->phrase->phoneme_string) == 0) { + if (strcmp(string, temp->next->phrase->string) == 0) { return temp->next->phrase; } temp = temp->next; @@ -171,10 +215,10 @@ esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string) return NULL; } -esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data) +esp_mn_error_t *esp_mn_commands_update() { 
ESP_RETURN_ON_FALSE(NULL != esp_mn_root, NULL, TAG, "The mn commands is not initialize"); - esp_mn_error_t *error = multinet->set_speech_commands(model_data, esp_mn_root); + esp_mn_error_t *error = esp_mn_model_handle->set_speech_commands(esp_mn_model_data, esp_mn_root); if (error->num == 0) { return NULL; @@ -190,7 +234,7 @@ void esp_mn_commands_print(void) int phrase_id = 0; while (temp->next) { temp = temp->next; - ESP_LOGI(TAG, "Command ID%d, phrase ID%d: %s", temp->phrase->command_id, phrase_id, temp->phrase->phoneme_string); + ESP_LOGI(TAG, "Command ID%d, phrase ID%d: %s", temp->phrase->command_id, phrase_id, temp->phrase->string); phrase_id++; } ESP_LOGI(TAG, "---------------------------------------------------------\n"); @@ -205,21 +249,21 @@ void *_esp_mn_calloc_(int n, int size) #endif } -esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string) +esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string) { - int phoneme_string_len = strlen(phoneme_string); - if (phoneme_string_len > ESP_MN_MAX_PHRASE_LEN || phoneme_string_len < 1) { - ESP_LOGE(TAG, "The Length of \"%s\" > ESP_MN_MAX_PHRASE_LEN", phoneme_string); - return NULL; - } + int string_len = strlen(string); + ESP_RETURN_ON_FALSE( string_len > 0, NULL, TAG, "input string is empty"); esp_mn_phrase_t *phrase = _esp_mn_calloc_(1, sizeof(esp_mn_phrase_t)); ESP_RETURN_ON_FALSE(NULL != phrase, NULL, TAG, "Fail to alloc mn phrase"); + + phrase->string = malloc((string_len+1) * sizeof(char)); + memcpy(phrase->string, string, string_len); + phrase->string[string_len] = '\0'; phrase->command_id = command_id; phrase->threshold = 0; phrase->wave = NULL; - memcpy(phrase->phoneme_string, phoneme_string, phoneme_string_len); return phrase; } @@ -227,6 +271,12 @@ esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string) void esp_mn_phrase_free(esp_mn_phrase_t *phrase) { if (phrase != NULL) { + if (phrase->wave != NULL) { + free(phrase->wave); + } + if (phrase->string != 
NULL) { + free(phrase->string); + } free(phrase); } } diff --git a/src/esp_process_sdkconfig.c b/src/esp_process_sdkconfig.c index f38aa40..5b19fb1 100644 --- a/src/esp_process_sdkconfig.c +++ b/src/esp_process_sdkconfig.c @@ -876,11 +876,11 @@ char *get_id_name_en(int i) esp_mn_error_t *esp_mn_commands_update_from_sdkconfig(const esp_mn_iface_t *multinet, model_iface_data_t *model_data) { -#if defined CONFIG_SR_MN_CN_MULTINET6_QUANT || defined CONFIG_SR_MN_EN_MULTINET6_QUANT +#if defined CONFIG_SR_MN_CN_MULTINET6_QUANT || defined CONFIG_SR_MN_EN_MULTINET6_QUANT || defined CONFIG_SR_MN_CN_MULTINET6_AC_QUANT return NULL; #endif - esp_mn_commands_alloc(); + esp_mn_commands_alloc(multinet, model_data); printf("esp_mn_commands_update_from_sdkconfig\n"); int total_phrase_num = 0; int language_id = 1; // 0: Chinese, 1:English @@ -939,4 +939,4 @@ end: esp_mn_commands_print(); return esp_mn_commands_update(multinet, model_data); -} \ No newline at end of file +} diff --git a/src/include/esp_mn_speech_commands.h b/src/include/esp_mn_speech_commands.h index c7b2927..ba99c55 100644 --- a/src/include/esp_mn_speech_commands.h +++ b/src/include/esp_mn_speech_commands.h @@ -29,7 +29,7 @@ It is easy to add one speech command into linked list and remove one speech comm * - ESP_ERR_NO_MEM No memory * - ESP_ERR_INVALID_STATE The Speech Commands link has been initialized */ -esp_err_t esp_mn_commands_alloc(void); +esp_err_t esp_mn_commands_alloc(esp_mn_iface_t *multinet, model_iface_data_t *model_data); /** * @brief Clear the speech commands linked list and free root node. 
@@ -41,39 +41,39 @@ esp_err_t esp_mn_commands_alloc(void); esp_err_t esp_mn_commands_free(void); /** - * @brief Add one speech commands with phoneme string and command ID + * @brief Add one speech commands with command string and command ID * * @param command_id The command ID - * @param phoneme_string The phoneme string of the speech commands + * @param string The command string of the speech commands * * @return * - ESP_OK Success * - ESP_ERR_INVALID_STATE Fail */ -esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string); +esp_err_t esp_mn_commands_add(int command_id, char *string); /** - * @brief Modify one speech commands with new phoneme string + * @brief Modify one speech commands with new command string * - * @param old_phoneme_string The old phoneme string of the speech commands - * @param new_phoneme_string The new phoneme string of the speech commands + * @param old_string The old command string of the speech commands + * @param new_string The new command string of the speech commands * * @return * - ESP_OK Success * - ESP_ERR_INVALID_STATE Fail */ -esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string); +esp_err_t esp_mn_commands_modify(char *old_string, char *new_string); /** - * @brief Remove one speech commands by phoneme string + * @brief Remove one speech commands by command string * - * @param phoneme_string The phoneme string of the speech commands + * @param string The command string of the speech commands * * @return * - ESP_OK Success * - ESP_ERR_INVALID_STATE Fail */ -esp_err_t esp_mn_commands_remove(char *phoneme_string); +esp_err_t esp_mn_commands_remove(char *string); /** * @brief Clear all speech commands in linked list @@ -96,40 +96,32 @@ esp_err_t esp_mn_commands_clear(void); esp_mn_phrase_t *esp_mn_commands_get_from_index(int index); /** - * @brief Get phrase from phoneme string + * @brief Get phrase from command string * * @return * - esp_mn_phrase_t* Success * - NULL Fail */ -esp_mn_phrase_t 
*esp_mn_commands_get_from_string(const char *phoneme_string); +esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *string); /** * @brief Update the speech commands of MultiNet * * @Warning: Must be used after [add/remove/modify/clear] function, * otherwise the language model of multinet can not be updated. - * - * @param multinet The multinet handle - * @param model_data The model object to query - * + * * @return * - NULL Success * - others The list of error phrase which can not be parsed by multinet. */ -esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data); +esp_mn_error_t *esp_mn_commands_update(); /** - * @brief Print the MultiNet Speech Commands. - */ -void esp_mn_print_commands(void); - -/** - * @brief Initialze the esp_mn_phrase_t struct by command id and phoneme string . + * @brief Initialize the esp_mn_phrase_t struct by command id and command string. * * @return the pointer of esp_mn_phrase_t */ -esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string); +esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string); /** * @brief Free esp_mn_phrase_t pointer. diff --git a/src/include/model_path.h b/src/include/model_path.h index 9f4f6de..944d76a 100644 --- a/src/include/model_path.h +++ b/src/include/model_path.h @@ -16,7 +16,9 @@ typedef struct { typedef struct { char **model_name; // the name of models, like "wn9_hilexin"(wakenet9, hilexin), "mn5_en"(multinet5, english) +#ifdef ESP_PLATFORM esp_partition_t *partition; // partition label used to save the files of model +#endif void * mmap_handle; // mmap_handle if using esp_partition_mmap else NULL; int num; // the number of models srmodel_data_t **model_data; // the model data , NULL if spiffs format @@ -75,7 +77,9 @@ int esp_srmodel_exists(srmodel_list_t *models, char *model_name); * * @return all avaliable models in spiffs,save as srmodel_list_t. 
*/ +#ifdef ESP_PLATFORM srmodel_list_t *srmodel_spiffs_init(const esp_partition_t *part); +#endif /** * @brief unregister SPIFFS filesystem and free srmodel_list_t. diff --git a/src/model_path.c b/src/model_path.c index da9916a..1258e0c 100644 --- a/src/model_path.c +++ b/src/model_path.c @@ -32,7 +32,9 @@ static srmodel_list_t *srmodel_list_alloc(void) models->model_data = NULL; models->model_name = NULL; models->num = 0; +#ifdef ESP_PLATFORM models->partition = NULL; +#endif models->mmap_handle = NULL; return models; @@ -431,7 +433,9 @@ srmodel_list_t *srmodel_sdcard_init(const char *base_path) return models; } else { models->num = model_num; +#ifdef ESP_PLATFORM models->partition = NULL; +#endif models->model_name = malloc(models->num * sizeof(char *)); for (int i = 0; i < models->num; i++) { models->model_name[i] = (char *) calloc(MODEL_NAME_MAX_LENGTH, sizeof(char));