diff --git a/include/esp32/esp_mn_iface.h b/include/esp32/esp_mn_iface.h index 6f3e5ea..faccea1 100644 --- a/include/esp32/esp_mn_iface.h +++ b/include/esp32/esp_mn_iface.h @@ -3,7 +3,7 @@ #include "esp_wn_iface.h" #define ESP_MN_RESULT_MAX_NUM 5 -#define ESP_MN_MAX_PHRASE_NUM 200 +#define ESP_MN_MAX_PHRASE_NUM 400 #define ESP_MN_MAX_PHRASE_LEN 63 #define ESP_MN_MIN_PHRASE_LEN 2 @@ -12,11 +12,31 @@ #define ESP_MN_CHINESE "cn" typedef enum { - ESP_MN_STATE_DETECTING = 0, // detecting - ESP_MN_STATE_DETECTED = 1, // detected + ESP_MN_STATE_DETECTING = 0, // detecting + ESP_MN_STATE_DETECTED = 1, // detected ESP_MN_STATE_TIMEOUT = 2, // time out } esp_mn_state_t; +//Set multinet loading mode +//The memory comsumption is decreased with increasing mode, +//As a consequence also the CPU loading rate goes up +typedef enum { + ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with Maximum memory consumption + ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and laod the rest from FLASH (default) + ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation +} esp_mn_loader_mode_t; + +typedef enum { + ESP_MN_GREEDY_SEARCH = 0, // greedy search + ESP_MN_BEAM_SEARCH = 1, // beam search + ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model +} esp_mn_search_method_t; + +typedef enum { + CHINESE_ID = 1, // Chinese language + ENGLISH_ID = 2, // English language +} language_id_t; + // Return all possible recognition results typedef struct{ esp_mn_state_t state; @@ -24,18 +44,14 @@ typedef struct{ int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id. int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id. float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability. + char string[256]; } esp_mn_results_t; -typedef struct{ - int16_t num; // The number of error phrases, which can not added into model - int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM]; // The error phrase index in singly linked list. -} esp_mn_error_t; - typedef struct { - char phoneme_string[ESP_MN_MAX_PHRASE_LEN + 1]; // phoneme string - int16_t command_id; // the command id - float threshold; // trigger threshold, default: 0 - int16_t *wave; // prompt wave data of the phrase + char *string; // command string + int16_t command_id; // the command id + float threshold; // trigger threshold, default: 0 + int16_t *wave; // prompt wave data of the phrase } esp_mn_phrase_t; typedef struct _mn_node_ { @@ -43,6 +59,11 @@ typedef struct _mn_node_ { struct _mn_node_ *next; } esp_mn_node_t; +typedef struct{ + int16_t num; // The number of error phrases, which can not added into model + esp_mn_phrase_t **phrases; // The array of error phrase pointer +} esp_mn_error_t; + /** * @brief Initialze a model instance with specified model name. * @@ -53,6 +74,18 @@ typedef struct _mn_node_ { */ typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration); +/** + * @brief Switch multinet mode to change memory consumption and CPU loading + * + * @warning Just Support multinet6 or later versions + * + * @param model The model object to query + * @param mode The multinet loader mode + * + * @returns Handle to the model data. + */ +typedef model_iface_data_t* (*esp_mn_iface_op_switch_loader_mode_t)(model_iface_data_t *model, esp_mn_loader_mode_t mode); + /** * @brief Callback function type to fetch the amount of samples that need to be passed to the detect function * @@ -130,6 +163,14 @@ typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *m */ typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data); +/** + * @brief Clean all status of model + * + * @param model_data The model object to query. + * + */ +typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data); + /** * @brief Set the speech commands by mn_command_root * @@ -139,15 +180,36 @@ typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data); */ typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root); + +/** + * @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here + * + * @param model_data The model object to query +*/ +typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data); + +/** + * @brief Check if input string can be tokenized + * + * @param model_data The model object to query + * @param str The input string +*/ +typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, char *str); + typedef struct { esp_mn_iface_op_create_t create; esp_mn_iface_op_get_samp_rate_t get_samp_rate; esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize; esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum; esp_mn_iface_op_set_det_threshold_t set_det_threshold; + esp_mn_iface_op_get_language_t get_language; esp_mn_iface_op_detect_t detect; esp_mn_iface_op_destroy_t destroy; esp_mn_iface_op_get_results_t get_results; esp_mn_iface_op_open_log_t open_log; + esp_mn_iface_op_clean_t clean; esp_wn_iface_op_set_speech_commands set_speech_commands; + esp_mn_iface_op_switch_loader_mode_t switch_loader_mode; + esp_mn_iface_op_print_active_speech_commands print_active_speech_commands; + esp_mn_iface_op_check_speech_command check_speech_command; } esp_mn_iface_t; diff --git a/src/esp_mn_speech_commands.c b/src/esp_mn_speech_commands.c index bb7d5c1..f4de5d7 100644 --- a/src/esp_mn_speech_commands.c +++ b/src/esp_mn_speech_commands.c @@ -70,7 +70,7 @@ esp_err_t esp_mn_commands_clear(void) esp_mn_node_t *esp_mn_command_search(char *string) { esp_mn_node_t *temp = esp_mn_root; - if(NULL != esp_mn_root) { + if(NULL == esp_mn_root) { return NULL; }