mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
Merge branch 'feat/modify_commands' into 'master'
Update multinet API to add/modify/print/check new commands See merge request speech-recognition-framework/esp-sr!37
This commit is contained in:
commit
31b8cb660e
@ -58,7 +58,8 @@ build_esp_sr_pdf:
|
||||
script:
|
||||
- cd $DOCS_DIR
|
||||
- ./check_lang_folder_sync.sh
|
||||
- build-docs -bs latex -l $DOCLANG -t $DOCTGT
|
||||
- pip install -r requirements.txt
|
||||
- build-docs --skip-reqs-check -bs latex -l $DOCLANG -t $DOCTGT
|
||||
parallel:
|
||||
matrix:
|
||||
- DOCLANG: ["en", "zh_CN"]
|
||||
|
||||
@ -2,12 +2,13 @@
|
||||
|
||||
## Known issues:
|
||||
- Available storage is less than the remaining flash space on IDF v5.0.
|
||||
If you can not map model partition successfully, please check the left free storage by `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)`
|
||||
If you cannot map the model partition successfully, please check the remaining free storage with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later.
|
||||
|
||||
## unreleased
|
||||
## 1.3.1
|
||||
- Bugfix: remove all cxx11:string
|
||||
- Bugfix: remove esp-partition require for esp32s2 & esp32c3 on idf v4.4
|
||||
- Add more loader option for multinet to balance CPU and memory consumption
|
||||
- Bugfix: remove esp-partition for esp32s2 & esp32c3 on idf v4.4
|
||||
- Update multinet API to add/modify/check new commands in the code
|
||||
- Update documents to introduce how to use multinet API
|
||||
|
||||
## 1.3.0
|
||||
- Update the partition APIs to keep compatible with both IDF v4.4 and IDF v5.0
|
||||
|
||||
@ -153,6 +153,7 @@ choice CHINESE_SR_MN_MODEL_SEL
|
||||
config SR_MN_CN_MULTINET6_AC_QUANT
|
||||
bool "chinese recognition for air conditioner controller (mn6_cn_ac)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
endchoice
|
||||
|
||||
choice ENGLISH_SR_MN_MODEL_SEL
|
||||
|
||||
@ -68,9 +68,8 @@ MultiNet5 customize speech commands
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
MultiNet5 uses phonemes for English speech commands. For simplicity, we use characters to denote different phonemes. Please use :project_file:`tool/multinet_g2p.py` to do the conversion.
|
||||
There are two methods to customize speech commands offline:
|
||||
|
||||
- Via ``menuconfig``
|
||||
- Via ``menuconfig``
|
||||
|
||||
1. Navigate to ``idf.py menuconfig`` > ``ESP Speech Recognition`` > ``Add Chinese speech commands/Add English speech commands`` to add speech commands. For details, please refer to the example in ESP-Skainet.
|
||||
|
||||
@ -86,19 +85,122 @@ There are two methods to customize speech commands offline:
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Update the speech commands of MultiNet by menuconfig
|
||||
* @brief Update the speech commands of MultiNet by menuconfig
|
||||
*
|
||||
* @param multinet The multinet handle
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*
|
||||
* @param language The language of MultiNet
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data);
|
||||
|
||||
Customize Speech Commands Via API calls
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Alternatively, speech commands can be modified via API calls. This method works for both MultiNet5 and MultiNet6.
|
||||
|
||||
- Print active speech commands, this function will print out all speech commands that are active.
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Update the speech commands of MultiNet
|
||||
*
|
||||
* @Warning: Must be used after [add/remove/modify/clear] function,
|
||||
* otherwise the language model of multinet can not be updated.
|
||||
*
|
||||
* @param multinet The multinet handle
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*
|
||||
* @param language The language of MultiNet
|
||||
* @return
|
||||
* - NULL Success
|
||||
* - others The list of error phrase which can not be parsed by multinet.
|
||||
*/
|
||||
esp_mn_error_t *esp_mn_commands_update();
|
||||
|
||||
.. note::
|
||||
The modifications will not be applied, thus not printed out, until you call ``esp_mn_commands_update()``.
|
||||
|
||||
- Apply new changes, the add/remove/modify/clear actions will not take effect until this function is called.
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Update the speech commands of MultiNet
|
||||
*
|
||||
* @Warning: Must be used after [add/remove/modify/clear] function,
|
||||
* otherwise the language model of multinet can not be updated.
|
||||
*
|
||||
* @return
|
||||
* - NULL Success
|
||||
* - others The list of error phrase which can not be parsed by multinet.
|
||||
*/
|
||||
esp_mn_error_t *esp_mn_commands_update();
|
||||
|
||||
|
||||
- Add a new speech command, will return ``ESP_ERR_INVALID_STATE`` if the input string is not in the correct format.
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Add one speech commands with command string and command ID
|
||||
*
|
||||
* @param command_id The command ID
|
||||
* @param string The command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data);
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *string);
|
||||
|
||||
- Remove a speech command, will return ``ESP_ERR_INVALID_STATE`` if the command does not exist.
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Remove one speech commands by command string
|
||||
*
|
||||
* @param string The command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_remove(char *string);
|
||||
|
||||
- Modify a speech command, will return ``ESP_ERR_INVALID_STATE`` if the command does not exist.
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Modify one speech commands with new command string
|
||||
*
|
||||
* @param old_string The old command string of the speech commands
|
||||
* @param new_string The new command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_modify(char *old_string, char *new_string);
|
||||
|
||||
- Clear all speech commands.
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Clear all speech commands in linked list
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_clear(void);
|
||||
|
||||
Use MultiNet
|
||||
------------
|
||||
|
||||
@ -110,6 +110,109 @@ MultiNet5 定义方法:
|
||||
esp_err_t esp_mn_commands_update_from_sdkconfig(esp_mn_iface_t *multinet, const model_iface_data_t *model_data);
|
||||
|
||||
|
||||
通过调用 API 修改
|
||||
~~~~~~~~~~~~~~~~~
|
||||
指令还可以通过调用 API 修改,这种方法对于 MultiNet5 和 MultiNet6 都适用。
|
||||
|
||||
- 打印现有指令。
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Update the speech commands of MultiNet
|
||||
*
|
||||
* @Warning: Must be used after [add/remove/modify/clear] function,
|
||||
* otherwise the language model of multinet can not be updated.
|
||||
*
|
||||
* @param multinet The multinet handle
|
||||
* @param model_data The model object to query
|
||||
*
|
||||
* @return
|
||||
* - NULL Success
|
||||
* - others The list of error phrase which can not be parsed by multinet.
|
||||
*/
|
||||
esp_mn_error_t *esp_mn_commands_update();
|
||||
|
||||
.. note::
|
||||
所有修改操作在调用 ``esp_mn_commands_update()`` 后才会被打印出来。
|
||||
|
||||
- 应用新的修改操作,所有添加、移除、修改及清空操作在调用后才会被应用。
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Update the speech commands of MultiNet
|
||||
*
|
||||
* @Warning: Must be used after [add/remove/modify/clear] function,
|
||||
* otherwise the language model of multinet can not be updated.
|
||||
*
|
||||
* @return
|
||||
* - NULL Success
|
||||
* - others The list of error phrase which can not be parsed by multinet.
|
||||
*/
|
||||
esp_mn_error_t *esp_mn_commands_update();
|
||||
|
||||
|
||||
- 添加一条新指令,如果指令格式不正确则返回 ``ESP_ERR_INVALID_STATE``。
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Add one speech commands with command string and command ID
|
||||
*
|
||||
* @param command_id The command ID
|
||||
* @param string The command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *string);
|
||||
|
||||
- 移除一条指令,如果该指令不存在则返回 ``ESP_ERR_INVALID_STATE``。
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Remove one speech commands by command string
|
||||
*
|
||||
* @param string The command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_remove(char *string);
|
||||
|
||||
- 修改一条指令,如果该指令不存在则返回 ``ESP_ERR_INVALID_STATE``。
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Modify one speech commands with new command string
|
||||
*
|
||||
* @param old_string The old command string of the speech commands
|
||||
* @param new_string The new command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_modify(char *old_string, char *new_string);
|
||||
|
||||
- 清空所有指令。
|
||||
|
||||
::
|
||||
|
||||
/**
|
||||
* @brief Clear all speech commands in linked list
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_clear(void);
|
||||
|
||||
MultiNet 的使用
|
||||
----------------
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
version: "1.3.0"
|
||||
version: "1.3.1"
|
||||
description: esp_sr provides basic algorithms for Speech Recognition applications
|
||||
url: https://github.com/espressif/esp-sr
|
||||
dependencies:
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
#include "esp_wn_iface.h"
|
||||
|
||||
#define ESP_MN_RESULT_MAX_NUM 5
|
||||
#define ESP_MN_MAX_PHRASE_NUM 200
|
||||
#define ESP_MN_MAX_PHRASE_NUM 400
|
||||
#define ESP_MN_MAX_PHRASE_LEN 63
|
||||
#define ESP_MN_MIN_PHRASE_LEN 2
|
||||
|
||||
@ -12,8 +12,8 @@
|
||||
#define ESP_MN_CHINESE "cn"
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_STATE_DETECTING = 0, // detecting
|
||||
ESP_MN_STATE_DETECTED = 1, // detected
|
||||
ESP_MN_STATE_DETECTING = 0, // detecting
|
||||
ESP_MN_STATE_DETECTED = 1, // detected
|
||||
ESP_MN_STATE_TIMEOUT = 2, // time out
|
||||
} esp_mn_state_t;
|
||||
|
||||
@ -21,20 +21,20 @@ typedef enum {
|
||||
//The memory consumption is decreased with increasing mode,
|
||||
//As a consequence also the CPU loading rate goes up
|
||||
typedef enum {
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with Maximum memory consumption
|
||||
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and laod the rest from FLASH (default)
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with Maximum memory consumption
|
||||
ESP_MN_LOAD_FROM_PSRAM_FLASH = 1, // Load some weights from PSRAM and laod the rest from FLASH (default)
|
||||
ESP_MN_LOAD_FROM_FLASH = 2, // Load more weights from FLASH. Minimum memory consumption with slowest computation
|
||||
} esp_mn_loader_mode_t;
|
||||
|
||||
typedef enum {
|
||||
ESP_MN_GREEDY_SEARCH = 0, // greedy search
|
||||
ESP_MN_BEAM_SEARCH = 1, // beam search
|
||||
ESP_MN_GREEDY_SEARCH = 0, // greedy search
|
||||
ESP_MN_BEAM_SEARCH = 1, // beam search
|
||||
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
|
||||
} esp_mn_search_method_t;
|
||||
|
||||
typedef enum {
|
||||
CHINESE_ID = 1, // Chinese language
|
||||
ENGLISH_ID = 2, // English language
|
||||
CHINESE_ID = 1, // Chinese language
|
||||
ENGLISH_ID = 2, // English language
|
||||
} language_id_t;
|
||||
|
||||
// Return all possible recognition results
|
||||
@ -47,17 +47,11 @@ typedef struct{
|
||||
char string[256];
|
||||
} esp_mn_results_t;
|
||||
|
||||
|
||||
typedef struct{
|
||||
int16_t num; // The number of error phrases, which can not added into model
|
||||
int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM]; // The error phrase index in singly linked list.
|
||||
} esp_mn_error_t;
|
||||
|
||||
typedef struct {
|
||||
char phoneme_string[ESP_MN_MAX_PHRASE_LEN + 1]; // phoneme string
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
char *string; // command string
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
} esp_mn_phrase_t;
|
||||
|
||||
typedef struct _mn_node_ {
|
||||
@ -65,6 +59,11 @@ typedef struct _mn_node_ {
|
||||
struct _mn_node_ *next;
|
||||
} esp_mn_node_t;
|
||||
|
||||
typedef struct{
|
||||
int16_t num; // The number of error phrases, which can not added into model
|
||||
esp_mn_phrase_t **phrases; // The array of error phrase pointer
|
||||
} esp_mn_error_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a model instance with specified model name.
|
||||
*
|
||||
@ -181,6 +180,22 @@ typedef void (*esp_mn_iface_op_clean_t)(model_iface_data_t *model_data);
|
||||
*/
|
||||
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Check if input string can be tokenized
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
* @param str The input string
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_check_speech_command)(model_iface_data_t *model_data, char *str);
|
||||
|
||||
typedef struct {
|
||||
esp_mn_iface_op_create_t create;
|
||||
esp_mn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
@ -195,4 +210,6 @@ typedef struct {
|
||||
esp_mn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_set_speech_commands set_speech_commands;
|
||||
esp_mn_iface_op_switch_loader_mode_t switch_loader_mode;
|
||||
esp_mn_iface_op_print_active_speech_commands print_active_speech_commands;
|
||||
esp_mn_iface_op_check_speech_command check_speech_command;
|
||||
} esp_mn_iface_t;
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,2 +1,2 @@
|
||||
# (neural network type)_(model data version)_(label1_detection window length_threshold for 90%_threshold for 95%)_(label2 ...)_...
|
||||
MN6_v1_chinese_8_0.9_0.90
|
||||
MN6_v3_chinese_8_0.9_0.90
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -4,9 +4,13 @@
|
||||
#include "esp_log.h"
|
||||
#include "esp_heap_caps.h"
|
||||
#include "esp_mn_speech_commands.h"
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
static char *TAG = "MN_COMMAND";
|
||||
static esp_mn_node_t *esp_mn_root = NULL;
|
||||
static esp_mn_iface_t *esp_mn_model_handle = NULL;
|
||||
static model_iface_data_t *esp_mn_model_data = NULL;
|
||||
|
||||
|
||||
#define ESP_RETURN_ON_FALSE(a, err_code, log_tag, format, ...) do { \
|
||||
if (!(a)) { \
|
||||
@ -15,10 +19,14 @@ static esp_mn_node_t *esp_mn_root = NULL;
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
esp_err_t esp_mn_commands_alloc(void)
|
||||
esp_err_t esp_mn_commands_alloc(esp_mn_iface_t *multinet, model_iface_data_t *model_data)
|
||||
{
|
||||
ESP_RETURN_ON_FALSE(NULL == esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands already initialized");
|
||||
if (esp_mn_root != NULL) {
|
||||
esp_mn_commands_free();
|
||||
}
|
||||
esp_mn_root = esp_mn_node_alloc(NULL);
|
||||
esp_mn_model_handle = multinet;
|
||||
esp_mn_model_data = model_data;
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
@ -27,6 +35,8 @@ esp_err_t esp_mn_commands_free(void)
|
||||
esp_mn_commands_clear();
|
||||
esp_mn_node_free(esp_mn_root);
|
||||
esp_mn_root = NULL;
|
||||
esp_mn_model_handle = NULL;
|
||||
esp_mn_model_data = NULL;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
@ -57,14 +67,53 @@ esp_err_t esp_mn_commands_clear(void)
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string)
|
||||
{
|
||||
esp_mn_node_t *esp_mn_command_search(char *string) {
|
||||
int command_id;
|
||||
esp_mn_node_t *temp = esp_mn_root;
|
||||
ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized");
|
||||
int last_node_elem_num = esp_mn_commands_num();
|
||||
ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands phrase must less than 200");
|
||||
|
||||
esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, phoneme_string);
|
||||
while (temp->next) {
|
||||
temp = temp->next;
|
||||
if (strcmp(string, temp->phrase->string) == 0) {
|
||||
return temp;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *string)
|
||||
{
|
||||
if (NULL == esp_mn_root || esp_mn_model_handle == NULL || esp_mn_model_data == NULL) {
|
||||
ESP_LOGE(TAG, "Please create mn model first.\n");
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
esp_mn_node_t *temp = esp_mn_root;
|
||||
int last_node_elem_num = esp_mn_commands_num();
|
||||
ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands exceed ESP_MN_MAX_PHRASE_NUM");
|
||||
|
||||
if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, string) == 0) {
|
||||
// error message is printed inside check_speech_command
|
||||
ESP_LOGE(TAG, "invalid command, please check format, %s.\n", string);
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
temp = esp_mn_command_search(string);
|
||||
|
||||
if (temp != NULL) {
|
||||
// command already exists
|
||||
if (command_id != temp->phrase->command_id) {
|
||||
// change command id
|
||||
temp->phrase->command_id = command_id;
|
||||
} else {
|
||||
// it's exactly the same, do nothing
|
||||
ESP_LOGI(TAG, "command %d: (%s) already exists.", command_id, string);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
temp = esp_mn_root;
|
||||
|
||||
esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, string);
|
||||
if (phrase == NULL) {
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
@ -78,48 +127,43 @@ esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string)
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string)
|
||||
esp_err_t esp_mn_commands_modify(char *old_string, char *new_string)
|
||||
{
|
||||
if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, new_string) == 0) {
|
||||
// error message is printed inside check_speech_command
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
esp_mn_node_t *temp = esp_mn_root;
|
||||
ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized");
|
||||
|
||||
// search old phoneme_string to get command id
|
||||
bool flag = false;
|
||||
int command_id;
|
||||
while (temp->next) {
|
||||
temp = temp->next;
|
||||
if (strcmp(old_phoneme_string, temp->phrase->phoneme_string) == 0) {
|
||||
command_id = temp->phrase->command_id;
|
||||
flag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// search old string to get command id
|
||||
temp = esp_mn_command_search(old_string);
|
||||
|
||||
// replace old phrase with new phrase
|
||||
if (flag) {
|
||||
esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, new_phoneme_string);
|
||||
if (temp != NULL) {
|
||||
esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(temp->phrase->command_id, new_string);
|
||||
if (phrase == NULL) {
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
esp_mn_phrase_free(temp->phrase);
|
||||
temp->phrase = phrase;
|
||||
} else {
|
||||
ESP_LOGE(TAG, "No such speech command: \"%s\"", old_phoneme_string);
|
||||
ESP_LOGE(TAG, "No such speech command: \"%s\"", old_string);
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t esp_mn_commands_remove(char *phoneme_string)
|
||||
esp_err_t esp_mn_commands_remove(char *string)
|
||||
{
|
||||
esp_mn_node_t *temp = esp_mn_root;
|
||||
ESP_RETURN_ON_FALSE(NULL != esp_mn_root, ESP_ERR_INVALID_STATE, TAG, "The mn commands is not initialized");
|
||||
|
||||
// search phoneme_string to get node point
|
||||
// search string to get node point
|
||||
bool flag = false;
|
||||
while (temp->next) {
|
||||
if (strcmp(phoneme_string, temp->next->phrase->phoneme_string) == 0) {
|
||||
if (strcmp(string, temp->next->phrase->string) == 0) {
|
||||
flag = true;
|
||||
break;
|
||||
}
|
||||
@ -132,7 +176,7 @@ esp_err_t esp_mn_commands_remove(char *phoneme_string)
|
||||
esp_mn_node_free(rm_node);
|
||||
return ESP_OK;
|
||||
} else {
|
||||
ESP_LOGE(TAG, "No such speech command: \"%s\"", phoneme_string);
|
||||
ESP_LOGE(TAG, "No such speech command: \"%s\"", string);
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
@ -155,14 +199,14 @@ esp_mn_phrase_t *esp_mn_commands_get_from_index(int index)
|
||||
return temp->phrase;
|
||||
}
|
||||
|
||||
esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string)
|
||||
esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *string)
|
||||
{
|
||||
ESP_RETURN_ON_FALSE(NULL != esp_mn_root, NULL, TAG, "The mn commands is not initialized");
|
||||
|
||||
// phrase index also is phrase id, which is the depth from this phrase node to root node
|
||||
esp_mn_node_t *temp = esp_mn_root;
|
||||
while (temp->next) {
|
||||
if (strcmp(phoneme_string, temp->next->phrase->phoneme_string) == 0) {
|
||||
if (strcmp(string, temp->next->phrase->string) == 0) {
|
||||
return temp->next->phrase;
|
||||
}
|
||||
temp = temp->next;
|
||||
@ -171,10 +215,10 @@ esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data)
|
||||
esp_mn_error_t *esp_mn_commands_update()
|
||||
{
|
||||
ESP_RETURN_ON_FALSE(NULL != esp_mn_root, NULL, TAG, "The mn commands is not initialize");
|
||||
esp_mn_error_t *error = multinet->set_speech_commands(model_data, esp_mn_root);
|
||||
esp_mn_error_t *error = esp_mn_model_handle->set_speech_commands(esp_mn_model_data, esp_mn_root);
|
||||
|
||||
if (error->num == 0) {
|
||||
return NULL;
|
||||
@ -190,7 +234,7 @@ void esp_mn_commands_print(void)
|
||||
int phrase_id = 0;
|
||||
while (temp->next) {
|
||||
temp = temp->next;
|
||||
ESP_LOGI(TAG, "Command ID%d, phrase ID%d: %s", temp->phrase->command_id, phrase_id, temp->phrase->phoneme_string);
|
||||
ESP_LOGI(TAG, "Command ID%d, phrase ID%d: %s", temp->phrase->command_id, phrase_id, temp->phrase->string);
|
||||
phrase_id++;
|
||||
}
|
||||
ESP_LOGI(TAG, "---------------------------------------------------------\n");
|
||||
@ -205,21 +249,21 @@ void *_esp_mn_calloc_(int n, int size)
|
||||
#endif
|
||||
}
|
||||
|
||||
esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string)
|
||||
esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string)
|
||||
{
|
||||
|
||||
int phoneme_string_len = strlen(phoneme_string);
|
||||
if (phoneme_string_len > ESP_MN_MAX_PHRASE_LEN || phoneme_string_len < 1) {
|
||||
ESP_LOGE(TAG, "The Length of \"%s\" > ESP_MN_MAX_PHRASE_LEN", phoneme_string);
|
||||
return NULL;
|
||||
}
|
||||
int string_len = strlen(string);
|
||||
ESP_RETURN_ON_FALSE( string_len > 0, NULL, TAG, "input string is empty");
|
||||
|
||||
esp_mn_phrase_t *phrase = _esp_mn_calloc_(1, sizeof(esp_mn_phrase_t));
|
||||
ESP_RETURN_ON_FALSE(NULL != phrase, NULL, TAG, "Fail to alloc mn phrase");
|
||||
|
||||
phrase->string = malloc((string_len+1) * sizeof(char));
|
||||
memcpy(phrase->string, string, string_len);
|
||||
phrase->string[string_len] = '\0';
|
||||
phrase->command_id = command_id;
|
||||
phrase->threshold = 0;
|
||||
phrase->wave = NULL;
|
||||
memcpy(phrase->phoneme_string, phoneme_string, phoneme_string_len);
|
||||
|
||||
return phrase;
|
||||
}
|
||||
@ -227,6 +271,12 @@ esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string)
|
||||
void esp_mn_phrase_free(esp_mn_phrase_t *phrase)
|
||||
{
|
||||
if (phrase != NULL) {
|
||||
if (phrase->wave != NULL) {
|
||||
free(phrase->wave);
|
||||
}
|
||||
if (phrase->string != NULL) {
|
||||
free(phrase->string);
|
||||
}
|
||||
free(phrase);
|
||||
}
|
||||
}
|
||||
|
||||
@ -876,11 +876,11 @@ char *get_id_name_en(int i)
|
||||
|
||||
esp_mn_error_t *esp_mn_commands_update_from_sdkconfig(const esp_mn_iface_t *multinet, model_iface_data_t *model_data)
|
||||
{
|
||||
#if defined CONFIG_SR_MN_CN_MULTINET6_QUANT || defined CONFIG_SR_MN_EN_MULTINET6_QUANT
|
||||
#if defined CONFIG_SR_MN_CN_MULTINET6_QUANT || defined CONFIG_SR_MN_EN_MULTINET6_QUANT || defined CONFIG_SR_MN_CN_MULTINET6_AC_QUANT
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
esp_mn_commands_alloc();
|
||||
esp_mn_commands_alloc(multinet, model_data);
|
||||
printf("esp_mn_commands_update_from_sdkconfig\n");
|
||||
int total_phrase_num = 0;
|
||||
int language_id = 1; // 0: Chinese, 1:English
|
||||
@ -939,4 +939,4 @@ end:
|
||||
esp_mn_commands_print();
|
||||
|
||||
return esp_mn_commands_update(multinet, model_data);
|
||||
}
|
||||
}
|
||||
|
||||
@ -29,7 +29,7 @@ It is easy to add one speech command into linked list and remove one speech comm
|
||||
* - ESP_ERR_NO_MEM No memory
|
||||
* - ESP_ERR_INVALID_STATE The Speech Commands link has been initialized
|
||||
*/
|
||||
esp_err_t esp_mn_commands_alloc(void);
|
||||
esp_err_t esp_mn_commands_alloc(esp_mn_iface_t *multinet, model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Clear the speech commands linked list and free root node.
|
||||
@ -41,39 +41,39 @@ esp_err_t esp_mn_commands_alloc(void);
|
||||
esp_err_t esp_mn_commands_free(void);
|
||||
|
||||
/**
|
||||
* @brief Add one speech commands with phoneme string and command ID
|
||||
* @brief Add one speech commands with command string and command ID
|
||||
*
|
||||
* @param command_id The command ID
|
||||
* @param phoneme_string The phoneme string of the speech commands
|
||||
* @param string The command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string);
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *string);
|
||||
|
||||
/**
|
||||
* @brief Modify one speech commands with new phoneme string
|
||||
* @brief Modify one speech commands with new command string
|
||||
*
|
||||
* @param old_phoneme_string The old phoneme string of the speech commands
|
||||
* @param new_phoneme_string The new phoneme string of the speech commands
|
||||
* @param old_string The old command string of the speech commands
|
||||
* @param new_string The new command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string);
|
||||
esp_err_t esp_mn_commands_modify(char *old_string, char *new_string);
|
||||
|
||||
/**
|
||||
* @brief Remove one speech commands by phoneme string
|
||||
* @brief Remove one speech commands by command string
|
||||
*
|
||||
* @param phoneme_string The phoneme string of the speech commands
|
||||
* @param string The command string of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_remove(char *phoneme_string);
|
||||
esp_err_t esp_mn_commands_remove(char *string);
|
||||
|
||||
/**
|
||||
* @brief Clear all speech commands in linked list
|
||||
@ -96,40 +96,32 @@ esp_err_t esp_mn_commands_clear(void);
|
||||
esp_mn_phrase_t *esp_mn_commands_get_from_index(int index);
|
||||
|
||||
/**
|
||||
* @brief Get phrase from phoneme string
|
||||
* @brief Get phrase from command string
|
||||
*
|
||||
* @return
|
||||
* - esp_mn_phrase_t* Success
|
||||
* - NULL Fail
|
||||
*/
|
||||
esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string);
|
||||
esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *string);
|
||||
|
||||
/**
|
||||
* @brief Update the speech commands of MultiNet
|
||||
*
|
||||
* @Warning: Must be used after [add/remove/modify/clear] function,
|
||||
* otherwise the language model of multinet can not be updated.
|
||||
*
|
||||
* @param multinet The multinet handle
|
||||
* @param model_data The model object to query
|
||||
*
|
||||
*
|
||||
* @return
|
||||
* - NULL Success
|
||||
* - others The list of error phrase which can not be parsed by multinet.
|
||||
*/
|
||||
esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data);
|
||||
esp_mn_error_t *esp_mn_commands_update();
|
||||
|
||||
/**
|
||||
* @brief Print the MultiNet Speech Commands.
|
||||
*/
|
||||
void esp_mn_print_commands(void);
|
||||
|
||||
/**
|
||||
* @brief Initialze the esp_mn_phrase_t struct by command id and phoneme string .
|
||||
* @brief Initialize the esp_mn_phrase_t struct by command id and command string .
|
||||
*
|
||||
* @return the pointer of esp_mn_phrase_t
|
||||
*/
|
||||
esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string);
|
||||
esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string);
|
||||
|
||||
/**
|
||||
* @brief Free esp_mn_phrase_t pointer.
|
||||
|
||||
@ -16,7 +16,9 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
char **model_name; // the name of models, like "wn9_hilexin"(wakenet9, hilexin), "mn5_en"(multinet5, english)
|
||||
#ifdef ESP_PLATFORM
|
||||
esp_partition_t *partition; // partition label used to save the files of model
|
||||
#endif
|
||||
void * mmap_handle; // mmap_handle if using esp_partition_mmap else NULL;
|
||||
int num; // the number of models
|
||||
srmodel_data_t **model_data; // the model data , NULL if spiffs format
|
||||
@ -75,7 +77,9 @@ int esp_srmodel_exists(srmodel_list_t *models, char *model_name);
|
||||
*
|
||||
* @return all available models in spiffs, save as srmodel_list_t.
|
||||
*/
|
||||
#ifdef ESP_PLATFORM
|
||||
srmodel_list_t *srmodel_spiffs_init(const esp_partition_t *part);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief unregister SPIFFS filesystem and free srmodel_list_t.
|
||||
|
||||
@ -32,7 +32,9 @@ static srmodel_list_t *srmodel_list_alloc(void)
|
||||
models->model_data = NULL;
|
||||
models->model_name = NULL;
|
||||
models->num = 0;
|
||||
#ifdef ESP_PLATFORM
|
||||
models->partition = NULL;
|
||||
#endif
|
||||
models->mmap_handle = NULL;
|
||||
|
||||
return models;
|
||||
@ -431,7 +433,9 @@ srmodel_list_t *srmodel_sdcard_init(const char *base_path)
|
||||
return models;
|
||||
} else {
|
||||
models->num = model_num;
|
||||
#ifdef ESP_PLATFORM
|
||||
models->partition = NULL;
|
||||
#endif
|
||||
models->model_name = malloc(models->num * sizeof(char *));
|
||||
for (int i = 0; i < models->num; i++) {
|
||||
models->model_name[i] = (char *) calloc(MODEL_NAME_MAX_LENGTH, sizeof(char));
|
||||
|
||||
Loading…
Reference in New Issue
Block a user