Merge branch 'feat/support_idf_5_3' into 'master'

Feat/support idf 5 3

See merge request speech-recognition-framework/esp-sr!103
This commit is contained in:
Sun Xiang Yu 2024-08-05 10:58:33 +08:00
commit 5d718db85a
24 changed files with 130 additions and 8 deletions

View File

@ -4,6 +4,11 @@
- Available storage is less than the remaining flash space on IDF v5.0.
If you cannot map the model partition successfully, check the remaining free storage with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later.
## 1.8.0
- Support esp-idf v5.3
- Add more new wake words
- Add setting "fixed_first_channel" in afe_config
## 1.7.1
- Add 喵喵同学, Hi,joy and Hi,Lily/Hi,莉莉 wakenet models

View File

@ -162,6 +162,10 @@ choice SR_WN_MODEL_LOAD
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
depends on IDF_TARGET_ESP32S3
# Option for the wn9_hitelly_tts WakeNet model (wake word "Hi,Telly/Hi,泰力").
# Only offered when the build target is ESP32-S3.
config SR_WN_WN9_HITELLY_TTS
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
depends on IDF_TARGET_ESP32S3
config SR_WN_WN9_HEYWANDA_TTS
bool "Hey,Wanda (wn9_heywanda_tts)"
depends on IDF_TARGET_ESP32S3
@ -269,6 +273,10 @@ menu "Load Multiple Wake Words"
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
depends on IDF_TARGET_ESP32S3
# Multi-wake-word variant of the wn9_hitelly_tts option ("Hi,Telly/Hi,泰力"),
# listed alongside the other *_MULTI wake word entries.
# Only offered when the build target is ESP32-S3.
config SR_WN_WN9_HITELLY_TTS_MULTI
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
depends on IDF_TARGET_ESP32S3
config SR_WN_WN9_XIAOBINXIAOBIN_TTS_MULTI
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
depends on IDF_TARGET_ESP32S3

View File

@ -54,6 +54,7 @@ The following wake words are supported in esp-sr:
|喵喵同学 | | wn9_miaomiaotongxue_tts|
|Hi,喵喵 | | wn9_himiaomiao_tts |
|Hi,Lily/Hi,莉莉 | | wn9_hilili_tts |
|Hi,Telly/Hi,泰力 | | wn9_hitelly_tts |
|小滨小滨/小冰小冰| | wn9_xiaobinxiaobin_tts |
*NOTE:* The `_tts` suffix means that the WakeNet model was trained on TTS samples.

View File

@ -1,4 +1,4 @@
version: "1.7.1"
version: "1.8.0"
description: esp_sr provides basic algorithms for Speech Recognition applications
url: https://github.com/espressif/esp-sr
dependencies:

View File

@ -25,10 +25,6 @@
extern "C" {
#endif
// #ifdef CONFIG_IDF_TARGET_ESP32S3
// #include "dl_tie728_bzero.h"
// #endif
typedef float fptp_t;
#if CONFIG_BT_SHARE_MEM_REUSE

View File

@ -23,7 +23,8 @@ extern "C" {
#define USE_AEC_FFT // Not kiss_fft
#define AEC_USE_SPIRAM 0
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
#define AEC_FRAME_LENGTH_MS 16
//#define AEC_FRAME_LENGTH_MS 16
#define AEC_FRAME_LENGTH_MS 32
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
typedef void* aec_handle_t;

View File

@ -90,6 +90,8 @@ typedef struct {
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
afe_ns_mode_t afe_ns_mode;
char *afe_ns_model_name;
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet
} afe_config_t;
@ -121,6 +123,37 @@ typedef struct {
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
}
#elif CONFIG_IDF_TARGET_ESP32P4
/*
 * Default AFE configuration initializer for the ESP32-P4 build
 * (this definition sits in the CONFIG_IDF_TARGET_ESP32P4 branch).
 *
 * Expands to a brace-enclosed designated initializer, so it is meant to be
 * used as `afe_config_t cfg = AFE_CONFIG_DEFAULT();`.
 *
 * Defaults set here:
 *  - AEC, SE, VAD and WakeNet are enabled; voice-communication mode and its
 *    AGC are disabled.
 *  - No WakeNet or NS model name is preset (all model-name pointers are NULL).
 *  - Input PCM layout is 3 channels in total: 2 microphone channels plus
 *    1 reference channel, sampled at 16000 Hz.
 *  - Debugging is off and both debug hooks have NULL callbacks.
 *  - fixed_first_channel is true.
 */
#define AFE_CONFIG_DEFAULT() { \
.aec_init = true, \
.se_init = true, \
.vad_init = true, \
.wakenet_init = true, \
.voice_communication_init = false, \
.voice_communication_agc_init = false, \
.voice_communication_agc_gain = 15, \
.vad_mode = VAD_MODE_3, \
.wakenet_model_name = NULL, \
.wakenet_model_name_2 = NULL, \
.wakenet_mode = DET_MODE_90, \
.afe_mode = SR_MODE_HIGH_PERF, \
.afe_perferred_core = 0, \
.afe_perferred_priority = 5, \
.afe_ringbuf_size = 50, \
.memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
.afe_linear_gain = 1.0, \
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
.pcm_config.total_ch_num = 3, \
.pcm_config.mic_num = 2, \
.pcm_config.ref_num = 1, \
.pcm_config.sample_rate = 16000, \
.debug_init = false, \
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
}
#elif CONFIG_IDF_TARGET_ESP32S3
#define AFE_CONFIG_DEFAULT() { \
@ -150,6 +183,7 @@ typedef struct {
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
}
#endif

View File

@ -14,4 +14,4 @@ Now there are nsnet1 and nsnet2
* @param model_name The name of model
* @returns The handle of multinet
*/
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
wakenet9l_tts1h8_Hi,Telly or Hi,泰力_3_0.613_0.619

Binary file not shown.

Binary file not shown.

View File

@ -156,6 +156,64 @@ esp_err_t esp_mn_commands_add(int command_id, char *string)
return ESP_OK;
}
/**
 * Add one speech command together with its phoneme sequence.
 *
 * The command is validated through the model's check_speech_command() hook
 * (on the phonemes when CONFIG_SR_MN_EN_MULTINET7_QUANT is defined, on the
 * command string otherwise). If a command with the same string is already
 * registered, only its command ID is updated. Otherwise a new phrase holding
 * a private copy of the phonemes is appended to the end of the command list.
 *
 * @param command_id The command ID
 * @param string     The command string; must not be NULL
 * @param phonemes   The phonemes of the command; must not be NULL
 *
 * @return
 *     - ESP_OK                 Command added, or already present
 *     - ESP_ERR_INVALID_STATE  Model not created, NULL argument, too many
 *                              commands, invalid command, or allocation failure
 */
esp_err_t esp_mn_commands_phoneme_add(int command_id, char *string, char *phonemes)
{
    if (NULL == esp_mn_root || esp_mn_model_handle == NULL || esp_mn_model_data == NULL) {
        ESP_LOGE(TAG, "Please create mn model first.\n");
        return ESP_ERR_INVALID_STATE;
    }

    // Both strings are dereferenced below (check_speech_command, strlen),
    // so reject NULL here instead of crashing later.
    if (string == NULL || phonemes == NULL) {
        ESP_LOGE(TAG, "command string and phonemes must not be NULL");
        return ESP_ERR_INVALID_STATE;
    }

    int last_node_elem_num = esp_mn_commands_num();
    ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands exceed ESP_MN_MAX_PHRASE_NUM");

#ifdef CONFIG_SR_MN_EN_MULTINET7_QUANT
    if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, phonemes) == 0) {
        // error message is printed inside check_speech_command
        ESP_LOGE(TAG, "invalid command, please check format, %s (%s).\n", string, phonemes);
        return ESP_ERR_INVALID_STATE;
    }
#else
    if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, string) == 0) {
        // error message is printed inside check_speech_command
        ESP_LOGE(TAG, "invalid command, please check format, %s.\n", string);
        return ESP_ERR_INVALID_STATE;
    }
#endif

    esp_mn_node_t *temp = esp_mn_command_search(string);
    if (temp != NULL) {
        // command already exists
        if (command_id != temp->phrase->command_id) {
            // change command id
            temp->phrase->command_id = command_id;
        } else {
            // it's exactly the same, do nothing
            ESP_LOGI(TAG, "command %d: (%s) already exists.", command_id, string);
        }
        return ESP_OK;
    }

    esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, string);
    if (phrase == NULL) {
        return ESP_ERR_INVALID_STATE;
    }

    // Keep a private, NUL-terminated copy of the phonemes inside the phrase.
    size_t phoneme_len = strlen(phonemes);
    phrase->phonemes = _esp_mn_calloc_(phoneme_len + 1, sizeof(char));
    if (phrase->phonemes == NULL) {
        ESP_LOGE(TAG, "Fail to alloc phonemes for command %d", command_id);
        // NOTE(review): `phrase` is still allocated on this path; release it
        // with the project's phrase destructor (not visible in this chunk).
        return ESP_ERR_INVALID_STATE;
    }
    memcpy(phrase->phonemes, phonemes, phoneme_len);
    phrase->phonemes[phoneme_len] = '\0';

    esp_mn_node_t *new_node = esp_mn_node_alloc(phrase);
    if (new_node == NULL) {
        ESP_LOGE(TAG, "Fail to alloc node for command %d", command_id);
        // NOTE(review): `phrase` and its phonemes are still allocated on this
        // path; release them with the project's phrase destructor.
        return ESP_ERR_INVALID_STATE;
    }

    // Append the new node at the tail of the command list.
    temp = esp_mn_root;
    while (temp->next != NULL) {
        temp = temp->next;
    }
    temp->next = new_node;

    return ESP_OK;
}
esp_err_t esp_mn_commands_modify(char *old_string, char *new_string)
{
#ifdef CONFIG_SR_MN_EN_MULTINET7_QUANT

View File

@ -59,6 +59,20 @@ esp_err_t esp_mn_commands_free(void);
*/
esp_err_t esp_mn_commands_add(int command_id, char *string);
/**
 * @brief Add one speech command with its command string, command phonemes and command ID.
 *        Use multinet_g2p.py [esp-sr/tool/multinet_g2p.py] to generate the phonemes from the command string.
 *
 *        If a command with the same string already exists, only its command ID is updated.
 *        The phonemes are copied, so the caller keeps ownership of the buffer passed in.
 *
 * @param command_id The command ID
 * @param string     The command string of the speech command, must not be NULL
 * @param phonemes   The phonemes of the speech command, must not be NULL
 *
 * @return
 *     - ESP_OK                  Success
 *     - ESP_ERR_INVALID_STATE   Fail (model not created, too many commands,
 *                               invalid command format, or the phrase could not be allocated)
 */
esp_err_t esp_mn_commands_phoneme_add(int command_id, char *string, char *phonemes);
/**
* @brief Modify one speech commands with new command string
*
@ -178,4 +192,4 @@ void esp_mn_active_commands_print(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -4,7 +4,9 @@
#include "string.h"
#include "model_path.h"
#include "esp_wn_models.h"
#ifndef CONFIG_IDF_TARGET_ESP32P4
#include "esp_mn_models.h"
#endif
#ifdef ESP_PLATFORM
#include <sys/dirent.h>
@ -244,6 +246,7 @@ void srmodel_spiffs_deinit(srmodel_list_t *models)
}
#ifdef CONFIG_IDF_TARGET_ESP32
srmodel_list_t *srmodel_config_init()
{
if (static_srmodels == NULL) {
@ -305,6 +308,7 @@ void srmodel_config_deinit(srmodel_list_t *models)
// models is static_srmodels
static_srmodels = NULL;
}
#endif
model_coeff_getter_t *srmodel_get_model_coeff(char *model_name)
{