mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
Merge branch 'feat/support_idf_5_3' into 'master'
Feat/support idf 5 3 See merge request speech-recognition-framework/esp-sr!103
This commit is contained in:
commit
5d718db85a
@ -4,6 +4,11 @@
|
||||
- Available storage is less than the remaining flash space on IDF v5.0.
|
||||
If you cannot map the model partition successfully, please check the remaining free storage with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later.
|
||||
|
||||
## 1.8.0
|
||||
- Support esp-idf v5.3
|
||||
- Add more new wake words
|
||||
- Add setting "fixed_first_channel" in afe_config
|
||||
|
||||
## 1.7.1
|
||||
- Add 喵喵同学,Hi,joy, (Hi,Lily/Hi,莉莉) wakenet model
|
||||
|
||||
|
||||
@ -162,6 +162,10 @@ choice SR_WN_MODEL_LOAD
|
||||
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_WN_WN9_HITELLY_TTS
|
||||
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_WN_WN9_HEYWANDA_TTS
|
||||
bool "Hey,Wanda (wn9_heywanda_tts)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
@ -269,6 +273,10 @@ menu "Load Multiple Wake Words"
|
||||
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_WN_WN9_HITELLY_TTS_MULTI
|
||||
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_WN_WN9_XIAOBINXIAOBIN_TTS_MULTI
|
||||
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
@ -54,6 +54,7 @@ The following wake words are supported in esp-sr:
|
||||
|喵喵同学 | | wn9_miaomiaotongxue_tts|
|
||||
|Hi,喵喵 | | wn9_himiaomiao_tts |
|
||||
|Hi,Lily/Hi,莉莉 | | wn9_hilili_tts |
|
||||
|Hi,Telly/Hi,泰力 | | wn9_hitelly_tts |
|
||||
|小滨小滨/小冰小冰| | wn9_xiaobinxiaobin_tts |
|
||||
|
||||
*NOTE:* The `_tts` suffix means that the WakeNet model was trained on TTS samples.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
version: "1.7.1"
|
||||
version: "1.8.0"
|
||||
description: esp_sr provides basic algorithms for Speech Recognition applications
|
||||
url: https://github.com/espressif/esp-sr
|
||||
dependencies:
|
||||
|
||||
@ -25,10 +25,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// #ifdef CONFIG_IDF_TARGET_ESP32S3
|
||||
// #include "dl_tie728_bzero.h"
|
||||
// #endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
|
||||
@ -23,7 +23,8 @@ extern "C" {
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_USE_SPIRAM 0
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
#define AEC_FRAME_LENGTH_MS 16
|
||||
//#define AEC_FRAME_LENGTH_MS 16
|
||||
#define AEC_FRAME_LENGTH_MS 32
|
||||
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
|
||||
|
||||
typedef void* aec_handle_t;
|
||||
|
||||
@ -90,6 +90,8 @@ typedef struct {
|
||||
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
|
||||
afe_ns_mode_t afe_ns_mode;
|
||||
char *afe_ns_model_name;
|
||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||
// otherwise, select channel number by wakenet
|
||||
} afe_config_t;
|
||||
|
||||
|
||||
@ -121,6 +123,37 @@ typedef struct {
|
||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||
.afe_ns_mode = NS_MODE_SSP, \
|
||||
.afe_ns_model_name = NULL, \
|
||||
.fixed_first_channel = true, \
|
||||
}
|
||||
#elif CONFIG_IDF_TARGET_ESP32P4
|
||||
/**
 * Default initializer for afe_config_t, selected when
 * CONFIG_IDF_TARGET_ESP32P4 is set.
 *
 * Sets aec_init, se_init, vad_init and wakenet_init to true, leaves
 * voice_communication_init and debug_init false, and describes a 3-channel
 * PCM input (mic_num = 2, ref_num = 1) with sample_rate 16000.
 * wakenet_model_name, wakenet_model_name_2 and afe_ns_model_name are NULL,
 * so no model name is preset by this macro.
 */
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.voice_communication_init = false, \
|
||||
.voice_communication_agc_init = false, \
|
||||
.voice_communication_agc_gain = 15, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model_name = NULL, \
|
||||
.wakenet_model_name_2 = NULL, \
|
||||
.wakenet_mode = DET_MODE_90, \
|
||||
.afe_mode = SR_MODE_HIGH_PERF, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
|
||||
.afe_linear_gain = 1.0, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 3, \
|
||||
.pcm_config.mic_num = 2, \
|
||||
.pcm_config.ref_num = 1, \
|
||||
.pcm_config.sample_rate = 16000, \
|
||||
.debug_init = false, \
|
||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||
.afe_ns_mode = NS_MODE_SSP, \
|
||||
.afe_ns_model_name = NULL, \
|
||||
.fixed_first_channel = true, \
|
||||
}
|
||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
@ -150,6 +183,7 @@ typedef struct {
|
||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||
.afe_ns_mode = NS_MODE_SSP, \
|
||||
.afe_ns_model_name = NULL, \
|
||||
.fixed_first_channel = true, \
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -14,4 +14,4 @@ Now there are nsnet1 and nsnet2
|
||||
* @param model_name The name of model
|
||||
* @returns The handle of nsnet
|
||||
*/
|
||||
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
|
||||
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1
model/wakenet_model/wn9_hitelly_tts/_MODEL_INFO_
Normal file
1
model/wakenet_model/wn9_hitelly_tts/_MODEL_INFO_
Normal file
@ -0,0 +1 @@
|
||||
wakenet9l_tts1h8_Hi,Telly or Hi,泰力_3_0.613_0.619
|
||||
BIN
model/wakenet_model/wn9_hitelly_tts/wn9_data
Normal file
BIN
model/wakenet_model/wn9_hitelly_tts/wn9_data
Normal file
Binary file not shown.
BIN
model/wakenet_model/wn9_hitelly_tts/wn9_index
Normal file
BIN
model/wakenet_model/wn9_hitelly_tts/wn9_index
Normal file
Binary file not shown.
@ -156,6 +156,64 @@ esp_err_t esp_mn_commands_add(int command_id, char *string)
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t esp_mn_commands_phoneme_add(int command_id, char *string, char *phonemes)
|
||||
{
|
||||
if (NULL == esp_mn_root || esp_mn_model_handle == NULL || esp_mn_model_data == NULL) {
|
||||
ESP_LOGE(TAG, "Please create mn model first.\n");
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
esp_mn_node_t *temp = esp_mn_root;
|
||||
int last_node_elem_num = esp_mn_commands_num();
|
||||
ESP_RETURN_ON_FALSE(ESP_MN_MAX_PHRASE_NUM >= last_node_elem_num, ESP_ERR_INVALID_STATE, TAG, "The number of speech commands exceed ESP_MN_MAX_PHRASE_NUM");
|
||||
|
||||
#ifdef CONFIG_SR_MN_EN_MULTINET7_QUANT
|
||||
if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, phonemes) == 0) {
|
||||
// error message is printed inside check_speech_command
|
||||
ESP_LOGE(TAG, "invalid command, please check format, %s (%s).\n", string, phonemes);
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
#else
|
||||
if (esp_mn_model_handle->check_speech_command(esp_mn_model_data, string) == 0) {
|
||||
// error message is printed inside check_speech_command
|
||||
ESP_LOGE(TAG, "invalid command, please check format, %s.\n", string);
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
#endif
|
||||
|
||||
temp = esp_mn_command_search(string);
|
||||
|
||||
if (temp != NULL) {
|
||||
// command already exists
|
||||
if (command_id != temp->phrase->command_id) {
|
||||
// change command id
|
||||
temp->phrase->command_id = command_id;
|
||||
} else {
|
||||
// it's exactly the same, do nothing
|
||||
ESP_LOGI(TAG, "command %d: (%s) already exists.", command_id, string);
|
||||
}
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
temp = esp_mn_root;
|
||||
|
||||
esp_mn_phrase_t *phrase = esp_mn_phrase_alloc(command_id, string);
|
||||
if (phrase == NULL) {
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
int phoneme_len = strlen(phonemes);
|
||||
phrase->phonemes = _esp_mn_calloc_(phoneme_len+1, sizeof(char));
|
||||
memcpy(phrase->phonemes, phonemes, phoneme_len);
|
||||
phrase->phonemes[phoneme_len] = '\0';
|
||||
|
||||
esp_mn_node_t *new_node = esp_mn_node_alloc(phrase);
|
||||
while (temp->next != NULL) {
|
||||
temp = temp->next;
|
||||
}
|
||||
temp->next = new_node;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t esp_mn_commands_modify(char *old_string, char *new_string)
|
||||
{
|
||||
#ifdef CONFIG_SR_MN_EN_MULTINET7_QUANT
|
||||
|
||||
@ -59,6 +59,20 @@ esp_err_t esp_mn_commands_free(void);
|
||||
*/
|
||||
esp_err_t esp_mn_commands_add(int command_id, char *string);
|
||||
|
||||
/**
|
||||
* @brief Add one speech command with its command string, command phonemes and command ID
|
||||
* Please use multinet_g2p.py[esp-sr/tool/multinet_g2p.py] to generate phonemes from command string.
|
||||
*
|
||||
* @param command_id The command ID
|
||||
* @param string The command string of the speech commands
|
||||
* @param phonemes The phonemes of the speech commands
|
||||
*
|
||||
* @return
|
||||
* - ESP_OK Success
|
||||
* - ESP_ERR_INVALID_STATE Fail
|
||||
*/
|
||||
esp_err_t esp_mn_commands_phoneme_add(int command_id, char *string, char *phonemes);
|
||||
|
||||
/**
|
||||
* @brief Modify one speech command with a new command string
|
||||
*
|
||||
@ -178,4 +192,4 @@ void esp_mn_active_commands_print(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -4,7 +4,9 @@
|
||||
#include "string.h"
|
||||
#include "model_path.h"
|
||||
#include "esp_wn_models.h"
|
||||
#ifndef CONFIG_IDF_TARGET_ESP32P4
|
||||
#include "esp_mn_models.h"
|
||||
#endif
|
||||
|
||||
#ifdef ESP_PLATFORM
|
||||
#include <sys/dirent.h>
|
||||
@ -244,6 +246,7 @@ void srmodel_spiffs_deinit(srmodel_list_t *models)
|
||||
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IDF_TARGET_ESP32
|
||||
srmodel_list_t *srmodel_config_init()
|
||||
{
|
||||
if (static_srmodels == NULL) {
|
||||
@ -305,6 +308,7 @@ void srmodel_config_deinit(srmodel_list_t *models)
|
||||
// models is static_srmodels
|
||||
static_srmodels = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
model_coeff_getter_t *srmodel_get_model_coeff(char *model_name)
|
||||
{
|
||||
|
||||
Loading…
Reference in New Issue
Block a user