diff --git a/include/esp32s3/esp_afe_config.h b/include/esp32s3/esp_afe_config.h index 3bf55f7..cf0b06a 100644 --- a/include/esp32s3/esp_afe_config.h +++ b/include/esp32s3/esp_afe_config.h @@ -8,6 +8,7 @@ //SR: Speech Recognition //afe_sr/AFE_SR: the audio front-end for speech recognition + //Set AFE_SR mode typedef enum { SR_MODE_LOW_COST = 0, @@ -34,6 +35,26 @@ typedef struct { int sample_rate; // sample rate of audio } afe_pcm_config_t; +/** + * @brief Function to get the debug audio data + * + * @param data The debug audio data, which must not be modified. It should be copied away as soon as possible to avoid blocking for too long. + * @param data_size The number of bytes of data. + * @returns + */ +typedef void (*afe_debug_hook_callback_t)(const int16_t* data, int data_size); + +typedef enum { + AFE_DEBUG_HOOK_MASE_TASK_IN = 0, // To get the input data of mase task + AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, // To get the input data of fetch task + AFE_DEBUG_HOOK_MAX = 2 +} afe_debug_hook_type_t; + +typedef struct { + afe_debug_hook_type_t hook_type; // debug type of hook + afe_debug_hook_callback_t hook_callback; // callback function which transfers debug audio data +} afe_debug_hook_t; + typedef struct { bool aec_init; bool se_init; @@ -52,6 +73,8 @@ typedef struct { afe_memory_alloc_mode_t memory_alloc_mode; afe_mn_peak_agc_mode_t agc_mode; // The agc mode for ASR afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. 
+ bool debug_init; + afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX]; } afe_config_t; @@ -77,6 +100,8 @@ typedef struct { .pcm_config.mic_num = 1, \ .pcm_config.ref_num = 1, \ .pcm_config.sample_rate = 16000, \ + .debug_init = false, \ + .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \ } #elif CONFIG_IDF_TARGET_ESP32S3 #define AFE_CONFIG_DEFAULT() { \ @@ -100,5 +125,7 @@ typedef struct { .pcm_config.mic_num = 2, \ .pcm_config.ref_num = 1, \ .pcm_config.sample_rate = 16000, \ + .debug_init = false, \ + .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \ } #endif \ No newline at end of file diff --git a/include/esp32s3/esp_afe_sr_iface.h b/include/esp32s3/esp_afe_sr_iface.h index b513b5c..b9025e9 100644 --- a/include/esp32s3/esp_afe_sr_iface.h +++ b/include/esp32s3/esp_afe_sr_iface.h @@ -25,9 +25,9 @@ typedef struct afe_fetch_result_t { int16_t *data; // the data of audio. int data_size; // the size of data. The unit is byte. - int wakeup_state; // the value is afe_wakeup_state_t + wakenet_state_t wakeup_state; // the value is wakenet_state_t int wake_word_index; // if the wake word is detected. It will store the wake word index which start from 1. - int vad_state; // the value is afe_vad_state_t + afe_vad_state_t vad_state; // the value is afe_vad_state_t int trigger_channel_id; // the channel index of output int wake_word_length; // the length of wake word. It's unit is the number of samples. int ret_value; // the return state of fetch function @@ -101,6 +101,14 @@ typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t* */ typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe); +/** + * @brief reset ringbuf of AFE. 
+ * + * @param afe The AFE_SR object to query + * @return -1: fail, 0: success + */ +typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe); + /** * @brief Initial wakenet and wake words coefficient, or reset wakenet and wake words coefficient * when wakenet has been initialized. @@ -174,6 +182,7 @@ typedef struct { esp_afe_sr_iface_op_create_from_config_t create_from_config; esp_afe_sr_iface_op_feed_t feed; esp_afe_sr_iface_op_fetch_t fetch; + esp_afe_sr_iface_op_reset_buffer_t reset_buffer; esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize; esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize; esp_afe_sr_iface_op_get_total_channel_num_t get_total_channel_num; diff --git a/lib/esp32s3/libc_speech_features.a b/lib/esp32s3/libc_speech_features.a index 9a0bfcd..8843b35 100644 Binary files a/lib/esp32s3/libc_speech_features.a and b/lib/esp32s3/libc_speech_features.a differ diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index dbf2fc3..76cfbe7 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libesp_audio_processor.a b/lib/esp32s3/libesp_audio_processor.a index 33bf239..965431a 100644 Binary files a/lib/esp32s3/libesp_audio_processor.a and b/lib/esp32s3/libesp_audio_processor.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index ff28ac3..602a1d2 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index 9f66cbd..06e65bf 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ diff --git a/src/model_path.c b/src/model_path.c index 053afad..71abee9 100644 --- a/src/model_path.c +++ b/src/model_path.c @@ -132,6 +132,7 @@ void srmodel_spiffs_deinit(srmodel_list_t *models) for (int i=0; i<models->num; i++) { free(models->model_name[i]); } + free(models->model_name); } 
free(models); } @@ -165,6 +166,7 @@ srmodel_list_t *srmodel_config_init() // could not find any avaliable models, return NULL if (models->num == 0) { + free(models->model_name); free(models); models = NULL; } @@ -179,6 +181,7 @@ void srmodel_config_deinit(srmodel_list_t *models) for (int i=0; i<models->num; i++) { free(models->model_name[i]); } + free(models->model_name); } free(models); }