diff --git a/Kconfig.projbuild b/Kconfig.projbuild index 03cdb40..a426695 100644 --- a/Kconfig.projbuild +++ b/Kconfig.projbuild @@ -12,6 +12,24 @@ choice MODEL_DATA_PATH bool "SD Card" endchoice + +config USE_AFE + bool "use afe" + default "y" + +choice AFE_INTERFACE_SEL + prompt "Afe interface selection" + default AFE_INTERFACE_V1 + depends on USE_AFE + help + Select the afe interface to be used. + + config AFE_INTERFACE_V1 + bool "afe interface v1" + +endchoice + + config USE_WAKENET bool "use wakenet" default "y" diff --git a/include/esp32s3/esp_afe_config.h b/include/esp32s3/esp_afe_config.h new file mode 100644 index 0000000..e4853fa --- /dev/null +++ b/include/esp32s3/esp_afe_config.h @@ -0,0 +1,98 @@ +#pragma once +#include "stdint.h" +#include "esp_wn_iface.h" +#include "esp_wn_models.h" +#include "esp_vad.h" + +//AFE: Audio Front-End +//SR: Speech Recognition +//afe_sr/AFE_SR: the audio front-end for speech recognition + +//Set AFE_SR mode +typedef enum { + SR_MODE_LOW_COST = 0, + SR_MODE_HIGH_PERF = 1 +} afe_sr_mode_t; + +typedef enum { + AFE_MEMORY_ALLOC_MORE_INTERNAL = 0, // malloc with more internal ram + AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 1, // malloc with internal ram and psram in balance + AFE_MEMORY_ALLOC_MORE_PSRAM = 2 // malloc with more psram +} afe_memory_alloc_mode_t; + +typedef enum { + AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB + AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB + AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB + AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain +} afe_mn_peak_agc_mode_t; + +typedef struct { + int total_ch_num; // total channel num. It must be: total_ch_num = mic_num + ref_num + int mic_num; // mic channel num + int ref_num; // reference channel num +} afe_pcm_config_t; + +typedef struct { + bool aec_init; + bool se_init; + bool vad_init; + bool wakenet_init; + bool voice_communication_init; + vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4 + esp_wn_iface_t *wakenet_model; + model_coeff_getter_t *wakenet_coeff; + det_mode_t wakenet_mode; + afe_sr_mode_t afe_mode; + int afe_perferred_core; + int afe_perferred_priority; + int afe_ringbuf_size; + afe_memory_alloc_mode_t memory_alloc_mode; + afe_mn_peak_agc_mode_t agc_mode; + afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. +} afe_config_t; + + +#if CONFIG_IDF_TARGET_ESP32 +#define AFE_CONFIG_DEFAULT() { \ + .aec_init = true, \ + .se_init = true, \ + .vad_init = true, \ + .wakenet_init = true, \ + .voice_communication_init = false, \ + .vad_mode = VAD_MODE_3, \ + .wakenet_model = &WAKENET_MODEL, \ + .wakenet_coeff = &WAKENET_COEFF, \ + .wakenet_mode = DET_MODE_90, \ + .afe_mode = SR_MODE_HIGH_PERF, \ + .afe_perferred_core = 0, \ + .afe_perferred_priority = 5, \ + .afe_ringbuf_size = 50, \ + .memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \ + .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \ + .pcm_config.total_ch_num = 2, \ + .pcm_config.mic_num = 1, \ + .pcm_config.ref_num = 1, \ +} +#elif CONFIG_IDF_TARGET_ESP32S3 +#define AFE_CONFIG_DEFAULT() { \ + .aec_init = true, \ + .se_init = true, \ + .vad_init = true, \ + .wakenet_init = true, \ + .voice_communication_init = false, \ + .vad_mode = VAD_MODE_3, \ + .wakenet_model = &WAKENET_MODEL, \ + .wakenet_coeff = &WAKENET_COEFF, \ + .wakenet_mode = DET_MODE_2CH_90, \ + .afe_mode = SR_MODE_LOW_COST, \ + .afe_perferred_core = 0, \ + .afe_perferred_priority = 5, \ + .afe_ringbuf_size = 50, \ + .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \ + .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \ + .pcm_config.total_ch_num = 3, \ + .pcm_config.mic_num = 2, \ + .pcm_config.ref_num = 1, \ +} +#endif \ No newline at end of file diff --git a/include/esp32s3/esp_afe_sr_iface.h b/include/esp32s3/esp_afe_sr_iface.h index ada84a8..0cac239 100644 --- a/include/esp32s3/esp_afe_sr_iface.h +++ b/include/esp32s3/esp_afe_sr_iface.h @@ -1,8 +1,10 @@ #pragma once #include "stdint.h" -#include "esp_wn_iface.h" -#include "esp_wn_models.h" -#include "esp_vad.h" +#if CONFIG_AFE_INTERFACE_V1 +#include "esp_afe_config.h" +#else +#include "esp_afe_config.h" +#endif //AFE: Audio Front-End //SR: Speech Recognition @@ -11,12 +13,6 @@ //Opaque AFE_SR data container typedef struct esp_afe_sr_data_t esp_afe_sr_data_t; -//Set AFE_SR mode -typedef enum { - SR_MODE_LOW_COST = 0, - SR_MODE_HIGH_PERF = 1 -} afe_sr_mode_t; - // the output state of fetch function typedef enum { AFE_FETCH_ERROR = -3, // fetch empty data, retry it @@ -26,85 +22,6 @@ typedef enum { AFE_FETCH_WWE_DETECTED = 1 // wwe state: wake word is detected } afe_fetch_mode_t; -typedef enum { - AFE_PSRAM_LOW_COST = 0, - AFE_PSRAM_MIDDLE_COST = 1, - AFE_PSRAM_HIGH_COST = 2 -} afe_use_psram_mode_t; - -typedef enum { - AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB - AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB - AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB - AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain -} afe_mn_peak_agc_mode_t; - -typedef struct { - int total_ch_num; // total channel num. It must be: total_ch_num = mic_num + ref_num - int mic_num; // mic channel num - int ref_num; // reference channel num -} afe_pcm_config_t; - -typedef struct { - bool aec_init; - bool se_init; - bool vad_init; - bool wakenet_init; - vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4 - esp_wn_iface_t *wakenet_model; - model_coeff_getter_t *wakenet_coeff; - det_mode_t wakenet_mode; - afe_sr_mode_t afe_mode; - int afe_perferred_core; - int afe_perferred_priority; - int afe_ringbuf_size; - afe_use_psram_mode_t alloc_from_psram; - afe_mn_peak_agc_mode_t agc_mode; - afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. -} afe_config_t; - - -#if CONFIG_IDF_TARGET_ESP32 -#define AFE_CONFIG_DEFAULT() { \ - .aec_init = true, \ - .se_init = true, \ - .vad_init = true, \ - .wakenet_init = true, \ - .vad_mode = VAD_MODE_3, \ - .wakenet_model = &WAKENET_MODEL, \ - .wakenet_coeff = &WAKENET_COEFF, \ - .wakenet_mode = DET_MODE_90, \ - .afe_mode = SR_MODE_HIGH_PERF, \ - .afe_perferred_core = 0, \ - .afe_perferred_priority = 5, \ - .afe_ringbuf_size = 50, \ - .alloc_from_psram = AFE_PSRAM_MIDDLE_COST, \ - .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \ - .pcm_config.total_ch_num = 2, \ - .pcm_config.mic_num = 1, \ - .pcm_config.ref_num = 1, \ -} -#elif CONFIG_IDF_TARGET_ESP32S3 -#define AFE_CONFIG_DEFAULT() { \ - .aec_init = true, \ - .se_init = true, \ - .vad_init = true, \ - .wakenet_init = true, \ - .vad_mode = VAD_MODE_3, \ - .wakenet_model = &WAKENET_MODEL, \ - .wakenet_coeff = &WAKENET_COEFF, \ - .wakenet_mode = DET_MODE_2CH_90, \ - .afe_mode = SR_MODE_LOW_COST, \ - .afe_perferred_core = 0, \ - .afe_perferred_priority = 5, \ - .afe_ringbuf_size = 50, \ - .alloc_from_psram = AFE_PSRAM_HIGH_COST, \ - .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \ - .pcm_config.total_ch_num = 3, \ - .pcm_config.mic_num = 2, \ - .pcm_config.ref_num = 1, \ -} -#endif /** * @brief Function to initialze a AFE_SR instance with a specified mode * @@ -274,5 +191,3 @@ typedef struct { esp_afe_sr_iface_op_enable_se_t enable_se; esp_afe_sr_iface_op_destroy_t destroy; } esp_afe_sr_iface_t; - -extern esp_afe_sr_iface_t esp_afe_sr; diff --git a/include/esp32s3/esp_afe_sr_models.h b/include/esp32s3/esp_afe_sr_models.h index 5424134..1042eb5 100644 --- a/include/esp32s3/esp_afe_sr_models.h +++ b/include/esp32s3/esp_afe_sr_models.h @@ -1,6 +1,18 @@ #pragma once + +#if defined CONFIG_USE_AFE + +#if CONFIG_AFE_INTERFACE_V1 #include "esp_afe_sr_iface.h" +extern const esp_afe_sr_iface_t esp_afe_v1; +#define ESP_AFE_HANDLE esp_afe_v1 +#else +#error No valid afe selected. +#endif -extern const esp_afe_sr_iface_t esp_afe_sr_2mic; -extern const esp_afe_sr_iface_t esp_afe_sr_1mic; +#else +#include "esp_afe_sr_iface.h" +extern const esp_afe_sr_iface_t esp_afe_v1; +#define ESP_AFE_HANDLE esp_afe_v1 +#endif \ No newline at end of file diff --git a/include/esp32s3/esp_agc.h b/include/esp32s3/esp_agc.h index 20b0e3f..37116eb 100644 --- a/include/esp32s3/esp_agc.h +++ b/include/esp32s3/esp_agc.h @@ -14,10 +14,6 @@ #ifndef _ESP_AGC_H_ #define _ESP_AGC_H_ -#ifdef __cplusplus -extern "C" { -#endif - ////all positive value is valid, negective is error typedef enum { ESP_AGC_SUCCESS = 0, ////success @@ -32,8 +28,4 @@ void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int targe int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate); void esp_agc_close(void *agc_handle); -#ifdef __cplusplus -} -#endif - #endif // _ESP_AGC_H_ diff --git a/include/esp32s3/esp_mase.h b/include/esp32s3/esp_mase.h index 3cf403f..0b12e82 100644 --- a/include/esp32s3/esp_mase.h +++ b/include/esp32s3/esp_mase.h @@ -12,13 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License -#ifndef _ESP_MASE_H_ -#define _ESP_MASE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - #define MASE_SAMPLE_RATE 16000 // Supports 16kHz only #define MASE_FRAME_SIZE 16 // Supports 16ms only #define MASE_MIC_DISTANCE 65 // According to physical design of mic-array @@ -85,10 +78,4 @@ void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out); * @return None * */ -void mase_destory(mase_handle_t st); - -#ifdef __cplusplus -} -#endif - -#endif \ No newline at end of file +void mase_destory(mase_handle_t st); \ No newline at end of file diff --git a/include/esp32s3/esp_vad.h b/include/esp32s3/esp_vad.h index e1a76cf..2440d39 100644 --- a/include/esp32s3/esp_vad.h +++ b/include/esp32s3/esp_vad.h @@ -47,15 +47,11 @@ typedef void* vad_handle_t; * * @param vad_mode Sets the VAD operating mode. * - * @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000. - * - * @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30. - * * @return * - NULL: Create failed * - Others: The instance of VAD */ -vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_ms); +vad_handle_t vad_create(vad_mode_t vad_mode); /** * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking. @@ -64,12 +60,16 @@ vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_m * * @param data An array of 16-bit signed audio samples. * + * @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000. + * + * @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30. + * * @return * - VAD_SILENCE if no voice * - VAD_SPEECH if voice is detected * */ -vad_state_t vad_process(vad_handle_t inst, int16_t *data); +vad_state_t vad_process(vad_handle_t inst, int16_t *data, int sample_rate_hz, int one_frame_ms); /** * @brief Free the VAD instance diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index f710d83..427869b 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libesp_audio_processor.a b/lib/esp32s3/libesp_audio_processor.a index d66b7cb..f5f833c 100644 Binary files a/lib/esp32s3/libesp_audio_processor.a and b/lib/esp32s3/libesp_audio_processor.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index eb8ff0b..a0e70e9 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index 5a48c66..04d23c0 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ diff --git a/libversion b/libversion index 826a98d..c5a5500 100644 --- a/libversion +++ b/libversion @@ -1 +1 @@ -c008766c5e30abbd2e418086a688de59359ff1df \ No newline at end of file +fd54094054a442b011659a0d32898b728c33ad1c diff --git a/model/movemodel.py b/model/movemodel.py index fdae930..8528c7d 100644 --- a/model/movemodel.py +++ b/model/movemodel.py @@ -88,4 +88,4 @@ if multinet_model != 'null': # os.system("cp %s %s" % (wakenet_model+'/_MODEL_INFO_', target_model)) total_size = calculate_total_size(target_model) -print("Recommended model partition size: ", str(int((total_size / 1024 + 500) / 4 ) * 4) + 'KB') +print("Recommended model partition size: ", str(int((total_size / 1024 + 500) / 4 ) * 4) + 'KB') \ No newline at end of file diff --git a/model/wakenet_model/alexa8/_MODEL_INFO_ b/model/wakenet_model/alexa8/_MODEL_INFO_ index 52e1632..e0d78b6 100644 --- a/model/wakenet_model/alexa8/_MODEL_INFO_ +++ b/model/wakenet_model/alexa8/_MODEL_INFO_ @@ -1 +1 @@ -wakeNet8_v5h8_alexa_5_0.57_0.60 +wakeNet8_v5_alexa_5_0.55_0.54 \ No newline at end of file diff --git a/model/wakenet_model/xiaoaitongxue7q8/_MODEL_INFO_ b/model/wakenet_model/xiaoaitongxue7q8/_MODEL_INFO_ index a01f350..8bd886c 100644 --- a/model/wakenet_model/xiaoaitongxue7q8/_MODEL_INFO_ +++ b/model/wakenet_model/xiaoaitongxue7q8/_MODEL_INFO_ @@ -1 +1 @@ -wakeNet7Q8_v2h8_xiaoaitongxue_5_0.975_0.985 +wakeNet7Q8_v1_xiaoaitongxue_5_0.97_0.90 \ No newline at end of file