mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
feat(AFE): modify the AFE API to support multiple versions
This commit is contained in:
parent
564bea09f8
commit
dcc206936e
@ -12,6 +12,24 @@ choice MODEL_DATA_PATH
|
||||
bool "SD Card"
|
||||
endchoice
|
||||
|
||||
|
||||
config USE_AFE
|
||||
bool "use afe"
|
||||
default "y"
|
||||
|
||||
choice AFE_INTERFACE_SEL
|
||||
prompt "Afe interface selection"
|
||||
default AFE_INTERFACE_V1
|
||||
depends on USE_AFE
|
||||
help
|
||||
Select the afe interface to be used.
|
||||
|
||||
config AFE_INTERFACE_V1
|
||||
bool "afe interface v1"
|
||||
|
||||
endchoice
|
||||
|
||||
|
||||
config USE_WAKENET
|
||||
bool "use wakenet"
|
||||
default "y"
|
||||
|
||||
98
include/esp32s3/esp_afe_config.h
Normal file
98
include/esp32s3/esp_afe_config.h
Normal file
@ -0,0 +1,98 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "esp_vad.h"
|
||||
|
||||
//AFE: Audio Front-End
|
||||
//SR: Speech Recognition
|
||||
//afe_sr/AFE_SR: the audio front-end for speech recognition
|
||||
|
||||
//Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0,
|
||||
SR_MODE_HIGH_PERF = 1
|
||||
} afe_sr_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MEMORY_ALLOC_MORE_INTERNAL = 0, // malloc with more internal ram
|
||||
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 1, // malloc with internal ram and psram in balance
|
||||
AFE_MEMORY_ALLOC_MORE_PSRAM = 2 // malloc with more psram
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num. It must be: total_ch_num = mic_num + ref_num
|
||||
int mic_num; // mic channel num
|
||||
int ref_num; // reference channel num
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef struct {
|
||||
bool aec_init;
|
||||
bool se_init;
|
||||
bool vad_init;
|
||||
bool wakenet_init;
|
||||
bool voice_communication_init;
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
esp_wn_iface_t *wakenet_model;
|
||||
model_coeff_getter_t *wakenet_coeff;
|
||||
det_mode_t wakenet_mode;
|
||||
afe_sr_mode_t afe_mode;
|
||||
int afe_perferred_core;
|
||||
int afe_perferred_priority;
|
||||
int afe_ringbuf_size;
|
||||
afe_memory_alloc_mode_t memory_alloc_mode;
|
||||
afe_mn_peak_agc_mode_t agc_mode;
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
} afe_config_t;
|
||||
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.voice_communication_init = false, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model = &WAKENET_MODEL, \
|
||||
.wakenet_coeff = &WAKENET_COEFF, \
|
||||
.wakenet_mode = DET_MODE_90, \
|
||||
.afe_mode = SR_MODE_HIGH_PERF, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 2, \
|
||||
.pcm_config.mic_num = 1, \
|
||||
.pcm_config.ref_num = 1, \
|
||||
}
|
||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.voice_communication_init = false, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model = &WAKENET_MODEL, \
|
||||
.wakenet_coeff = &WAKENET_COEFF, \
|
||||
.wakenet_mode = DET_MODE_2CH_90, \
|
||||
.afe_mode = SR_MODE_LOW_COST, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 3, \
|
||||
.pcm_config.mic_num = 2, \
|
||||
.pcm_config.ref_num = 1, \
|
||||
}
|
||||
#endif
|
||||
@ -1,8 +1,10 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "esp_vad.h"
|
||||
#if CONFIG_AFE_INTERFACE_V1
|
||||
#include "esp_afe_config.h"
|
||||
#else
|
||||
#include "esp_afe_config.h"
|
||||
#endif
|
||||
|
||||
//AFE: Audio Front-End
|
||||
//SR: Speech Recognition
|
||||
@ -11,12 +13,6 @@
|
||||
//Opaque AFE_SR data container
|
||||
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
|
||||
|
||||
//Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0,
|
||||
SR_MODE_HIGH_PERF = 1
|
||||
} afe_sr_mode_t;
|
||||
|
||||
// the output state of fetch function
|
||||
typedef enum {
|
||||
AFE_FETCH_ERROR = -3, // fetch empty data, retry it
|
||||
@ -26,85 +22,6 @@ typedef enum {
|
||||
AFE_FETCH_WWE_DETECTED = 1 // wwe state: wake word is detected
|
||||
} afe_fetch_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_PSRAM_LOW_COST = 0,
|
||||
AFE_PSRAM_MIDDLE_COST = 1,
|
||||
AFE_PSRAM_HIGH_COST = 2
|
||||
} afe_use_psram_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num. It must be: total_ch_num = mic_num + ref_num
|
||||
int mic_num; // mic channel num
|
||||
int ref_num; // reference channel num
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef struct {
|
||||
bool aec_init;
|
||||
bool se_init;
|
||||
bool vad_init;
|
||||
bool wakenet_init;
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
esp_wn_iface_t *wakenet_model;
|
||||
model_coeff_getter_t *wakenet_coeff;
|
||||
det_mode_t wakenet_mode;
|
||||
afe_sr_mode_t afe_mode;
|
||||
int afe_perferred_core;
|
||||
int afe_perferred_priority;
|
||||
int afe_ringbuf_size;
|
||||
afe_use_psram_mode_t alloc_from_psram;
|
||||
afe_mn_peak_agc_mode_t agc_mode;
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
} afe_config_t;
|
||||
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model = &WAKENET_MODEL, \
|
||||
.wakenet_coeff = &WAKENET_COEFF, \
|
||||
.wakenet_mode = DET_MODE_90, \
|
||||
.afe_mode = SR_MODE_HIGH_PERF, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.alloc_from_psram = AFE_PSRAM_MIDDLE_COST, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 2, \
|
||||
.pcm_config.mic_num = 1, \
|
||||
.pcm_config.ref_num = 1, \
|
||||
}
|
||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model = &WAKENET_MODEL, \
|
||||
.wakenet_coeff = &WAKENET_COEFF, \
|
||||
.wakenet_mode = DET_MODE_2CH_90, \
|
||||
.afe_mode = SR_MODE_LOW_COST, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.alloc_from_psram = AFE_PSRAM_HIGH_COST, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 3, \
|
||||
.pcm_config.mic_num = 2, \
|
||||
.pcm_config.ref_num = 1, \
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* @brief Function to initialize an AFE_SR instance with a specified mode
|
||||
*
|
||||
@ -274,5 +191,3 @@ typedef struct {
|
||||
esp_afe_sr_iface_op_enable_se_t enable_se;
|
||||
esp_afe_sr_iface_op_destroy_t destroy;
|
||||
} esp_afe_sr_iface_t;
|
||||
|
||||
extern esp_afe_sr_iface_t esp_afe_sr;
|
||||
|
||||
@ -1,6 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#if defined CONFIG_USE_AFE
|
||||
|
||||
#if CONFIG_AFE_INTERFACE_V1
|
||||
#include "esp_afe_sr_iface.h"
|
||||
extern const esp_afe_sr_iface_t esp_afe_v1;
|
||||
#define ESP_AFE_HANDLE esp_afe_v1
|
||||
#else
|
||||
#error No valid afe selected.
|
||||
#endif
|
||||
|
||||
extern const esp_afe_sr_iface_t esp_afe_sr_2mic;
|
||||
extern const esp_afe_sr_iface_t esp_afe_sr_1mic;
|
||||
#else
|
||||
#include "esp_afe_sr_iface.h"
|
||||
extern const esp_afe_sr_iface_t esp_afe_v1;
|
||||
#define ESP_AFE_HANDLE esp_afe_v1
|
||||
|
||||
#endif
|
||||
@ -14,10 +14,6 @@
|
||||
#ifndef _ESP_AGC_H_
|
||||
#define _ESP_AGC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
////all positive values are valid, negative values are errors
|
||||
typedef enum {
|
||||
ESP_AGC_SUCCESS = 0, ////success
|
||||
@ -32,8 +28,4 @@ void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int targe
|
||||
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
|
||||
void esp_agc_close(void *agc_handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _ESP_AGC_H_
|
||||
|
||||
@ -12,13 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
|
||||
#ifndef _ESP_MASE_H_
|
||||
#define _ESP_MASE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
|
||||
#define MASE_FRAME_SIZE 16 // Supports 16ms only
|
||||
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
|
||||
@ -85,10 +78,4 @@ void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void mase_destory(mase_handle_t st);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
void mase_destory(mase_handle_t st);
|
||||
@ -47,15 +47,11 @@ typedef void* vad_handle_t;
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
*
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
*
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_ms);
|
||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
@ -64,12 +60,16 @@ vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_m
|
||||
*
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
*
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
*
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process(vad_handle_t inst, int16_t *data);
|
||||
vad_state_t vad_process(vad_handle_t inst, int16_t *data, int sample_rate_hz, int one_frame_ms);
|
||||
|
||||
/**
|
||||
* @brief Free the VAD instance
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1 +1 @@
|
||||
c008766c5e30abbd2e418086a688de59359ff1df
|
||||
fd54094054a442b011659a0d32898b728c33ad1c
|
||||
|
||||
@ -88,4 +88,4 @@ if multinet_model != 'null':
|
||||
# os.system("cp %s %s" % (wakenet_model+'/_MODEL_INFO_', target_model))
|
||||
|
||||
total_size = calculate_total_size(target_model)
|
||||
print("Recommended model partition size: ", str(int((total_size / 1024 + 500) / 4 ) * 4) + 'KB')
|
||||
print("Recommended model partition size: ", str(int((total_size / 1024 + 500) / 4 ) * 4) + 'KB')
|
||||
@ -1 +1 @@
|
||||
wakeNet8_v5h8_alexa_5_0.57_0.60
|
||||
wakeNet8_v5_alexa_5_0.55_0.54
|
||||
@ -1 +1 @@
|
||||
wakeNet7Q8_v2h8_xiaoaitongxue_5_0.975_0.985
|
||||
wakeNet7Q8_v1_xiaoaitongxue_5_0.97_0.90
|
||||
Loading…
Reference in New Issue
Block a user