// esp-sr/include/esp32p4/esp_afe_config.h

#pragma once
#include <stdbool.h>
#include <stddef.h>
#include "stdint.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "esp_vad.h"

#ifdef __cplusplus
extern "C" {
#endif

// Abbreviations used in this header:
//   AFE:            Audio Front-End
//   SR:             Speech Recognition
//   afe_sr/AFE_SR:  the audio front-end used for speech recognition


/**
 * @brief Operating mode of the audio front-end for speech recognition (AFE_SR).
 */
typedef enum {
    SR_MODE_LOW_COST  = 0,  /*!< Low-cost mode */
    SR_MODE_HIGH_PERF = 1,  /*!< High-performance mode */
} afe_sr_mode_t;

/**
 * @brief Memory allocation strategy: how allocations are split between
 *        internal RAM and PSRAM.
 */
typedef enum {
    AFE_MEMORY_ALLOC_MORE_INTERNAL          = 1,  /*!< Allocate with more internal RAM */
    AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2,  /*!< Balance allocations between internal RAM and PSRAM */
    AFE_MEMORY_ALLOC_MORE_PSRAM             = 3,  /*!< Allocate with more PSRAM */
} afe_memory_alloc_mode_t;

/**
 * @brief AGC mode for the audio fed to multinet.
 *
 * Each enumerator's numeric value equals the target peak amplitude in dB;
 * 0 means no AGC gain is applied.
 */
typedef enum {
    AFE_MN_PEAK_AGC_MODE_1 = -9,  /*!< Peak amplitude of audio fed to multinet is -9 dB */
    AFE_MN_PEAK_AGC_MODE_2 = -6,  /*!< Peak amplitude of audio fed to multinet is -6 dB */
    AFE_MN_PEAK_AGC_MODE_3 = -3,  /*!< Peak amplitude of audio fed to multinet is -3 dB */
    AFE_MN_PEAK_NO_AGC     = 0,   /*!< No AGC gain */
} afe_mn_peak_agc_mode_t;

/**
 * @brief Channel layout and sample rate of the PCM data.
 *
 * Invariant expected by the AFE: total_ch_num == mic_num + ref_num.
 */
typedef struct {
    int total_ch_num;   /*!< Total number of channels; must equal mic_num + ref_num */
    int mic_num;        /*!< Number of microphone channels */
    int ref_num;        /*!< Number of reference channels */
    int sample_rate;    /*!< Audio sample rate */
} afe_pcm_config_t;

/**
 * @brief Noise suppression (NS) method.
 */
typedef enum {
    NS_MODE_SSP = 0,  /*!< Speech signal processing method */
    NS_MODE_NET = 1,  /*!< Deep noise suppression network method */
} afe_ns_mode_t;


/**
 * @brief Callback type used to receive debug audio data.
 *
 * The callback must not modify the data. It should copy the data away as
 * quickly as possible so that it does not block for too long.
 *
 * @param data        Read-only debug audio data.
 * @param data_size   Size of @p data in bytes.
 */
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);

/**
 * @brief Identifies the point at which a debug hook taps audio data.
 *
 * AFE_DEBUG_HOOK_MAX is the number of hook types, not a hook itself.
 */
typedef enum {
    AFE_DEBUG_HOOK_MASE_TASK_IN  = 0,  /*!< Get the input data of the mase task */
    AFE_DEBUG_HOOK_FETCH_TASK_IN = 1,  /*!< Get the input data of the fetch task */
    AFE_DEBUG_HOOK_MAX           = 2,  /*!< Number of hook types */
} afe_debug_hook_type_t;

/**
 * @brief Pairs a debug hook type with the callback that receives its data.
 */
typedef struct {
    afe_debug_hook_type_t     hook_type;      /*!< Which debug hook this entry describes */
    afe_debug_hook_callback_t hook_callback;  /*!< Callback that receives the debug audio data */
} afe_debug_hook_t;

/**
 * @brief Configuration of the audio front-end (AFE).
 *
 * AFE_CONFIG_DEFAULT() provides a per-target initializer for every field.
 * Field order is part of the layout and must not be changed.
 */
typedef struct {
    bool aec_init;                              /*!< Enable AEC (presumably acoustic echo cancellation -- confirm) */
    bool se_init;                               /*!< Enable SE (presumably speech enhancement -- confirm) */
    bool vad_init;                              /*!< Enable VAD */
    bool wakenet_init;                          /*!< Enable wakenet */
    bool voice_communication_init;              /*!< Enable voice communication */
    bool voice_communication_agc_init;          /*!< AGC switch for voice communication */
    int voice_communication_agc_gain;           /*!< AGC gain (dB) for voice communication */
    vad_mode_t vad_mode;                        /*!< One of VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4 */
    char *wakenet_model_name;                   /*!< Model name of wakenet 1 */
    char *wakenet_model_name_2;                 /*!< Model name of wakenet 2, if a second wakenet is used */
    det_mode_t wakenet_mode;                    /*!< Wakenet detection mode */
    afe_sr_mode_t afe_mode;                     /*!< AFE_SR operating mode */
    int afe_perferred_core;                     /*!< Preferred core (presumably the CPU core for AFE tasks -- confirm) */
    int afe_perferred_priority;                 /*!< Preferred priority (presumably the AFE task priority -- confirm) */
    int afe_ringbuf_size;                       /*!< Ring buffer size (unit not visible in this header) */
    afe_memory_alloc_mode_t memory_alloc_mode;  /*!< Split of allocations between internal RAM and PSRAM */
    float afe_linear_gain;                      /*!< Linear gain for SR output (not valid for voice communication);
                                                     should be in [0.1, 10.0]. Acts directly on the output
                                                     amplitude: out_linear_gain * amplitude. */
    afe_mn_peak_agc_mode_t agc_mode;            /*!< AGC mode for ASR; the AGC gain acts on the audio after the
                                                     linear gain has been applied */
    afe_pcm_config_t pcm_config;                /*!< Channel layout of the original data fed to the AFE feed function */
    bool debug_init;                            /*!< Enable debug (presumably activates debug_hook -- confirm) */
    afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];  /*!< One entry per debug hook type */
    afe_ns_mode_t afe_ns_mode;                  /*!< Noise suppression method */
    char *afe_ns_model_name;                    /*!< NS model name (presumably used with NS_MODE_NET -- confirm) */
    bool fixed_first_channel;                   /*!< If true, the channel after the first wake-up is fixed to the raw
                                                     microphone data; otherwise the channel is selected by wakenet */
} afe_config_t;


/*
 * AFE_CONFIG_DEFAULT(): per-target default initializer for afe_config_t.
 *
 *   ESP32   : 1 mic + 1 ref channel, DET_MODE_90,     SR_MODE_HIGH_PERF,
 *             AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE
 *   ESP32P4 : 1 mic + 1 ref channel, DET_MODE_90,     SR_MODE_LOW_COST,
 *             AFE_MEMORY_ALLOC_MORE_PSRAM
 *   ESP32S3 : 2 mic + 1 ref channel, DET_MODE_2CH_90, SR_MODE_LOW_COST,
 *             AFE_MEMORY_ALLOC_MORE_PSRAM
 *
 * The macro is not defined when none of these targets is selected.
 *
 * pcm_config is initialised with a nested brace initializer rather than
 * chained designators (.pcm_config.mic_num = ...): both forms are equivalent
 * in C, but only the nested form is accepted by C++ compilers. Designators
 * are listed in declaration order, which C++ also requires.
 */
#if CONFIG_IDF_TARGET_ESP32
#define AFE_CONFIG_DEFAULT() { \
    .aec_init = true, \
    .se_init = true, \
    .vad_init = true, \
    .wakenet_init = true, \
    .voice_communication_init = false, \
    .voice_communication_agc_init = false, \
    .voice_communication_agc_gain = 15, \
    .vad_mode = VAD_MODE_3, \
    .wakenet_model_name = NULL, \
    .wakenet_model_name_2 = NULL, \
    .wakenet_mode = DET_MODE_90, \
    .afe_mode = SR_MODE_HIGH_PERF, \
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
    .afe_linear_gain = 1.0, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config = { \
        .total_ch_num = 2, \
        .mic_num = 1, \
        .ref_num = 1, \
        .sample_rate = 16000, \
    }, \
    .debug_init = false, \
    .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
    .afe_ns_mode = NS_MODE_SSP, \
    .afe_ns_model_name = NULL, \
    .fixed_first_channel = true, \
}
#elif CONFIG_IDF_TARGET_ESP32P4
#define AFE_CONFIG_DEFAULT() { \
    .aec_init = true, \
    .se_init = true, \
    .vad_init = true, \
    .wakenet_init = true, \
    .voice_communication_init = false, \
    .voice_communication_agc_init = false, \
    .voice_communication_agc_gain = 15, \
    .vad_mode = VAD_MODE_3, \
    .wakenet_model_name = NULL, \
    .wakenet_model_name_2 = NULL, \
    .wakenet_mode = DET_MODE_90, \
    .afe_mode = SR_MODE_LOW_COST, \
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
    .afe_linear_gain = 1.0, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config = { \
        .total_ch_num = 2, \
        .mic_num = 1, \
        .ref_num = 1, \
        .sample_rate = 16000, \
    }, \
    .debug_init = false, \
    .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
    .afe_ns_mode = NS_MODE_SSP, \
    .afe_ns_model_name = NULL, \
    .fixed_first_channel = true, \
}
#elif CONFIG_IDF_TARGET_ESP32S3
#define AFE_CONFIG_DEFAULT() { \
    .aec_init = true, \
    .se_init = true, \
    .vad_init = true, \
    .wakenet_init = true, \
    .voice_communication_init = false, \
    .voice_communication_agc_init = false, \
    .voice_communication_agc_gain = 15, \
    .vad_mode = VAD_MODE_3, \
    .wakenet_model_name = NULL, \
    .wakenet_model_name_2 = NULL, \
    .wakenet_mode = DET_MODE_2CH_90, \
    .afe_mode = SR_MODE_LOW_COST, \
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
    .afe_linear_gain = 1.0, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config = { \
        .total_ch_num = 3, \
        .mic_num = 2, \
        .ref_num = 1, \
        .sample_rate = 16000, \
    }, \
    .debug_init = false, \
    .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
    .afe_ns_mode = NS_MODE_SSP, \
    .afe_ns_model_name = NULL, \
    .fixed_first_channel = true, \
}
#endif

#ifdef __cplusplus
}
#endif