/*
 * Mirror of https://github.com/espressif/esp-sr.git
 * Synced 2025-09-15 15:28:44 +08:00
 * 193 lines, 7.1 KiB, C
 */
#pragma once
|
|
#include <stdbool.h>
#include <stddef.h>
#include "stdint.h"

#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "esp_vad.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
//AFE: Audio Front-End
|
|
//SR: Speech Recognition
|
|
//afe_sr/AFE_SR: the audio front-end for speech recognition
|
|
|
|
|
|
/**
 * @brief Mode selector for AFE_SR.
 *
 * The numeric values are fixed explicitly so they stay stable if
 * enumerators are ever reordered.
 */
typedef enum {
    SR_MODE_LOW_COST  = 0,
    SR_MODE_HIGH_PERF = 1
} afe_sr_mode_t;
|
|
|
|
/**
 * @brief Memory allocation policy used by the AFE.
 */
typedef enum {
    AFE_MEMORY_ALLOC_MORE_INTERNAL          = 1, /**< Allocate with more internal RAM */
    AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, /**< Balance allocations between internal RAM and PSRAM */
    AFE_MEMORY_ALLOC_MORE_PSRAM             = 3  /**< Allocate with more PSRAM */
} afe_memory_alloc_mode_t;
|
|
|
|
/**
 * @brief AGC mode for the audio fed to multinet.
 *
 * Each AGC enumerator's value equals the peak amplitude in dB that it selects.
 */
typedef enum {
    AFE_MN_PEAK_AGC_MODE_1 = -9, /**< Peak amplitude of audio fed to multinet is -9 dB */
    AFE_MN_PEAK_AGC_MODE_2 = -6, /**< Peak amplitude of audio fed to multinet is -6 dB */
    AFE_MN_PEAK_AGC_MODE_3 = -3, /**< Peak amplitude of audio fed to multinet is -3 dB */
    AFE_MN_PEAK_NO_AGC     = 0   /**< No AGC gain is applied */
} afe_mn_peak_agc_mode_t;
|
|
|
|
/**
 * @brief Channel layout and sample rate of the PCM data.
 *
 * Invariant expected by the API: total_ch_num == mic_num + ref_num.
 */
typedef struct {
    int total_ch_num; /**< Total number of channels; must equal mic_num + ref_num */
    int mic_num;      /**< Number of microphone channels */
    int ref_num;      /**< Number of reference channels */
    int sample_rate;  /**< Sample rate of the audio */
} afe_pcm_config_t;
|
|
|
|
/**
 * @brief Noise suppression method.
 */
typedef enum {
    NS_MODE_SSP = 0, /**< Speech signal processing method */
    NS_MODE_NET = 1  /**< Deep noise suppression net method */
} afe_ns_mode_t;
|
|
|
|
|
|
/**
 * @brief Callback type that receives the debug audio data.
 *
 * @param data       Debug audio data; it must not be modified. Copy it away
 *                   as soon as possible to avoid blocking for too long.
 * @param data_size  Number of bytes of data.
 */
typedef void (*afe_debug_hook_callback_t)(const int16_t *data, int data_size);
|
|
|
|
/**
 * @brief Identifies which debug hook a callback is attached to.
 *
 * AFE_DEBUG_HOOK_MAX is the number of hook types, not a hook itself.
 */
typedef enum {
    AFE_DEBUG_HOOK_MASE_TASK_IN  = 0, /**< Get the input data of the mase task */
    AFE_DEBUG_HOOK_FETCH_TASK_IN = 1, /**< Get the input data of the fetch task */
    AFE_DEBUG_HOOK_MAX           = 2
} afe_debug_hook_type_t;
|
|
|
|
typedef struct {
|
|
afe_debug_hook_type_t hook_type; // debug type of hook
|
|
afe_debug_hook_callback_t hook_callback; // callback function which transfer debug audio data
|
|
} afe_debug_hook_t;
|
|
|
|
typedef struct {
|
|
bool aec_init;
|
|
bool se_init;
|
|
bool vad_init;
|
|
bool wakenet_init;
|
|
bool voice_communication_init;
|
|
bool voice_communication_agc_init; // AGC swich for voice communication
|
|
int voice_communication_agc_gain; // AGC gain(dB) for voice communication
|
|
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
|
char *wakenet_model_name; // The model name of wakenet 1
|
|
char *wakenet_model_name_2; // The model name of wakenet 2 if has wakenet 2
|
|
det_mode_t wakenet_mode;
|
|
afe_sr_mode_t afe_mode;
|
|
int afe_perferred_core;
|
|
int afe_perferred_priority;
|
|
int afe_ringbuf_size;
|
|
afe_memory_alloc_mode_t memory_alloc_mode;
|
|
float afe_linear_gain; // The linear gain for sr output(note: invaild for vc), the value should be in [0.1, 10.0].
|
|
// This value acts directly on the output amplitude: out_linear_gain * amplitude.
|
|
afe_mn_peak_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain.
|
|
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
|
bool debug_init;
|
|
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
|
|
afe_ns_mode_t afe_ns_mode;
|
|
char *afe_ns_model_name;
|
|
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
|
// otherwise, select channel number by wakenet
|
|
} afe_config_t;
|
|
|
|
|
|
#if CONFIG_IDF_TARGET_ESP32
|
|
/*
 * Default afe_config_t initializer used when CONFIG_IDF_TARGET_ESP32 is set:
 * high-performance SR mode, balanced internal/PSRAM allocation, and a PCM
 * layout of 1 microphone + 1 reference channel (2 channels total).
 *
 * pcm_config is initialized with a brace-enclosed sub-initializer instead of
 * nested designators (".pcm_config.mic_num = ..."). The result is identical
 * in C, and the macro is then also accepted by C++ compilers, which do not
 * allow nested designators. Designators follow the declaration order of
 * afe_config_t, which C++ also requires.
 */
#define AFE_CONFIG_DEFAULT() { \
    .aec_init = true, \
    .se_init = true, \
    .vad_init = true, \
    .wakenet_init = true, \
    .voice_communication_init = false, \
    .voice_communication_agc_init = false, \
    .voice_communication_agc_gain = 15, \
    .vad_mode = VAD_MODE_3, \
    .wakenet_model_name = NULL, \
    .wakenet_model_name_2 = NULL, \
    .wakenet_mode = DET_MODE_90, \
    .afe_mode = SR_MODE_HIGH_PERF, \
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
    .afe_linear_gain = 1.0f, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config = { \
        .total_ch_num = 2, \
        .mic_num = 1, \
        .ref_num = 1, \
        .sample_rate = 16000, \
    }, \
    .debug_init = false, \
    .debug_hook = { \
        {AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, \
        {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}, \
    }, \
    .afe_ns_mode = NS_MODE_SSP, \
    .afe_ns_model_name = NULL, \
    .fixed_first_channel = true, \
}
|
|
#elif CONFIG_IDF_TARGET_ESP32P4
|
|
/*
 * Default afe_config_t initializer used when CONFIG_IDF_TARGET_ESP32P4 is
 * set: low-cost SR mode, allocation biased towards PSRAM, and a PCM layout
 * of 1 microphone + 1 reference channel (2 channels total).
 *
 * pcm_config is initialized with a brace-enclosed sub-initializer instead of
 * nested designators (".pcm_config.mic_num = ..."). The result is identical
 * in C, and the macro is then also accepted by C++ compilers, which do not
 * allow nested designators. Designators follow the declaration order of
 * afe_config_t, which C++ also requires.
 */
#define AFE_CONFIG_DEFAULT() { \
    .aec_init = true, \
    .se_init = true, \
    .vad_init = true, \
    .wakenet_init = true, \
    .voice_communication_init = false, \
    .voice_communication_agc_init = false, \
    .voice_communication_agc_gain = 15, \
    .vad_mode = VAD_MODE_3, \
    .wakenet_model_name = NULL, \
    .wakenet_model_name_2 = NULL, \
    .wakenet_mode = DET_MODE_90, \
    .afe_mode = SR_MODE_LOW_COST, \
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
    .afe_linear_gain = 1.0f, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config = { \
        .total_ch_num = 2, \
        .mic_num = 1, \
        .ref_num = 1, \
        .sample_rate = 16000, \
    }, \
    .debug_init = false, \
    .debug_hook = { \
        {AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, \
        {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}, \
    }, \
    .afe_ns_mode = NS_MODE_SSP, \
    .afe_ns_model_name = NULL, \
    .fixed_first_channel = true, \
}
|
|
#elif CONFIG_IDF_TARGET_ESP32S3
|
|
/*
 * Default afe_config_t initializer used when CONFIG_IDF_TARGET_ESP32S3 is
 * set: low-cost SR mode, two-channel wakenet detection mode, allocation
 * biased towards PSRAM, and a PCM layout of 2 microphones + 1 reference
 * channel (3 channels total).
 *
 * pcm_config is initialized with a brace-enclosed sub-initializer instead of
 * nested designators (".pcm_config.mic_num = ..."). The result is identical
 * in C, and the macro is then also accepted by C++ compilers, which do not
 * allow nested designators. Designators follow the declaration order of
 * afe_config_t, which C++ also requires.
 */
#define AFE_CONFIG_DEFAULT() { \
    .aec_init = true, \
    .se_init = true, \
    .vad_init = true, \
    .wakenet_init = true, \
    .voice_communication_init = false, \
    .voice_communication_agc_init = false, \
    .voice_communication_agc_gain = 15, \
    .vad_mode = VAD_MODE_3, \
    .wakenet_model_name = NULL, \
    .wakenet_model_name_2 = NULL, \
    .wakenet_mode = DET_MODE_2CH_90, \
    .afe_mode = SR_MODE_LOW_COST, \
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
    .afe_linear_gain = 1.0f, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config = { \
        .total_ch_num = 3, \
        .mic_num = 2, \
        .ref_num = 1, \
        .sample_rate = 16000, \
    }, \
    .debug_init = false, \
    .debug_hook = { \
        {AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, \
        {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}, \
    }, \
    .afe_ns_mode = NS_MODE_SSP, \
    .afe_ns_model_name = NULL, \
    .fixed_first_channel = true, \
}
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|