mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
feat: update esp32p4 AFE
This commit is contained in:
parent
07d64a5db9
commit
e609fda983
@ -21,80 +21,72 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_USE_SPIRAM 0
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
//#define AEC_FRAME_LENGTH_MS 16
|
||||
#define AEC_FRAME_LENGTH_MS 32
|
||||
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
|
||||
|
||||
typedef void* aec_handle_t;
|
||||
typedef struct aec_handle_t aec_handle_t;
|
||||
typedef enum {
|
||||
AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC for speech recognition
|
||||
AEC_MODE_SR_HIGH_PERF = 1, // High Performance AEC for speech recognition
|
||||
AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication
|
||||
AEC_MODE_VOIP_HIGH_PERF = 4, // High Performance AEC for voice communication
|
||||
} aec_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @deprecated This API will be deprecated after version 1.0, please use aec_pro_create
|
||||
* Please get frame size by aec_get_chunksize() function
|
||||
*
|
||||
* @param sample_rate The Sampling frequency (Hz) must be 16000.
|
||||
*
|
||||
* @param frame_length The length of the audio processing must be 16ms.
|
||||
*
|
||||
* @param filter_length Number of samples of echo to cancel.
|
||||
*
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t aec_create(int sample_rate, int frame_length, int filter_length);
|
||||
aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @deprecated This API will be deprecated after version 1.0, please use aec_pro_create
|
||||
*
|
||||
* @param sample_rate The Sampling frequency (Hz) must be 16000.
|
||||
*
|
||||
* @param frame_length The length of the audio processing must be 16ms.
|
||||
*
|
||||
* @param filter_length Number of samples of echo to cancel.
|
||||
*
|
||||
* @param nch Number of input signal channel.
|
||||
* @brief Creates an instance to the AEC structure, same with aec_create().
|
||||
*
|
||||
* @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption.
|
||||
* @param channel_num The input microphone channel number
|
||||
* @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t aec_create_multimic(int sample_rate, int frame_length, int filter_length, int nch);
|
||||
|
||||
/**
|
||||
* @brief Creates an instance of more powerful AEC.
|
||||
*
|
||||
* @param frame_length Length of input signal. Must be 16ms if mode is 0; otherwise could be 16ms or 32ms. Length of input signal to aec_process must be modified accordingly.
|
||||
*
|
||||
* @param nch Number of microphones.
|
||||
*
|
||||
* @param mode Mode of AEC (0 to 5), indicating aggressiveness and RAM allocation. 0: mild; 1 or 2: medium (1: internal RAM, 2: SPIRAM); 3 and 4: aggressive (3: internal RAM, 4: SPIRAM); 5: aggressive, accelerated for ESP32-S3.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: An Instance of AEC
|
||||
*/
|
||||
aec_handle_t aec_pro_create(int frame_length, int nch, int mode);
|
||||
aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode);
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic.
|
||||
*
|
||||
* @param inst The instance of AEC.
|
||||
* @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc().
|
||||
*
|
||||
* @param inst The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..."
|
||||
* @param indata An array of 16-bit signed audio samples from mic.
|
||||
*
|
||||
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
|
||||
*
|
||||
* @param outdata Returns near-end signal with echo removed.
|
||||
*
|
||||
* @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..."
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_process(const aec_handle_t inst, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Get frame size of AEC (the samples of one frame)
|
||||
* @param handle The instance of AEC.
|
||||
* @return Frame size
|
||||
*/
|
||||
int aec_get_chunksize(const aec_handle_t *handle);
|
||||
|
||||
/**
|
||||
* @brief Get AEC mode string
|
||||
*
|
||||
* @param aec_mode The mode of AEC.
|
||||
*
|
||||
* @return AEC mode string
|
||||
*/
|
||||
char * aec_get_mode_string(aec_mode_t aec_mode);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
@ -104,7 +96,7 @@ void aec_process(const aec_handle_t inst, int16_t *indata, int16_t *refdata, int
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_destroy(aec_handle_t inst);
|
||||
void aec_destroy(aec_handle_t *handel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@ -1,24 +1,41 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "stdbool.h"
|
||||
#include "stdlib.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "esp_vad.h"
|
||||
|
||||
#include "esp_aec.h"
|
||||
#include "esp_agc.h"
|
||||
#include "model_path.h"
|
||||
#include "esp_vadn_models.h"
|
||||
#include "esp_nsn_models.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//AFE: Audio Front-End
|
||||
//SR: Speech Recognition
|
||||
//afe_sr/AFE_SR: the audio front-end for speech recognition
|
||||
|
||||
//VC: Voice Communication
|
||||
|
||||
//Set AFE_SR mode
|
||||
typedef enum {
|
||||
SR_MODE_LOW_COST = 0,
|
||||
SR_MODE_HIGH_PERF = 1
|
||||
SR_MODE_LOW_COST = 0, //Deprecated, please use afe_mode_t, AFE mode: low cost mode
|
||||
SR_MODE_HIGH_PERF = 1, //Deprecated, please use afe_mode_t, AFE mode: high performance mode
|
||||
} afe_sr_mode_t;
|
||||
|
||||
//Set AFE mode
|
||||
typedef enum {
|
||||
AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode
|
||||
AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode
|
||||
} afe_mode_t;
|
||||
|
||||
//Set AFE type
|
||||
typedef enum {
|
||||
AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression
|
||||
AFE_TYPE_VC = 1, // Voice communication scenarios, including nonlinear noise suppression
|
||||
} afe_type_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
|
||||
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
|
||||
@ -26,24 +43,30 @@ typedef enum {
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of audio fed to multinet is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of audio fed to multinet is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetch audio is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int total_ch_num; // total channel num. It must be: total_ch_num = mic_num + ref_num
|
||||
int mic_num; // mic channel num
|
||||
int ref_num; // reference channel num
|
||||
int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel
|
||||
int mic_num; // microphone channel number
|
||||
uint8_t* mic_ids; // microphone channel indices
|
||||
int ref_num; // playback reference channel number
|
||||
uint8_t* ref_ids; // playback reference channel indices
|
||||
int sample_rate; // sample rate of audio
|
||||
} afe_pcm_config_t;
|
||||
|
||||
typedef enum {
|
||||
NS_MODE_SSP = 0, // speech signal process method
|
||||
NS_MODE_NET = 1, // deep noise suppression net method
|
||||
AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
|
||||
AFE_NS_MODE_NET = 1, // please use model name of NSNET
|
||||
} afe_ns_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
|
||||
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
|
||||
} afe_agc_mode_t;
|
||||
|
||||
/**
|
||||
* @brief Function to get the debug audio data
|
||||
@ -66,148 +89,192 @@ typedef struct {
|
||||
} afe_debug_hook_t;
|
||||
|
||||
typedef struct {
|
||||
bool aec_init;
|
||||
bool se_init;
|
||||
bool vad_init;
|
||||
bool wakenet_init;
|
||||
bool voice_communication_init;
|
||||
bool voice_communication_agc_init; // AGC switch for voice communication
|
||||
int voice_communication_agc_gain; // AGC gain(dB) for voice communication
|
||||
/********** AEC(Acoustic Echo Cancellation) **********/
|
||||
bool aec_init; // Whether to init aec
|
||||
aec_mode_t aec_mode; // The mode of aec, AEC_MODE_SR_LOW_COST or AEC_MODE_SR_HIGH_PERF
|
||||
int aec_filter_length; // The filter length of aec
|
||||
|
||||
/********** SE(Speech Enhancement, microphone array processing) **********/
|
||||
bool se_init; // Whether to init se
|
||||
|
||||
/********** NS(Noise Suppression) **********/
|
||||
bool ns_init; // Whether to init ns
|
||||
char *ns_model_name; // Model name of ns
|
||||
afe_ns_mode_t afe_ns_mode; // Model mode of ns
|
||||
|
||||
/********** VAD(Voice Activity Detection) **********/
|
||||
bool vad_init; // Whether to init vad
|
||||
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
|
||||
char *vad_model_name; // The model name of vad, If it is null, WebRTC VAD will be used.
|
||||
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
|
||||
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default: 1000 ms
|
||||
bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
|
||||
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
|
||||
|
||||
/********** WakeNet(Wake Word Engine) **********/
|
||||
bool wakenet_init;
|
||||
char *wakenet_model_name; // The model name of wakenet 1
|
||||
char *wakenet_model_name_2; // The model name of wakenet 2 if has wakenet 2
|
||||
det_mode_t wakenet_mode;
|
||||
afe_sr_mode_t afe_mode;
|
||||
int afe_perferred_core;
|
||||
int afe_perferred_priority;
|
||||
int afe_ringbuf_size;
|
||||
afe_memory_alloc_mode_t memory_alloc_mode;
|
||||
float afe_linear_gain; // The linear gain for sr output (note: invalid for vc), the value should be in [0.1, 10.0].
|
||||
// This value acts directly on the output amplitude: out_linear_gain * amplitude.
|
||||
afe_mn_peak_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain.
|
||||
det_mode_t wakenet_mode; // The mode of wakenet
|
||||
|
||||
/********** AGC(Automatic Gain Control) **********/
|
||||
bool agc_init; // Whether to init agc
|
||||
afe_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain.
|
||||
int agc_compression_gain_db; // Compression gain in dB (default 9)
|
||||
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default -3)
|
||||
|
||||
/********** General AFE(Audio Front End) parameter **********/
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
afe_mode_t afe_mode; // The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
afe_type_t afe_type; // The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
int afe_perferred_core; // The preferred core of afe se task, which is created in afe_create function.
|
||||
int afe_perferred_priority; // The preferred priority of afe se task, which is created in afe_create function.
|
||||
int afe_ringbuf_size; // The ring buffer size: the number of frame data in ring buffer.
|
||||
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
|
||||
float afe_linear_gain; // The linear gain for afe output; the value should be in [0.1, 10.0]. This value acts directly on the output amplitude: out_linear_gain * amplitude.
|
||||
bool debug_init;
|
||||
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
|
||||
afe_ns_mode_t afe_ns_mode;
|
||||
char *afe_ns_model_name;
|
||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||
// otherwise, select channel number by wakenet
|
||||
char *vad_model_name; // The model name of vad, support vadnet1 and vadnet1_small
|
||||
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms
|
||||
int vad_min_noise_ms; // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
|
||||
bool vad_mute_playback; // If true, the playback will be muted for vad detection
|
||||
} afe_config_t;
|
||||
|
||||
/**
|
||||
* @brief Get AFE default configuration. The default configuration will enable all algorithms as much as possible based on the chip target and input format.
|
||||
* You can manually fine-tune it after creating the configuration
|
||||
*
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*
|
||||
* @param input_format The input format
|
||||
* @param models Models from partition, which is configured by Kconfig
|
||||
* @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC
|
||||
* @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF
|
||||
*
|
||||
* @return afe_config_t* The default config of afe
|
||||
*/
|
||||
afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, afe_type_t type, afe_mode_t mode);
|
||||
|
||||
#if CONFIG_IDF_TARGET_ESP32
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.voice_communication_init = false, \
|
||||
.voice_communication_agc_init = false, \
|
||||
.voice_communication_agc_gain = 15, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model_name = NULL, \
|
||||
.wakenet_model_name_2 = NULL, \
|
||||
.wakenet_mode = DET_MODE_90, \
|
||||
.afe_mode = SR_MODE_HIGH_PERF, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
|
||||
.afe_linear_gain = 1.0, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config = { \
|
||||
.total_ch_num = 2, \
|
||||
.mic_num = 1, \
|
||||
.ref_num = 1, \
|
||||
.sample_rate = 16000, \
|
||||
}, \
|
||||
.debug_init = false, \
|
||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||
.afe_ns_mode = NS_MODE_SSP, \
|
||||
.afe_ns_model_name = NULL, \
|
||||
.fixed_first_channel = true, \
|
||||
.vad_model_name = NULL, \
|
||||
.vad_min_speech_ms = 64, \
|
||||
.vad_min_noise_ms = 256, \
|
||||
.vad_mute_playback = false, \
|
||||
}
|
||||
#elif CONFIG_IDF_TARGET_ESP32P4
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.voice_communication_init = false, \
|
||||
.voice_communication_agc_init = false, \
|
||||
.voice_communication_agc_gain = 15, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model_name = NULL, \
|
||||
.wakenet_model_name_2 = NULL, \
|
||||
.wakenet_mode = DET_MODE_90, \
|
||||
.afe_mode = SR_MODE_LOW_COST, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
|
||||
.afe_linear_gain = 1.0, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config = { \
|
||||
.total_ch_num = 2, \
|
||||
.mic_num = 1, \
|
||||
.ref_num = 1, \
|
||||
.sample_rate = 16000, \
|
||||
}, \
|
||||
.debug_init = false, \
|
||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||
.afe_ns_mode = NS_MODE_SSP, \
|
||||
.afe_ns_model_name = NULL, \
|
||||
.fixed_first_channel = true, \
|
||||
.vad_model_name = NULL, \
|
||||
.vad_min_speech_ms = 64, \
|
||||
.vad_min_noise_ms = 256, \
|
||||
.vad_mute_playback = false, \
|
||||
}
|
||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||
#define AFE_CONFIG_DEFAULT() { \
|
||||
.aec_init = true, \
|
||||
.se_init = true, \
|
||||
.vad_init = true, \
|
||||
.wakenet_init = true, \
|
||||
.voice_communication_init = false, \
|
||||
.voice_communication_agc_init = false, \
|
||||
.voice_communication_agc_gain = 15, \
|
||||
.vad_mode = VAD_MODE_3, \
|
||||
.wakenet_model_name = NULL, \
|
||||
.wakenet_model_name_2 = NULL, \
|
||||
.wakenet_mode = DET_MODE_2CH_90, \
|
||||
.afe_mode = SR_MODE_LOW_COST, \
|
||||
.afe_perferred_core = 0, \
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
|
||||
.afe_linear_gain = 1.0, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config = { \
|
||||
.total_ch_num = 3, \
|
||||
.mic_num = 2, \
|
||||
.ref_num = 1, \
|
||||
.sample_rate = 16000, \
|
||||
}, \
|
||||
.debug_init = false, \
|
||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||
.afe_ns_mode = NS_MODE_SSP, \
|
||||
.afe_ns_model_name = NULL, \
|
||||
.fixed_first_channel = true, \
|
||||
.vad_model_name = NULL, \
|
||||
.vad_min_speech_ms = 64, \
|
||||
.vad_min_noise_ms = 256, \
|
||||
.vad_mute_playback = false, \
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* @brief Check AFE configuration and make sure it is correct.
|
||||
*
|
||||
* @warning If there is a configuration conflict, this function will modify some parameters.
|
||||
* The guiding principle behind these modifications is to maintain the highest performance of the output audio and results,
|
||||
* And remove the conflict between different algorithms.
|
||||
*
|
||||
* For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
|
||||
* If SE(BSS) algorithm is deactivated, will only use the first microphone channel.
|
||||
*
|
||||
* @param afe_config Input AFE config
|
||||
*
|
||||
* @return afe_config_t* The modified AFE config
|
||||
*/
|
||||
afe_config_t *afe_config_check(afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input format
|
||||
*
|
||||
* @param input_format The input format, same with afe_config_init() function
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
* @return true if the input format is parsed successfully, otherwise false
|
||||
*/
|
||||
bool afe_parse_input_format(const char* input_format, afe_pcm_config_t* pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse I2S input data
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param mic_data The output microphone data
|
||||
* @param ref_data The output playback reference data
|
||||
* @param pcm_config The pcm config
|
||||
*
|
||||
*/
|
||||
void afe_parse_input(int16_t *data, int frame_size, int16_t* mic_data, int16_t* ref_data, afe_pcm_config_t* pcm_config);
|
||||
|
||||
/**
|
||||
* @brief Parse input data, from interleaved arrangement to contiguous arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_parse_data(int16_t *data, int frame_size, int channel_num, int16_t* out_data);
|
||||
|
||||
/**
|
||||
* @brief Format input data, from contiguous arrangement to interleaved arrangement
|
||||
*
|
||||
* @param data The input multi channel data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param channel_num The channel number of data
|
||||
* @param out_data The output data
|
||||
*
|
||||
*/
|
||||
void afe_format_data(int16_t *data, int frame_size, int channel_num, int16_t* out_data);
|
||||
|
||||
/**
|
||||
* @brief Adjust the gain of input data
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param data The input audio data
|
||||
* @param frame_size The frame size of input, it is also the size of single channel data
|
||||
* @param factor The gain factor
|
||||
*
|
||||
* @return int16_t* The output audio data
|
||||
*/
|
||||
int16_t* afe_adjust_gain(int16_t *data, int frame_size, float factor);
|
||||
|
||||
/**
|
||||
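* @brief Concatenate input data into the output buffer (NOTE(review): inferred from the name afe_concat_data; the previous brief duplicated the one for afe_adjust_gain - confirm against the implementation)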
|
||||
*
|
||||
* @warning the input data will be modified inplace.
|
||||
*
|
||||
* @param in_data The input audio data
|
||||
* @param in_frame_size Input data frame size of input
|
||||
* @param channel_num The channel number of input data, which is same as output data
|
||||
* @param out_data The output audio data
|
||||
* @param out_frame_size Output data frame size
|
||||
*
|
||||
*/
|
||||
void afe_concat_data(int16_t *in_data, int in_frame_size, int channel_num, int16_t * out_data, int out_frame_size);
|
||||
|
||||
/**
|
||||
* @brief Copy the afe config
|
||||
*
|
||||
* @param dst_config The destination afe config
|
||||
* @param src_config The source afe config
|
||||
*
|
||||
* @return The destination afe config
|
||||
*/
|
||||
afe_config_t* afe_config_copy(afe_config_t *dst_config, const afe_config_t *src_config);
|
||||
|
||||
/**
|
||||
* @brief Print the afe config
|
||||
*
|
||||
* @param afe_config The afe config
|
||||
*/
|
||||
void afe_config_print(const afe_config_t *afe_config);
|
||||
|
||||
/**
|
||||
* @brief Allocate afe config
|
||||
*
|
||||
* @return The afe config pointer
|
||||
*/
|
||||
afe_config_t *afe_config_alloc();
|
||||
|
||||
/**
|
||||
* @brief Free afe config
|
||||
*
|
||||
* @param afe_config The afe config pointer
|
||||
*/
|
||||
void afe_config_free(afe_config_t *afe_config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "stdlib.h"
|
||||
#include "stdbool.h"
|
||||
#include "esp_afe_config.h"
|
||||
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -13,13 +16,15 @@ extern "C" {
|
||||
//Opaque AFE_SR data container
|
||||
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief The state of vad
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
AFE_VAD_SILENCE = 0, // noise or silence
|
||||
AFE_VAD_SPEECH // speech
|
||||
AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
|
||||
AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
|
||||
} afe_vad_state_t;
|
||||
|
||||
/**
|
||||
@ -27,7 +32,7 @@ typedef enum
|
||||
*/
|
||||
typedef struct afe_fetch_result_t
|
||||
{
|
||||
int16_t *data; // the data of audio.
|
||||
int16_t *data; // the target channel data of audio.
|
||||
int data_size; // the size of data. The unit is byte.
|
||||
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
|
||||
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||
@ -36,10 +41,12 @@ typedef struct afe_fetch_result_t
|
||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||
int wake_word_index; // if the wake word is detected, it stores the wake word index, which starts from 1.
|
||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. Index starts from 1.
|
||||
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
||||
vad_state_t vad_state; // the value is vad_state_t
|
||||
int trigger_channel_id; // the channel index of output
|
||||
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||
int ret_value; // the return state of fetch function
|
||||
int16_t *raw_data; // the multi-channel output data of audio.
|
||||
int raw_data_channels; // the channel number of raw data
|
||||
void* reserved; // reserved for future use
|
||||
} afe_fetch_result_t;
|
||||
|
||||
@ -63,19 +70,11 @@ typedef esp_afe_sr_data_t* (*esp_afe_sr_iface_op_create_from_config_t)(afe_confi
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the total channel number which be config
|
||||
* @brief Get the channel number
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of total channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_total_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the mic channel number which be config
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of mic channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
@ -104,12 +103,24 @@ typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t*
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR
|
||||
*
|
||||
* @Warning The output is single channel data, no matter how many channels the input is.
|
||||
* Timeout is 2000 ms. If you want to adjust timeout, please refer to the definition of `fetch_with_delay`.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief fetch enhanced samples of an audio stream from the AFE_SR, same with the function `fetch`
|
||||
*
|
||||
* @Warning The output is single channel data, no matter how many channels the input is.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @param ticks_to_wait The timeout value, in ticks, to wait for the fetch result.
|
||||
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output audio can be queried by the `get_fetch_chunksize`.)
|
||||
*/
|
||||
typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_with_delay_t)(esp_afe_sr_data_t *afe, TickType_t ticks_to_wait);
|
||||
|
||||
/**
|
||||
* @brief reset ringbuf of AFE.
|
||||
*
|
||||
@ -129,52 +140,37 @@ typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
||||
typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* model_name);
|
||||
|
||||
/**
|
||||
* @brief Disable wakenet model.
|
||||
* @brief Enable VAD algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
|
||||
typedef int (*esp_afe_sr_iface_op_enable_vad_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable wakenet model.
|
||||
* @brief Disable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
|
||||
typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Disable AEC algorithm.
|
||||
* @brief Enable one function/module/algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
|
||||
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable AEC algorithm.
|
||||
* @brief Print all functions/modules/algorithms pipeline.
|
||||
* The pipeline is the order of the functions/modules/algorithms.
|
||||
* The format is like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Disable SE algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Enable SE algorithm.
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return -1: fail, 0: disabled, 1: enabled
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_enable_se_t)(esp_afe_sr_data_t *afe);
|
||||
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Destroy a AFE_SR instance
|
||||
@ -191,22 +187,41 @@ typedef struct {
|
||||
esp_afe_sr_iface_op_create_from_config_t create_from_config;
|
||||
esp_afe_sr_iface_op_feed_t feed;
|
||||
esp_afe_sr_iface_op_fetch_t fetch;
|
||||
esp_afe_sr_iface_op_fetch_with_delay_t fetch_with_delay;
|
||||
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
|
||||
esp_afe_sr_iface_op_get_total_channel_num_t get_total_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_afe_sr_iface_op_set_wakenet_t set_wakenet;
|
||||
esp_afe_sr_iface_op_disable_wakenet_t disable_wakenet;
|
||||
esp_afe_sr_iface_op_enable_wakenet_t enable_wakenet;
|
||||
esp_afe_sr_iface_op_disable_aec_t disable_aec;
|
||||
esp_afe_sr_iface_op_enable_aec_t enable_aec;
|
||||
esp_afe_sr_iface_op_disable_se_t disable_se;
|
||||
esp_afe_sr_iface_op_enable_se_t enable_se;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_wakenet;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_aec;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_aec;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_se;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_se;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_vad;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_vad;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_ns;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_ns;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_agc;
|
||||
esp_afe_sr_iface_op_enable_func_t enable_agc;
|
||||
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
|
||||
esp_afe_sr_iface_op_destroy_t destroy;
|
||||
} esp_afe_sr_iface_t;
|
||||
|
||||
|
||||
// Bundles the AFE operation table, its instance data and the feed/fetch task
// handles for the AFE task.
// NOTE(review): "into" in the type name looks like a typo for "info"; the name
// is left unchanged so that existing users of this header still compile.
|
||||
typedef struct
|
||||
{
|
||||
esp_afe_sr_data_t *afe_data; // AFE instance data, the object passed to the esp_afe_sr_iface_t operations
|
||||
esp_afe_sr_iface_t *afe_handle; // AFE operation table (feed, fetch, ...) used together with afe_data
|
||||
TaskHandle_t feed_task; // handle of the feed task; presumably the task that calls feed() (TODO confirm)
|
||||
TaskHandle_t fetch_task; // handle of the fetch task; presumably the task that calls fetch() (TODO confirm)
|
||||
}afe_task_into_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -6,17 +6,7 @@ extern "C" {
|
||||
|
||||
#include "esp_afe_sr_iface.h"
|
||||
|
||||
|
||||
#if CONFIG_AFE_INTERFACE_V1
|
||||
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
|
||||
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
||||
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
|
||||
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
|
||||
|
||||
#else
|
||||
#error No valid afe selected.
|
||||
#endif
|
||||
|
||||
esp_afe_sr_iface_t *esp_afe_handle_from_config(const afe_config_t *config);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@ -26,8 +26,15 @@ typedef enum {
|
||||
ESP_AGC_FRAME_SIZE_ERROR = -3, // the input frame must be exactly 10 ms long, so the frame size has to be derived from the sample rate
|
||||
} ESP_AGE_ERR;
|
||||
|
||||
/** AGC operating mode; this is the type of the agc_mode argument of esp_agc_open(). */
typedef enum {
|
||||
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
|
||||
AGC_MODE_0 = 0, // Only saturation protection
|
||||
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
|
||||
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
|
||||
} agc_mode_t;
|
||||
|
||||
void *esp_agc_open(int agc_mode, int sample_rate);
|
||||
void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
|
||||
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
|
||||
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
|
||||
void esp_agc_close(void *agc_handle);
|
||||
|
||||
@ -1,4 +1,7 @@
|
||||
#pragma once
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
|
||||
#pragma GCC diagnostic ignored "-Wimplicit-function-declaration"
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
@ -64,3 +67,4 @@ model->destroy(model_data)
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#pragma GCC diagnostic pop
|
||||
@ -78,6 +78,8 @@ vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||
/** State behind a VAD handle; vad_handle_t is a pointer to this struct. */
typedef struct {
|
||||
vad_trigger_t *trigger; // trigger state, the type consumed by vad_trigger_detect()
|
||||
void *vad_inst; // opaque VAD instance
|
||||
int sample_rate; // presumably the sample rate in Hz given to vad_create_with_param() (TODO confirm)
|
||||
int frame_size; // presumably the frame length derived from sample_rate and one_frame_ms; unit not shown here (TODO confirm)
|
||||
}vad_handle_with_trigger_t;
|
||||
|
||||
typedef vad_handle_with_trigger_t* vad_handle_t;
|
||||
@ -100,31 +102,41 @@ vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
* @param min_speech_len Minimum frame number of speech duration
|
||||
* @param min_noise_len Minimum frame number of noise duration
|
||||
* @param sample_rate Sample rate in Hz
|
||||
* @param one_frame_ms Length of the audio chunksize, can be 10ms, 20ms, 30ms, default: 30.
|
||||
* @param min_speech_len Minimum speech duration, unit is ms
|
||||
* @param min_noise_len Minimum noise duration, unit is ms
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
|
||||
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int sample_rate, int one_frame_ms, int min_speech_len, int min_noise_len);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param inst The instance of VAD.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
*
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
*
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process(vad_handle_t inst, int16_t *data, int sample_rate_hz, int one_frame_ms);
|
||||
vad_state_t vad_process(vad_handle_t handle, int16_t *data, int sample_rate_hz, int one_frame_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param handle The instance of VAD.
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process_with_trigger(vad_handle_t handle, int16_t *data);
|
||||
|
||||
/**
|
||||
* @brief Free the VAD instance
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include "esp_vad.h"
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -98,6 +99,25 @@ typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the vad model and detect whether it is
|
||||
* voice.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The detected VAD state (vad_state_t): VAD_SILENCE if no voice,
|
||||
* VAD_SPEECH if voice is detected.
|
||||
*/
|
||||
typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the triggered channel index. Channel index starts from zero
|
||||
*
|
||||
@ -133,6 +153,8 @@ typedef struct {
|
||||
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_vadn_iface_op_detect_t detect;
|
||||
esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_vadn_iface_op_clean_t clean;
|
||||
esp_vadn_iface_op_destroy_t destroy;
|
||||
} esp_vadn_iface_t;
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_convq_queue.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -167,6 +168,25 @@ typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Feed MFCC of an audio stream to the wake word model and detect whether
|
||||
* a wake word is present.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param cq An array of 16-bit MFCC.
|
||||
* @return The index of wake words, return 0 if no wake word is detected, else
|
||||
* the index of the wake words.
|
||||
*/
|
||||
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Get MFCC of an audio stream
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return MFCC data
|
||||
*/
|
||||
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a wake word detection model.
|
||||
@ -184,6 +204,8 @@ typedef struct {
|
||||
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
|
||||
esp_wn_iface_op_detect_t detect;
|
||||
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
|
||||
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
|
||||
esp_wn_iface_op_clean_t clean;
|
||||
esp_wn_iface_op_destroy_t destroy;
|
||||
} esp_wn_iface_t;
|
||||
|
||||
@ -70,20 +70,12 @@ typedef esp_afe_sr_data_t* (*esp_afe_sr_iface_op_create_from_config_t)(afe_confi
|
||||
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the total channel number which be config
|
||||
* @brief Get the channel number
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of total channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_feed_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the mic channel number which be config
|
||||
*
|
||||
* @param afe The AFE_SR object to query
|
||||
* @return The amount of mic channels
|
||||
*/
|
||||
typedef int (*esp_afe_sr_iface_op_get_fetch_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the function
|
||||
@ -199,8 +191,9 @@ typedef struct {
|
||||
esp_afe_sr_iface_op_reset_buffer_t reset_buffer;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
|
||||
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
|
||||
esp_afe_sr_iface_op_get_feed_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_fetch_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_channel_num; // same with get_feed_channel_num
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_feed_channel_num;
|
||||
esp_afe_sr_iface_op_get_channel_num_t get_fetch_channel_num;
|
||||
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_afe_sr_iface_op_set_wakenet_t set_wakenet;
|
||||
esp_afe_sr_iface_op_disable_func_t disable_wakenet;
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user