feat: update trigger of vadnet

This commit is contained in:
xysun 2025-01-17 17:23:40 +08:00
parent 8f6845123d
commit 07d64a5db9
18 changed files with 192 additions and 68 deletions

View File

@ -79,6 +79,15 @@ void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata,
*/ */
int aec_get_chunksize(const aec_handle_t *handle); int aec_get_chunksize(const aec_handle_t *handle);
/**
 * @brief Get the string representation of an AEC mode
 *
 * @param aec_mode The mode of AEC.
 *
 * @return AEC mode string
 *
 * NOTE(review): ownership of the returned string is not visible here;
 * presumably it points to static storage and must not be freed or
 * modified by the caller - confirm against the implementation.
 */
char * aec_get_mode_string(aec_mode_t aec_mode);
/** /**
* @brief Free the AEC instance * @brief Free the AEC instance
* *

View File

@ -6,6 +6,7 @@
#include "esp_wn_models.h" #include "esp_wn_models.h"
#include "esp_vad.h" #include "esp_vad.h"
#include "esp_aec.h" #include "esp_aec.h"
#include "esp_agc.h"
#include "model_path.h" #include "model_path.h"
#include "esp_vadn_models.h" #include "esp_vadn_models.h"
#include "esp_nsn_models.h" #include "esp_nsn_models.h"
@ -58,10 +59,14 @@ typedef struct {
} afe_pcm_config_t; } afe_pcm_config_t;
typedef enum { typedef enum {
NS_MODE_SSP = 0, // Deprecated, please use model name of NS, SSP: "WEBRTC" AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC"
NS_MODE_NET = 1, // Deprecated, please use model name of NSNET AFE_NS_MODE_NET = 1, // please use model name of NSNET
} afe_ns_mode_t; } afe_ns_mode_t;
/**
 * @brief AGC mode used by the AFE (type of the agc_mode field in afe_config_t).
 */
typedef enum {
AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC
AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated
} afe_agc_mode_t;
/** /**
* @brief Function to get the debug audio data * @brief Function to get the debug audio data
@ -90,20 +95,21 @@ typedef struct {
int aec_filter_length; // The filter length of aec int aec_filter_length; // The filter length of aec
/********** SE(Speech Enhancement, microphone array processing) **********/ /********** SE(Speech Enhancement, microphone array processing) **********/
bool se_init; bool se_init; // Whether to init se
/********** NS(Noise Suppression) **********/ /********** NS(Noise Suppression) **********/
bool ns_init; bool ns_init; // Whether to init ns
char *ns_model_name; char *ns_model_name; // Model name of ns
afe_ns_mode_t afe_ns_mode; afe_ns_mode_t afe_ns_mode; // Model mode of ns
/********** VAD(Voice Activity Detection) **********/ /********** VAD(Voice Activity Detection) **********/
bool vad_init; // Whether to init vad bool vad_init; // Whether to init vad
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4 vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
char *vad_model_name; // The model name of vad, If it is null, WebRTC VAD will be used. char *vad_model_name; // The model name of vad, If it is null, WebRTC VAD will be used.
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms
int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default: 1000 ms
bool vad_mute_playback; // If true, the playback will be muted for vad detection. bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false
bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false
/********** WakeNet(Wake Word Engine) **********/ /********** WakeNet(Wake Word Engine) **********/
bool wakenet_init; bool wakenet_init;
@ -113,8 +119,9 @@ typedef struct {
/********** AGC(Automatic Gain Control) **********/ /********** AGC(Automatic Gain Control) **********/
bool agc_init; // Whether to init agc bool agc_init; // Whether to init agc
afe_mn_peak_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain. afe_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain.
int agc_gain; // AGC gain(dB) for voice communication int agc_compression_gain_db; // Compression gain in dB (default 9)
int agc_target_level_dbfs; // Target level in -dBfs of envelope (default -3)
/********** General AFE(Audio Front End) parameter **********/ /********** General AFE(Audio Front End) parameter **********/
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
@ -126,7 +133,6 @@ typedef struct {
afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM
float afe_linear_gain; // The linear gain for afe output the value should be in [0.1, 10.0]. This value acts directly on the output amplitude: out_linear_gain * amplitude. float afe_linear_gain; // The linear gain for afe output the value should be in [0.1, 10.0]. This value acts directly on the output amplitude: out_linear_gain * amplitude.
bool debug_init; bool debug_init;
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet // otherwise, select channel number by wakenet
} afe_config_t; } afe_config_t;
@ -157,9 +163,10 @@ afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models,
* *
* @warning If there is a configuration conflict, this function will modify some parameters. * @warning If there is a configuration conflict, this function will modify some parameters.
* The guiding behind these modifications is to maintain the highest performance of the output audio and results. * The guiding behind these modifications is to maintain the highest performance of the output audio and results.
* And remove the conflict between different algorithms.
* *
* For example, input_format="MMNR" indicates that the input data consists of four channels, * For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm.
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel * If SE(BSS) algorithm is deactivated, will only use the first microphone channel.
* *
* @param afe_config Input AFE config * @param afe_config Input AFE config
* *
@ -171,11 +178,11 @@ afe_config_t *afe_config_check(afe_config_t *afe_config);
* @brief Parse input format * @brief Parse input format
* *
* @param input_format The input format, same with afe_config_init() function * @param input_format The input format, same with afe_config_init() function
* @param afe_config The afe config * @param pcm_config The pcm config
* *
* @return true if the input format is parsed successfully, otherwise false * @return true if the input format is parsed successfully, otherwise false
*/ */
bool afe_parse_input_format(const char* input_format, afe_config_t* afe_config); bool afe_parse_input_format(const char* input_format, afe_pcm_config_t* pcm_config);
/** /**
* @brief Parse I2S input data * @brief Parse I2S input data
@ -184,10 +191,10 @@ bool afe_parse_input_format(const char* input_format, afe_config_t* afe_config);
* @param frame_size The frame size of input, it is also the size of single channel data * @param frame_size The frame size of input, it is also the size of single channel data
* @param mic_data The output microphone data * @param mic_data The output microphone data
* @param ref_data The output playback reference data * @param ref_data The output playback reference data
* @param afe_config The afe config * @param pcm_config The pcm config
* *
*/ */
void afe_parse_input(int16_t *data, int frame_size, int16_t* mic_data, int16_t* ref_data, afe_config_t *afe_config); void afe_parse_input(int16_t *data, int frame_size, int16_t* mic_data, int16_t* ref_data, afe_pcm_config_t* pcm_config);
/** /**
* @brief Parse input data, from interleaved arrangement to contiguous arrangement * @brief Parse input data, from interleaved arrangement to contiguous arrangement

View File

@ -23,8 +23,8 @@ typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
*/ */
typedef enum typedef enum
{ {
AFE_VAD_SILENCE = 0, // noise or silence AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence
AFE_VAD_SPEECH // speech AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech
} afe_vad_state_t; } afe_vad_state_t;
/** /**
@ -41,12 +41,12 @@ typedef struct afe_fetch_result_t
wakenet_state_t wakeup_state; // the value is wakenet_state_t wakenet_state_t wakeup_state; // the value is wakenet_state_t
int wake_word_index; // if the wake word is detected. It will store the wake word index which start from 1. int wake_word_index; // if the wake word is detected. It will store the wake word index which start from 1.
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1. int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
afe_vad_state_t vad_state; // the value is afe_vad_state_t vad_state_t vad_state; // the value is afe_vad_state_t
int trigger_channel_id; // the channel index of output int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. The unit is the number of samples. int wake_word_length; // the length of wake word. The unit is the number of samples.
int ret_value; // the return state of fetch function int ret_value; // the return state of fetch function
int16_t *raw_data; // the multi-channel output data of audio. int16_t *raw_data; // the multi-channel output data of audio.
int channel_num; // Channel number of raw data int raw_data_channels; // the channel number of raw data
void* reserved; // reserved for future use void* reserved; // reserved for future use
} afe_fetch_result_t; } afe_fetch_result_t;
@ -171,6 +171,15 @@ typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe);
*/ */
typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe); typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe);
/**
 * @brief Print the pipeline of all functions/modules/algorithms.
 *        The pipeline is the order of the functions/modules/algorithms.
 *        The output format looks like this:
 *        [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output]
 *
 * @param afe The AFE_SR object to query
 */
typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe);
/** /**
* @brief Destroy a AFE_SR instance * @brief Destroy a AFE_SR instance
* *
@ -204,6 +213,9 @@ typedef struct {
esp_afe_sr_iface_op_enable_func_t enable_vad; esp_afe_sr_iface_op_enable_func_t enable_vad;
esp_afe_sr_iface_op_disable_func_t disable_ns; esp_afe_sr_iface_op_disable_func_t disable_ns;
esp_afe_sr_iface_op_enable_func_t enable_ns; esp_afe_sr_iface_op_enable_func_t enable_ns;
esp_afe_sr_iface_op_disable_func_t disable_agc;
esp_afe_sr_iface_op_enable_func_t enable_agc;
esp_afe_sr_iface_op_print_pipeline_t print_pipeline;
esp_afe_sr_iface_op_destroy_t destroy; esp_afe_sr_iface_op_destroy_t destroy;
} esp_afe_sr_iface_t; } esp_afe_sr_iface_t;

View File

@ -26,8 +26,15 @@ typedef enum {
ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size
} ESP_AGE_ERR; } ESP_AGE_ERR;
/**
 * @brief AGC operating mode, passed as the agc_mode argument of esp_agc_open().
 *
 * NOTE(review): AGC_MODE_3 documents a default compression gain of 8 dB,
 * while the agc_compression_gain_db field of afe_config_t documents a
 * default of 9 - confirm which value is correct.
 */
typedef enum {
AGC_MODE_SR = -1, // Bypass WEBRTC AGC
AGC_MODE_0 = 0, // Only saturation protection
AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)]
AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)]
} agc_mode_t;
void *esp_agc_open(int agc_mode, int sample_rate); void *esp_agc_open(agc_mode_t agc_mode, int sample_rate);
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs); void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate); int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
void esp_agc_close(void *agc_handle); void esp_agc_close(void *agc_handle);

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "esp_vad.h" #include "esp_vad.h"
#include "stdint.h" #include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -18,19 +19,6 @@ typedef struct model_iface_data_t model_iface_data_t;
// VAD_SPEECH = 1 // Speech // VAD_SPEECH = 1 // Speech
// } vad_state_t; // } vad_state_t;
typedef struct vadn_trigger_tag {
float *probs;
float prob_sum;
float prob_max;
float prob_mean;
vad_state_t state;
unsigned int win_len;
unsigned int min_speech_len;
unsigned int noise_len;
unsigned int min_noise_len;
unsigned int speech_len;
} vadn_trigger_t;
/** /**
* @brief Easy function type to initialze a model instance with a detection mode * @brief Easy function type to initialze a model instance with a detection mode
* and specified model name * and specified model name
@ -112,14 +100,23 @@ typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/** /**
* @brief Feed samples of an audio stream to the vad model and return multi-channel trigger info * @brief Feed MFCC of an audio stream to the vad model and detect whether is
* voice.
* *
* @param model The model object to query * @param model The model object to query
* @param samples An array of 16-bit signed audio samples. The array size used * @param cq An array of 16-bit MFCC.
* can be queried by the get_samp_chunksize function. * @return The index of wake words, return 0 if no wake word is detected, else
* @return The trigger pointer array * the index of the wake words.
*/ */
typedef vadn_trigger_t** (*esp_vadn_iface_op_multi_channel_detect_t)(model_iface_data_t *model, int16_t *samples); typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq);
/**
 * @brief Get the MFCC data of an audio stream from the vad model
 *
 * @param model The model object to query
 * @return Pointer to the MFCC data queue
 *
 * NOTE(review): lifetime/ownership of the returned queue is not visible
 * here; presumably it is owned by the model - confirm before freeing.
 */
typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/** /**
* @brief Get the triggered channel index. Channel index starts from zero * @brief Get the triggered channel index. Channel index starts from zero
@ -156,7 +153,8 @@ typedef struct {
esp_vadn_iface_op_get_det_threshold_t get_det_threshold; esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel; esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_vadn_iface_op_detect_t detect; esp_vadn_iface_op_detect_t detect;
esp_vadn_iface_op_multi_channel_detect_t multi_channel_detect; esp_vadn_iface_op_detect_mfcc_t detect_mfcc;
esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_vadn_iface_op_clean_t clean; esp_vadn_iface_op_clean_t clean;
esp_vadn_iface_op_destroy_t destroy; esp_vadn_iface_op_destroy_t destroy;
} esp_vadn_iface_t; } esp_vadn_iface_t;

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "stdint.h" #include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -167,6 +168,25 @@ typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
*/ */
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
/**
 * @brief Feed MFCC of an audio stream to the wake word model and detect
 *        whether a wake word is present.
 *
 * @param model   The model object to query
 * @param samples An array of 16-bit signed audio samples.
 *                NOTE(review): how samples is used together with cq is not
 *                visible here - confirm against the implementation.
 * @param cq      Queue holding the MFCC data (presumably as returned by a
 *                get_mfcc_data operation - confirm).
 * @return The index of wake words, return 0 if no wake word is detected, else
 *         the index of the wake words.
 */
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
/**
 * @brief Get the MFCC data of an audio stream from the wake word model
 *
 * @param model The model object to query
 * @return Pointer to the MFCC data queue
 *
 * NOTE(review): lifetime/ownership of the returned queue is not visible
 * here; presumably it is owned by the model - confirm before freeing.
 */
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/** /**
* This structure contains the functions used to do operations on a wake word detection model. * This structure contains the functions used to do operations on a wake word detection model.
@ -184,6 +204,8 @@ typedef struct {
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel; esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_wn_iface_op_get_vol_gain_t get_vol_gain; esp_wn_iface_op_get_vol_gain_t get_vol_gain;
esp_wn_iface_op_detect_t detect; esp_wn_iface_op_detect_t detect;
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_wn_iface_op_clean_t clean; esp_wn_iface_op_clean_t clean;
esp_wn_iface_op_destroy_t destroy; esp_wn_iface_op_destroy_t destroy;
} esp_wn_iface_t; } esp_wn_iface_t;

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
vadnet1_mediumv1_Speech_3_0.5_0.1 vadnet1_mediumv1_Speech_1_0.5_0.1

View File

@ -8,7 +8,7 @@ set(srcs
idf_component_register(SRCS ${srcs} idf_component_register(SRCS ${srcs}
INCLUDE_DIRS "." "samples" INCLUDE_DIRS "." "samples"
REQUIRES unity esp-sr REQUIRES unity esp-sr esp_timer
WHOLE_ARCHIVE) WHOLE_ARCHIVE)
target_compile_options(${COMPONENT_LIB} PRIVATE "-Wno-format") target_compile_options(${COMPONENT_LIB} PRIVATE "-Wno-format")

View File

@ -12,7 +12,7 @@
#include <limits.h> #include <limits.h>
#include "unity.h" #include "unity.h"
#include "esp_log.h" #include "esp_log.h"
#include "esp_timer.h"
#include "model_path.h" #include "model_path.h"
#include "esp_wn_iface.h" #include "esp_wn_iface.h"
#include "esp_wn_models.h" #include "esp_wn_models.h"
@ -33,39 +33,52 @@ static int detect_cnt = 0;
static int fetch_task_flag = 0; static int fetch_task_flag = 0;
void test_afe_by_config(afe_config_t *afe_config) void test_afe_by_config(afe_config_t *afe_config, int frame_num, int* memory, float* cpu, int idx)
{ {
int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL); int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
int first_end_size = 0; int first_end_size = 0;
int end_size = 0; int end_size = 0;
int mem_leak = 0; int mem_leak = 0;
uint32_t feed_cpu_time = 0;
uint32_t fetch_cpu_time = 0;
uint32_t start=0, end = 0;
int loop = 3;
int feed_chunksize = 0;
int create_size = 0;
int create_internal_size = 0;
for (int i=0; i<3; i++) { for (int i=0; i<loop; i++) {
// init config and handle // init config and handle
esp_afe_sr_iface_t *afe_handle = esp_afe_handle_from_config(afe_config); esp_afe_sr_iface_t *afe_handle = esp_afe_handle_from_config(afe_config);
// afe_config_print(afe_config); // afe_config_print(afe_config);
esp_afe_sr_data_t *afe_data = afe_handle->create_from_config(afe_config); esp_afe_sr_data_t *afe_data = afe_handle->create_from_config(afe_config);
int create_size = start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT); create_size = start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT);
int create_internal_size = start_internal_size - heap_caps_get_free_size(MALLOC_CAP_INTERNAL); create_internal_size = start_internal_size - heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
printf("Internal RAM: %d, PSRAM:%d\n", create_internal_size, create_size - create_internal_size);
// run afe feed // run afe feed
int feed_chunksize = afe_handle->get_feed_chunksize(afe_data); feed_chunksize = afe_handle->get_feed_chunksize(afe_data);
int feed_nch = afe_handle->get_feed_channel_num(afe_data); int feed_nch = afe_handle->get_feed_channel_num(afe_data);
int16_t *feed_buff = (int16_t *) malloc(feed_chunksize * sizeof(int16_t) * feed_nch); int16_t *feed_buff = (int16_t *) malloc(feed_chunksize * sizeof(int16_t) * feed_nch);
for (int j=0; j<4; j++) { start = esp_timer_get_time();
for (int j=0; j<frame_num; j++) {
afe_handle->feed(afe_data, feed_buff); afe_handle->feed(afe_data, feed_buff);
} }
end = esp_timer_get_time();
feed_cpu_time += end - start;
//run afe fetch //run afe fetch
start = esp_timer_get_time();
while(1) { while(1) {
afe_fetch_result_t *res = afe_handle->fetch_with_delay(afe_data, 64 / portTICK_PERIOD_MS); afe_fetch_result_t *res = afe_handle->fetch_with_delay(afe_data, 1 / portTICK_PERIOD_MS);
if (res->ret_value != ESP_OK) { if (res->ret_value != ESP_OK) {
break; break;
} }
} }
end = esp_timer_get_time();
fetch_cpu_time += end - start;
free(feed_buff); free(feed_buff);
afe_handle->destroy(afe_data); afe_handle->destroy(afe_data);
end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
@ -74,20 +87,30 @@ void test_afe_by_config(afe_config_t *afe_config)
first_end_size = end_size; first_end_size = end_size;
} }
mem_leak = start_size - end_size; mem_leak = start_size - end_size;
printf("create&destroy times:%d, memory leak:%d\n", i, mem_leak); ESP_LOGI(TAG, "create&destroy times:%d, memory leak:%d\n", i, mem_leak);
} }
TEST_ASSERT_EQUAL(true, mem_leak < 1000 && end_size == first_end_size); uint32_t feed_data_time = loop * frame_num * feed_chunksize / 16 * 1000; // us
memory[idx*2] = create_internal_size;
memory[idx*2+1] = create_size - create_internal_size;
cpu[idx*2] = feed_cpu_time*1.0/feed_data_time;
cpu[idx*2+1] = fetch_cpu_time*1.0/feed_data_time;
printf("Internal RAM: %d, PSRAM:%d, feed cpu loading:%f, fetch cpu loading:%f\n",
memory[idx*2], memory[idx*2+1], cpu[idx*2], cpu[idx*2+1]);
TEST_ASSERT_EQUAL(true, mem_leak < 100 && end_size == first_end_size);
} }
TEST_CASE(">>>>>>>> audio_front_end create/destroy API & memory leak <<<<<<<<", "[afe]") TEST_CASE(">>>>>>>> AFE create/destroy API & memory leak <<<<<<<<", "[afe]")
{ {
const char *input_format[6] = {"M", "MR", "MM", "MMR", "MMNR", "MMMR"}; const char *input_format[6] = {"MR", "MMNR"};
afe_type_t afe_type[2] = {AFE_TYPE_SR, AFE_TYPE_VC}; afe_type_t afe_type[2] = {AFE_TYPE_SR, AFE_TYPE_VC};
afe_mode_t afe_model[2] = {AFE_MODE_HIGH_PERF, AFE_MODE_LOW_COST}; afe_mode_t afe_mode[2] = {AFE_MODE_LOW_COST, AFE_MODE_HIGH_PERF};
int count = 0;
int memory[512];
float cpu[512];
// test all setting // test all setting
srmodel_list_t *models = esp_srmodel_init("model"); srmodel_list_t *models = esp_srmodel_init("model");
for (int format_id=0; format_id<6; format_id++) { for (int format_id=0; format_id<2; format_id++) {
for (int type_id=0; type_id<2; type_id++) { for (int type_id=0; type_id<2; type_id++) {
for (int mode_id=0; mode_id<2; mode_id++) { for (int mode_id=0; mode_id<2; mode_id++) {
for (int aec_init = 0; aec_init < 2; aec_init++) { for (int aec_init = 0; aec_init < 2; aec_init++) {
@ -95,15 +118,17 @@ TEST_CASE(">>>>>>>> audio_front_end create/destroy API & memory leak <<<<<<<<",
for (int ns_init = 0; ns_init < 2; ns_init++) { for (int ns_init = 0; ns_init < 2; ns_init++) {
for (int vad_init = 0; vad_init < 2; vad_init++) { for (int vad_init = 0; vad_init < 2; vad_init++) {
for (int wakenet_init = 0; wakenet_init < 2; wakenet_init++) { for (int wakenet_init = 0; wakenet_init < 2; wakenet_init++) {
printf("format: %s, type: %d, mode: %d\n", input_format[format_id], afe_type[type_id], afe_model[mode_id]); printf("format: %s, type: %d, mode: %d, memory size:%d %d\n",
afe_config_t *afe_config = afe_config_init(input_format[format_id], models, afe_type[type_id], afe_model[mode_id]); input_format[format_id], afe_type[type_id], afe_mode[mode_id], heap_caps_get_free_size(MALLOC_CAP_8BIT), count);
afe_config_t *afe_config = afe_config_init(input_format[format_id], models, afe_type[type_id], afe_mode[mode_id]);
afe_config->aec_init = aec_init; afe_config->aec_init = aec_init;
afe_config->se_init = se_init; afe_config->se_init = se_init;
afe_config->ns_init = ns_init; afe_config->ns_init = ns_init;
afe_config->vad_init = vad_init; afe_config->vad_init = vad_init;
afe_config->wakenet_init = wakenet_init; afe_config->wakenet_init = wakenet_init;
test_afe_by_config(afe_config); test_afe_by_config(afe_config, 4, memory, cpu, count);
afe_config_free(afe_config); afe_config_free(afe_config);
count++;
} }
} }
} }
@ -112,7 +137,49 @@ TEST_CASE(">>>>>>>> audio_front_end create/destroy API & memory leak <<<<<<<<",
} }
} }
} }
esp_srmodel_deinit(models); for (int idx=0; idx<256; idx++) {
printf("Internal RAM: %d, PSRAM:%d, feed cpu loading:%f, fetch cpu loading:%f\n",
memory[idx*2], memory[idx*2+1], cpu[idx*2], cpu[idx*2+1]);
}
printf("AFE create/destroy API & memory leak test done\n");
}
/**
 * Benchmark the default AFE configuration.
 *
 * For every combination of input format ("MR", "MMNR"), AFE type (SR, VC)
 * and AFE mode (LOW_COST, HIGH_PERF) a default config is created with
 * afe_config_init() and handed to test_afe_by_config() with 8 feed frames.
 * The memory and CPU-load figures collected by test_afe_by_config() are
 * printed as a summary once all configurations have run.
 */
TEST_CASE(">>>>>>>> AFE default setting <<<<<<<<", "[afe_benchmark]")
{
    const char *input_format[2] = {"MR", "MMNR"};
    afe_type_t afe_type[2] = {AFE_TYPE_SR, AFE_TYPE_VC};
    afe_mode_t afe_mode[2] = {AFE_MODE_LOW_COST, AFE_MODE_HIGH_PERF};
    int count = 0;
    // test_afe_by_config() fills two slots per configuration (idx*2 and
    // idx*2+1); 2 formats * 2 types * 2 modes = 8 configurations -> 16 slots.
    int memory[16] = {0};
    float cpu[16] = {0};

    // test all setting
    srmodel_list_t *models = esp_srmodel_init("model");
    for (int format_id = 0; format_id < 2; format_id++) {
        for (int type_id = 0; type_id < 2; type_id++) {
            for (int mode_id = 0; mode_id < 2; mode_id++) {
                // heap_caps_get_free_size() returns size_t; cast so the argument matches %d
                printf("format: %s, type: %d, mode: %d, memory size:%d %d\n",
                       input_format[format_id], afe_type[type_id], afe_mode[mode_id],
                       (int)heap_caps_get_free_size(MALLOC_CAP_8BIT), count);
                afe_config_t *afe_config = afe_config_init(input_format[format_id], models, afe_type[type_id], afe_mode[mode_id]);
                test_afe_by_config(afe_config, 8, memory, cpu, count);
                afe_config_free(afe_config);
                count++;
            }
        }
    }

    // Summary: walk the configurations in the same order they were measured.
    count = 0;
    for (int format_id = 0; format_id < 2; format_id++) {
        for (int type_id = 0; type_id < 2; type_id++) {
            for (int mode_id = 0; mode_id < 2; mode_id++) {
                printf("--------format: %s, type: %s, mode: %s------------\n", input_format[format_id], type_id==0? "SR": "VC", mode_id==0? "LOW_COST": "HIGH_PERF");
                printf("Internal RAM: %d, PSRAM:%d, feed cpu loading:%f, fetch cpu loading:%f\n",
                       memory[count*2], memory[count*2+1], cpu[count*2], cpu[count*2+1]);
                count++;
            }
        }
    }

    // Release the model list, as the other test cases in this file do.
    esp_srmodel_deinit(models);
    printf("test done\n");
}
@ -164,13 +231,13 @@ void test_fetch_Task(void *arg)
} }
} }
TEST_ASSERT_EQUAL(true, detect_cnt > 0); // TEST_ASSERT_EQUAL(true, detect_cnt > 0);
ESP_LOGI(TAG, "detect task quit\n"); ESP_LOGI(TAG, "detect task quit\n");
fetch_task_flag = 0; fetch_task_flag = 0;
vTaskDelete(NULL); vTaskDelete(NULL);
} }
TEST_CASE("afe performance test (1ch)", "[afe]") TEST_CASE("afe performance test (1ch)", "[afe_perf]")
{ {
const char *input_format = "MR"; const char *input_format = "MR";
afe_type_t afe_type = AFE_TYPE_VC; afe_type_t afe_type = AFE_TYPE_VC;
@ -201,7 +268,7 @@ TEST_CASE("afe performance test (1ch)", "[afe]")
esp_srmodel_deinit(models); esp_srmodel_deinit(models);
} }
TEST_CASE("afe performance test (2ch)", "[afe]") TEST_CASE("afe performance test (2ch)", "[afe_perf]")
{ {
const char *input_format = "MMR"; const char *input_format = "MMR";
afe_type_t afe_type = AFE_TYPE_VC; afe_type_t afe_type = AFE_TYPE_VC;

View File

@ -51,5 +51,5 @@ def test_wakenet(dut: Dut)-> None:
], ],
) )
def test_sr_afe(dut: Dut)-> None: def test_sr_afe(dut: Dut)-> None:
dut.run_all_single_board_cases(group="afe", timeout=100000) dut.run_all_single_board_cases(group="afe", timeout=3600)

View File

@ -23,3 +23,5 @@ CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744 CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
CONFIG_LWIP_TCP_WND_DEFAULT=5744 CONFIG_LWIP_TCP_WND_DEFAULT=5744
CONFIG_UNITY_CRITICAL_LEAK_LEVEL_GENERAL=1024 CONFIG_UNITY_CRITICAL_LEAK_LEVEL_GENERAL=1024
CONFIG_ESP_TASK_WDT_EN=n
CONFIG_ESP_TASK_WDT_INIT=n