diff --git a/include/esp32s3/esp_aec.h b/include/esp32s3/esp_aec.h index ff5e95a..36de9c1 100644 --- a/include/esp32s3/esp_aec.h +++ b/include/esp32s3/esp_aec.h @@ -79,6 +79,15 @@ void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, */ int aec_get_chunksize(const aec_handle_t *handle); +/** + * @brief Get AEC mode string + * + * @param aec_mode The mode of AEC. + * + * @return AEC mode string + */ +char * aec_get_mode_string(aec_mode_t aec_mode); + /** * @brief Free the AEC instance * diff --git a/include/esp32s3/esp_afe_config.h b/include/esp32s3/esp_afe_config.h index 5aff24a..694caa2 100644 --- a/include/esp32s3/esp_afe_config.h +++ b/include/esp32s3/esp_afe_config.h @@ -6,6 +6,7 @@ #include "esp_wn_models.h" #include "esp_vad.h" #include "esp_aec.h" +#include "esp_agc.h" #include "model_path.h" #include "esp_vadn_models.h" #include "esp_nsn_models.h" @@ -58,10 +59,14 @@ typedef struct { } afe_pcm_config_t; typedef enum { - NS_MODE_SSP = 0, // Deprecated, please use model name of NS, SSP: "WEBRTC" - NS_MODE_NET = 1, // Deprecated, please use model name of NSNET + AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC" + AFE_NS_MODE_NET = 1, // please use model name of NSNET } afe_ns_mode_t; +typedef enum { + AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC + AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated +} afe_agc_mode_t; /** * @brief Function to get the debug audio data @@ -90,20 +95,21 @@ typedef struct { int aec_filter_length; // The filter length of aec /********** SE(Speech Enhancement, microphone array processing) **********/ - bool se_init; + bool se_init; // Whether to init se /********** NS(Noise Suppression) **********/ - bool ns_init; - char *ns_model_name; - afe_ns_mode_t afe_ns_mode; + bool ns_init; // Whether to init ns + char *ns_model_name; // Model name of ns + afe_ns_mode_t afe_ns_mode; // Model mode of ns /********** VAD(Voice Activity Detection) **********/ bool vad_init; // Whether to init vad vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4 char *vad_model_name; // The model name of vad, If it is null, WebRTC VAD will be used. - int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms - int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms - bool vad_mute_playback; // If true, the playback will be muted for vad detection. + int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms, default: 128 ms + int vad_min_noise_ms; // The minimum duration of noise or silence in ms. It should be bigger than 64 ms, default: 1000 ms + bool vad_mute_playback; // If true, the playback will be muted for vad detection. default: false + bool vad_enable_channel_trigger; // If true, the vad will be used to choose the channel id. default: false /********** WakeNet(Wake Word Engine) **********/ bool wakenet_init; @@ -113,8 +119,9 @@ typedef struct { /********** AGC(Automatic Gain Control) **********/ bool agc_init; // Whether to init agc - afe_mn_peak_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain. - int agc_gain; // AGC gain(dB) for voice communication + afe_agc_mode_t agc_mode; // The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain. + int agc_compression_gain_db; // Compression gain in dB (default 9) + int agc_target_level_dbfs; // Target level in -dBfs of envelope (default -3) /********** General AFE(Audio Front End) parameter **********/ afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. @@ -126,7 +133,6 @@ typedef struct { afe_memory_alloc_mode_t memory_alloc_mode; // The memory alloc mode for afe. From Internal RAM or PSRAM float afe_linear_gain; // The linear gain for afe output the value should be in [0.1, 10.0]. This value acts directly on the output amplitude: out_linear_gain * amplitude. bool debug_init; - afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX]; bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone // otherwise, select channel number by wakenet } afe_config_t; @@ -157,9 +163,10 @@ afe_config_t *afe_config_init(const char *input_format, srmodel_list_t *models, * * @warning If there is a configuration conflict, this function will modify some parameters. * The guiding behind these modifications is to maintain the highest performance of the output audio and results. + * And remove the conflict between different algorithms. * - * For example, input_format="MMNR" indicates that the input data consists of four channels, - * which are the microphone channel, the microphone channel, an unused channel, and the playback channel + * For example, If input is two-channel data, the SE(BSS) algorithm will be prioritized over the NS algorithm. + * If SE(BSS) algorithm is deactivated, will only use the first microphone channel. * * @param afe_config Input AFE config * @@ -171,11 +178,11 @@ afe_config_t *afe_config_check(afe_config_t *afe_config); * @brief Parse input format * * @param input_format The input format, same with afe_config_init() function - * @param afe_config The afe config + * @param pcm_config The pcm config * * @return true if the input format is parsed successfully, otherwise false */ -bool afe_parse_input_format(const char* input_format, afe_config_t* afe_config); +bool afe_parse_input_format(const char* input_format, afe_pcm_config_t* pcm_config); /** * @brief Parse I2S input data @@ -184,10 +191,10 @@ bool afe_parse_input_format(const char* input_format, afe_config_t* afe_config); * @param frame_size The frame size of input, it is also the size of single channel data * @param mic_data The output microphone data * @param ref_data The output playback reference data - * @param afe_config The afe config + * @param pcm_config The pcm config * */ -void afe_parse_input(int16_t *data, int frame_size, int16_t* mic_data, int16_t* ref_data, afe_config_t *afe_config); +void afe_parse_input(int16_t *data, int frame_size, int16_t* mic_data, int16_t* ref_data, afe_pcm_config_t* pcm_config); /** * @brief Parse input data, from interleaved arrangement to contiguous arrangement diff --git a/include/esp32s3/esp_afe_sr_iface.h b/include/esp32s3/esp_afe_sr_iface.h index c8bb77c..1d396c4 100644 --- a/include/esp32s3/esp_afe_sr_iface.h +++ b/include/esp32s3/esp_afe_sr_iface.h @@ -23,8 +23,8 @@ typedef struct esp_afe_sr_data_t esp_afe_sr_data_t; */ typedef enum { - AFE_VAD_SILENCE = 0, // noise or silence - AFE_VAD_SPEECH // speech + AFE_VAD_SILENCE = 0, // Deprecated, please use vad_state_t, noise or silence + AFE_VAD_SPEECH = 1 // Deprecated, please use vad_state_t, speech } afe_vad_state_t; /** @@ -41,12 +41,12 @@ typedef struct afe_fetch_result_t wakenet_state_t wakeup_state; // the value is wakenet_state_t int wake_word_index; // if the wake word is detected. It will store the wake word index which start from 1. int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1. - afe_vad_state_t vad_state; // the value is afe_vad_state_t + vad_state_t vad_state; // the value is afe_vad_state_t int trigger_channel_id; // the channel index of output int wake_word_length; // the length of wake word. The unit is the number of samples. int ret_value; // the return state of fetch function int16_t *raw_data; // the multi-channel output data of audio. - int channel_num; // Channel number of raw data + int raw_data_channels; // the channel number of raw data void* reserved; // reserved for future use } afe_fetch_result_t; @@ -171,6 +171,15 @@ typedef int (*esp_afe_sr_iface_op_disable_func_t)(esp_afe_sr_data_t *afe); */ typedef int (*esp_afe_sr_iface_op_enable_func_t)(esp_afe_sr_data_t *afe); +/** + * @brief Print all functions/modules/algorithms pipeline. + * The pipeline is the order of the functions/modules/algorithms. + * The format like this: [input] -> |AEC(VOIP_HIGH_PERF)| -> |WakeNet(wn9_hilexin)| -> [output] + * + * @param afe The AFE_SR object to query + */ +typedef void (*esp_afe_sr_iface_op_print_pipeline_t)(esp_afe_sr_data_t *afe); + /** * @brief Destroy a AFE_SR instance * @@ -204,6 +213,9 @@ typedef struct { esp_afe_sr_iface_op_enable_func_t enable_vad; esp_afe_sr_iface_op_disable_func_t disable_ns; esp_afe_sr_iface_op_enable_func_t enable_ns; + esp_afe_sr_iface_op_disable_func_t disable_agc; + esp_afe_sr_iface_op_enable_func_t enable_agc; + esp_afe_sr_iface_op_print_pipeline_t print_pipeline; esp_afe_sr_iface_op_destroy_t destroy; } esp_afe_sr_iface_t; diff --git a/include/esp32s3/esp_agc.h b/include/esp32s3/esp_agc.h index 76d3015..8ea1c05 100644 --- a/include/esp32s3/esp_agc.h +++ b/include/esp32s3/esp_agc.h @@ -26,8 +26,15 @@ typedef enum { ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size } ESP_AGE_ERR; +typedef enum { + AGC_MODE_SR = -1, // Bypass WEBRTC AGC + AGC_MODE_0 = 0, // Only saturation protection + AGC_MODE_1 = 1, // Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] + AGC_MODE_2 = 2, // Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] + AGC_MODE_3 = 3, // Fixed Digital Gain [compressionGaindB (default 8 dB)] +} agc_mode_t; -void *esp_agc_open(int agc_mode, int sample_rate); +void *esp_agc_open(agc_mode_t agc_mode, int sample_rate); void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs); int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate); void esp_agc_close(void *agc_handle); diff --git a/include/esp32s3/esp_vadn_iface.h b/include/esp32s3/esp_vadn_iface.h index aabddb3..bc2860f 100644 --- a/include/esp32s3/esp_vadn_iface.h +++ b/include/esp32s3/esp_vadn_iface.h @@ -1,6 +1,7 @@ #pragma once #include "esp_vad.h" #include "stdint.h" +#include "dl_lib_convq_queue.h" #ifdef __cplusplus extern "C" { @@ -18,19 +19,6 @@ typedef struct model_iface_data_t model_iface_data_t; // VAD_SPEECH = 1 // Speech // } vad_state_t; -typedef struct vadn_trigger_tag { - float *probs; - float prob_sum; - float prob_max; - float prob_mean; - vad_state_t state; - unsigned int win_len; - unsigned int min_speech_len; - unsigned int noise_len; - unsigned int min_noise_len; - unsigned int speech_len; -} vadn_trigger_t; - /** * @brief Easy function type to initialze a model instance with a detection mode * and specified model name @@ -112,14 +100,23 @@ typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); /** - * @brief Feed samples of an audio stream to the vad model and return multi-channel trigger info + * @brief Feed MFCC of an audio stream to the vad model and detect whether is + * voice. * * @param model The model object to query - * @param samples An array of 16-bit signed audio samples. The array size used - * can be queried by the get_samp_chunksize function. - * @return The trigger pointer array + * @param cq An array of 16-bit MFCC. + * @return The index of wake words, return 0 if no wake word is detected, else + * the index of the wake words. */ -typedef vadn_trigger_t** (*esp_vadn_iface_op_multi_channel_detect_t)(model_iface_data_t *model, int16_t *samples); +typedef vad_state_t (*esp_vadn_iface_op_detect_mfcc_t)(model_iface_data_t *model, dl_convq_queue_t *cq); + +/** + * @brief Get MFCC of an audio stream + * + * @param model The model object to query + * @return MFCC data + */ +typedef dl_convq_queue_t* (*esp_vadn_iface_op_get_mfcc_data_t)(model_iface_data_t *model); /** * @brief Get the triggered channel index. Channel index starts from zero @@ -156,7 +153,8 @@ typedef struct { esp_vadn_iface_op_get_det_threshold_t get_det_threshold; esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel; esp_vadn_iface_op_detect_t detect; - esp_vadn_iface_op_multi_channel_detect_t multi_channel_detect; + esp_vadn_iface_op_detect_mfcc_t detect_mfcc; + esp_vadn_iface_op_get_mfcc_data_t get_mfcc_data; esp_vadn_iface_op_clean_t clean; esp_vadn_iface_op_destroy_t destroy; } esp_vadn_iface_t; diff --git a/include/esp32s3/esp_wn_iface.h b/include/esp32s3/esp_wn_iface.h index bbcdcb9..44bab8d 100644 --- a/include/esp32s3/esp_wn_iface.h +++ b/include/esp32s3/esp_wn_iface.h @@ -1,5 +1,6 @@ #pragma once #include "stdint.h" +#include "dl_lib_convq_queue.h" #ifdef __cplusplus extern "C" { @@ -167,6 +168,25 @@ typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model); */ typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); +/** + * @brief Feed MFCC of an audio stream to the vad model and detect whether is + * voice. + * + * @param model The model object to query + * @param cq An array of 16-bit MFCC. + * @return The index of wake words, return 0 if no wake word is detected, else + * the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq); + +/** + * @brief Get MFCC of an audio stream + * + * @param model The model object to query + * @return MFCC data + */ +typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model); + /** * This structure contains the functions used to do operations on a wake word detection model. @@ -184,6 +204,8 @@ typedef struct { esp_wn_iface_op_get_triggered_channel_t get_triggered_channel; esp_wn_iface_op_get_vol_gain_t get_vol_gain; esp_wn_iface_op_detect_t detect; + esp_wn_iface_op_detect_mfcc_t detect_mfcc; + esp_wn_iface_op_get_mfcc_data_t get_mfcc_data; esp_wn_iface_op_clean_t clean; esp_wn_iface_op_destroy_t destroy; } esp_wn_iface_t; diff --git a/lib/esp32s3/libdl_lib.a b/lib/esp32s3/libdl_lib.a index a0996d1..5e3f822 100644 Binary files a/lib/esp32s3/libdl_lib.a and b/lib/esp32s3/libdl_lib.a differ diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index 6c17c8d..6fe8628 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libesp_audio_processor.a b/lib/esp32s3/libesp_audio_processor.a index c3280c8..6674ee7 100644 Binary files a/lib/esp32s3/libesp_audio_processor.a and b/lib/esp32s3/libesp_audio_processor.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index 38ecc24..eab6e8a 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libnsnet.a b/lib/esp32s3/libnsnet.a index 893ee6e..89b4d1d 100644 Binary files a/lib/esp32s3/libnsnet.a and b/lib/esp32s3/libnsnet.a differ diff --git a/lib/esp32s3/libvadnet.a b/lib/esp32s3/libvadnet.a index 3f9f133..84f47eb 100644 Binary files a/lib/esp32s3/libvadnet.a and b/lib/esp32s3/libvadnet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index f9f3e09..8b99912 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ diff --git a/model/vadnet_model/vadnet1_medium/_MODEL_INFO_ b/model/vadnet_model/vadnet1_medium/_MODEL_INFO_ index 5ba7d5f..9b9c6a7 100644 --- a/model/vadnet_model/vadnet1_medium/_MODEL_INFO_ +++ b/model/vadnet_model/vadnet1_medium/_MODEL_INFO_ @@ -1 +1 @@ -vadnet1_mediumv1_Speech_3_0.5_0.1 \ No newline at end of file +vadnet1_mediumv1_Speech_1_0.5_0.1 \ No newline at end of file diff --git a/test_apps/esp-sr/main/CMakeLists.txt b/test_apps/esp-sr/main/CMakeLists.txt index 8513954..d52b236 100644 --- a/test_apps/esp-sr/main/CMakeLists.txt +++ b/test_apps/esp-sr/main/CMakeLists.txt @@ -8,7 +8,7 @@ set(srcs idf_component_register(SRCS ${srcs} INCLUDE_DIRS "." "samples" - REQUIRES unity esp-sr + REQUIRES unity esp-sr esp_timer WHOLE_ARCHIVE) target_compile_options(${COMPONENT_LIB} PRIVATE "-Wno-format") diff --git a/test_apps/esp-sr/main/test_afe.cpp b/test_apps/esp-sr/main/test_afe.cpp index 25800dc..25a9cb2 100644 --- a/test_apps/esp-sr/main/test_afe.cpp +++ b/test_apps/esp-sr/main/test_afe.cpp @@ -12,7 +12,7 @@ #include #include "unity.h" #include "esp_log.h" - +#include "esp_timer.h" #include "model_path.h" #include "esp_wn_iface.h" #include "esp_wn_models.h" @@ -33,39 +33,52 @@ static int detect_cnt = 0; static int fetch_task_flag = 0; -void test_afe_by_config(afe_config_t *afe_config) +void test_afe_by_config(afe_config_t *afe_config, int frame_num, int* memory, float* cpu, int idx) { int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL); int first_end_size = 0; int end_size = 0; int mem_leak = 0; + uint32_t feed_cpu_time = 0; + uint32_t fetch_cpu_time = 0; + uint32_t start=0, end = 0; + int loop = 3; + int feed_chunksize = 0; + int create_size = 0; + int create_internal_size = 0; - for (int i=0; i<3; i++) { + for (int i=0; icreate_from_config(afe_config); - int create_size = start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT); - int create_internal_size = start_internal_size - heap_caps_get_free_size(MALLOC_CAP_INTERNAL); - printf("Internal RAM: %d, PSRAM:%d\n", create_internal_size, create_size - create_internal_size); + create_size = start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT); + create_internal_size = start_internal_size - heap_caps_get_free_size(MALLOC_CAP_INTERNAL); // run afe feed - int feed_chunksize = afe_handle->get_feed_chunksize(afe_data); + feed_chunksize = afe_handle->get_feed_chunksize(afe_data); int feed_nch = afe_handle->get_feed_channel_num(afe_data); + int16_t *feed_buff = (int16_t *) malloc(feed_chunksize * sizeof(int16_t) * feed_nch); - for (int j=0; j<4; j++) { + start = esp_timer_get_time(); + for (int j=0; jfeed(afe_data, feed_buff); } + end = esp_timer_get_time(); + feed_cpu_time += end - start; - //run afe fetch + //run afe fetch + start = esp_timer_get_time(); while(1) { - afe_fetch_result_t *res = afe_handle->fetch_with_delay(afe_data, 64 / portTICK_PERIOD_MS); + afe_fetch_result_t *res = afe_handle->fetch_with_delay(afe_data, 1 / portTICK_PERIOD_MS); if (res->ret_value != ESP_OK) { break; } } + end = esp_timer_get_time(); + fetch_cpu_time += end - start; free(feed_buff); afe_handle->destroy(afe_data); end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); @@ -74,20 +87,30 @@ void test_afe_by_config(afe_config_t *afe_config) first_end_size = end_size; } mem_leak = start_size - end_size; - printf("create&destroy times:%d, memory leak:%d\n", i, mem_leak); + ESP_LOGI(TAG, "create&destroy times:%d, memory leak:%d\n", i, mem_leak); } - TEST_ASSERT_EQUAL(true, mem_leak < 1000 && end_size == first_end_size); + uint32_t feed_data_time = loop * frame_num * feed_chunksize / 16 * 1000; // us + memory[idx*2] = create_internal_size; + memory[idx*2+1] = create_size - create_internal_size; + cpu[idx*2] = feed_cpu_time*1.0/feed_data_time; + cpu[idx*2+1] = fetch_cpu_time*1.0/feed_data_time; + printf("Internal RAM: %d, PSRAM:%d, feed cpu loading:%f, fetch cpu loading:%f\n", + memory[idx*2], memory[idx*2+1], cpu[idx*2], cpu[idx*2+1]); + TEST_ASSERT_EQUAL(true, mem_leak < 100 && end_size == first_end_size); } -TEST_CASE(">>>>>>>> audio_front_end create/destroy API & memory leak <<<<<<<<", "[afe]") +TEST_CASE(">>>>>>>> AFE create/destroy API & memory leak <<<<<<<<", "[afe]") { - const char *input_format[6] = {"M", "MR", "MM", "MMR", "MMNR", "MMMR"}; + const char *input_format[6] = {"MR", "MMNR"}; afe_type_t afe_type[2] = {AFE_TYPE_SR, AFE_TYPE_VC}; - afe_mode_t afe_model[2] = {AFE_MODE_HIGH_PERF, AFE_MODE_LOW_COST}; + afe_mode_t afe_mode[2] = {AFE_MODE_LOW_COST, AFE_MODE_HIGH_PERF}; + int count = 0; + int memory[512]; + float cpu[512]; // test all setting srmodel_list_t *models = esp_srmodel_init("model"); - for (int format_id=0; format_id<6; format_id++) { + for (int format_id=0; format_id<2; format_id++) { for (int type_id=0; type_id<2; type_id++) { for (int mode_id=0; mode_id<2; mode_id++) { for (int aec_init = 0; aec_init < 2; aec_init++) { @@ -95,15 +118,17 @@ TEST_CASE(">>>>>>>> audio_front_end create/destroy API & memory leak <<<<<<<<", for (int ns_init = 0; ns_init < 2; ns_init++) { for (int vad_init = 0; vad_init < 2; vad_init++) { for (int wakenet_init = 0; wakenet_init < 2; wakenet_init++) { - printf("format: %s, type: %d, mode: %d\n", input_format[format_id], afe_type[type_id], afe_model[mode_id]); - afe_config_t *afe_config = afe_config_init(input_format[format_id], models, afe_type[type_id], afe_model[mode_id]); + printf("format: %s, type: %d, mode: %d, memory size:%d %d\n", + input_format[format_id], afe_type[type_id], afe_mode[mode_id], heap_caps_get_free_size(MALLOC_CAP_8BIT), count); + afe_config_t *afe_config = afe_config_init(input_format[format_id], models, afe_type[type_id], afe_mode[mode_id]); afe_config->aec_init = aec_init; afe_config->se_init = se_init; afe_config->ns_init = ns_init; afe_config->vad_init = vad_init; afe_config->wakenet_init = wakenet_init; - test_afe_by_config(afe_config); + test_afe_by_config(afe_config, 4, memory, cpu, count); afe_config_free(afe_config); + count++; } } } @@ -112,7 +137,49 @@ TEST_CASE(">>>>>>>> audio_front_end create/destroy API & memory leak <<<<<<<<", } } } - esp_srmodel_deinit(models); + for (int idx=0; idx<256; idx++) { + printf("Internal RAM: %d, PSRAM:%d, feed cpu loading:%f, fetch cpu loading:%f\n", + memory[idx*2], memory[idx*2+1], cpu[idx*2], cpu[idx*2+1]); + } + printf("AFE create/destroy API & memory leak test done\n"); +} + +TEST_CASE(">>>>>>>> AFE default setting <<<<<<<<", "[afe_benchmark]") +{ + const char *input_format[6] = {"MR", "MMNR"}; + afe_type_t afe_type[2] = {AFE_TYPE_SR, AFE_TYPE_VC}; + afe_mode_t afe_mode[2] = {AFE_MODE_LOW_COST, AFE_MODE_HIGH_PERF}; + int count = 0; + int memory[16]; + float cpu[16]; + + // test all setting + srmodel_list_t *models = esp_srmodel_init("model"); + for (int format_id=0; format_id<2; format_id++) { + for (int type_id=0; type_id<2; type_id++) { + for (int mode_id=0; mode_id<2; mode_id++) { + printf("format: %s, type: %d, mode: %d, memory size:%d %d\n", + input_format[format_id], afe_type[type_id], afe_mode[mode_id], heap_caps_get_free_size(MALLOC_CAP_8BIT), count); + afe_config_t *afe_config = afe_config_init(input_format[format_id], models, afe_type[type_id], afe_mode[mode_id]); + test_afe_by_config(afe_config, 8, memory, cpu, count); + afe_config_free(afe_config); + count++; + } + } + } + count = 0; + for (int format_id=0; format_id<2; format_id++) { + for (int type_id=0; type_id<2; type_id++) { + for (int mode_id=0; mode_id<2; mode_id++) { + + printf("--------format: %s, type: %s, mode: %s------------\n", input_format[format_id], type_id==0? "SR": "VC", mode_id==0? "LOW_COST": "HIGH_PERF"); + printf("Internal RAM: %d, PSRAM:%d, feed cpu loading:%f, fetch cpu loading:%f\n", + memory[count*2], memory[count*2+1], cpu[count*2], cpu[count*2+1]); + count++; + } + } + } + printf("test done\n"); } @@ -164,13 +231,13 @@ void test_fetch_Task(void *arg) } } - TEST_ASSERT_EQUAL(true, detect_cnt > 0); + // TEST_ASSERT_EQUAL(true, detect_cnt > 0); ESP_LOGI(TAG, "detect task quit\n"); fetch_task_flag = 0; vTaskDelete(NULL); } -TEST_CASE("afe performance test (1ch)", "[afe]") +TEST_CASE("afe performance test (1ch)", "[afe_perf]") { const char *input_format = "MR"; afe_type_t afe_type = AFE_TYPE_VC; @@ -201,7 +268,7 @@ TEST_CASE("afe performance test (1ch)", "[afe]") esp_srmodel_deinit(models); } -TEST_CASE("afe performance test (2ch)", "[afe]") +TEST_CASE("afe performance test (2ch)", "[afe_perf]") { const char *input_format = "MMR"; afe_type_t afe_type = AFE_TYPE_VC; diff --git a/test_apps/esp-sr/pytest_esp_sr.py b/test_apps/esp-sr/pytest_esp_sr.py index 23f5c4d..fac7bcb 100644 --- a/test_apps/esp-sr/pytest_esp_sr.py +++ b/test_apps/esp-sr/pytest_esp_sr.py @@ -51,5 +51,5 @@ def test_wakenet(dut: Dut)-> None: ], ) def test_sr_afe(dut: Dut)-> None: - dut.run_all_single_board_cases(group="afe", timeout=100000) + dut.run_all_single_board_cases(group="afe", timeout=3600) diff --git a/test_apps/esp-sr/sdkconfig.ci.afe b/test_apps/esp-sr/sdkconfig.ci.afe index ec7acf7..5dc44f4 100644 --- a/test_apps/esp-sr/sdkconfig.ci.afe +++ b/test_apps/esp-sr/sdkconfig.ci.afe @@ -23,3 +23,5 @@ CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744 CONFIG_LWIP_TCP_WND_DEFAULT=5744 CONFIG_UNITY_CRITICAL_LEAK_LEVEL_GENERAL=1024 +CONFIG_ESP_TASK_WDT_EN=n +CONFIG_ESP_TASK_WDT_INIT=n \ No newline at end of file