diff --git a/Kconfig.projbuild b/Kconfig.projbuild index 3147f61..c382c67 100644 --- a/Kconfig.projbuild +++ b/Kconfig.projbuild @@ -46,7 +46,7 @@ choice SR_VADN_MODEL_LOAD config SR_VADN_WEBRTC bool "voice activity detection (WebRTC)" - + config SR_VADN_VADNET1_MEDIUM bool "voice activity detection (vadnet1 medium)" depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 @@ -78,7 +78,7 @@ menu "Load Multiple Wake Words" config SR_WN_WN9_JARVIS_TTS bool "Jarvis (wn9_jarvis_tts)" default False - + config SR_WN_WN9_COMPUTER_TTS bool "computer (wn9_computer_tts)" default False diff --git a/idf_component.yml b/idf_component.yml index f55e925..3123b64 100644 --- a/idf_component.yml +++ b/idf_component.yml @@ -3,7 +3,7 @@ description: esp_sr provides basic algorithms for Speech Recognition application url: https://github.com/espressif/esp-sr dependencies: idf: ">=5.0" - espressif/esp-dsp: "<=1.5.0" + espressif/esp-dsp: ">1.5.1" files: exclude: - ".github" diff --git a/include/esp32/esp_doa.h b/include/esp32/esp_doa.h new file mode 100644 index 0000000..a47cb10 --- /dev/null +++ b/include/esp32/esp_doa.h @@ -0,0 +1,41 @@ +#ifndef _ESP_DOA_H_ +#define _ESP_DOA_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct doa_handle_t doa_handle_t; +/** + * @brief Initialize SRP-PHAT processor + * @param fs Sampling rate (Hz), e.g., 16000 + * @param resolution Angular search resolution (degrees), e.g., 20 + * @param d_mics Microphone spacing (meters), e.g., 0.06 + * @param input_timedate_samples input timedate samples, e.g., 1024 + * @return Initialized doa_handle_t object pointer, Recommend using the above configuration for better performance + */ +doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples); + +/** + * @brief Release all allocated resources + * @param doa doa_handle_t instance pointer to be freed + */ +void esp_doa_destroy(doa_handle_t *doa); + +/** + * @brief Process audio frame for direction estimation + * 
@param doa doa_handle_t instance pointer + * @param left Left channel 16-bit PCM data + * @param right Right channel 16-bit PCM data + * @return Estimated sound direction in degrees, e.g., 0-180 + */ +float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right); + + +#ifdef __cplusplus +} +#endif + +#endif /* _ESP_DOA_H_ */ diff --git a/include/esp32/esp_wn_models.h b/include/esp32/esp_wn_models.h index 3a4d7e4..3ac997a 100644 --- a/include/esp32/esp_wn_models.h +++ b/include/esp32/esp_wn_models.h @@ -11,7 +11,7 @@ extern "C" { /** * @brief Get the wakenet handle from model name * - * @param model_name The name of model + * @param model_name The name of model * @returns The handle of wakenet */ const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); @@ -19,10 +19,10 @@ const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); /** * @brief Get the wake word name from model name * - * @param model_name The name of model + * @param model_name The name of model * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue" */ -char* esp_wn_wakeword_from_name(const char *model_name); +char *esp_wn_wakeword_from_name(const char *model_name); #ifdef __cplusplus } diff --git a/include/esp32p4/esp_doa.h b/include/esp32p4/esp_doa.h new file mode 100644 index 0000000..a47cb10 --- /dev/null +++ b/include/esp32p4/esp_doa.h @@ -0,0 +1,41 @@ +#ifndef _ESP_DOA_H_ +#define _ESP_DOA_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct doa_handle_t doa_handle_t; +/** + * @brief Initialize SRP-PHAT processor + * @param fs Sampling rate (Hz), e.g., 16000 + * @param resolution Angular search resolution (degrees), e.g., 20 + * @param d_mics Microphone spacing (meters), e.g., 0.06 + * @param input_timedate_samples input timedate samples, e.g., 1024 + * @return Initialized doa_handle_t object pointer, Recommend using the above configuration for better performance + */ +doa_handle_t *esp_doa_create(int fs, float 
resolution, float d_mics, int input_timedate_samples); + +/** + * @brief Release all allocated resources + * @param doa doa_handle_t instance pointer to be freed + */ +void esp_doa_destroy(doa_handle_t *doa); + +/** + * @brief Process audio frame for direction estimation + * @param doa doa_handle_t instance pointer + * @param left Left channel 16-bit PCM data + * @param right Right channel 16-bit PCM data + * @return Estimated sound direction in degrees, e.g., 0-180 + */ +float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right); + + +#ifdef __cplusplus +} +#endif + +#endif /* _ESP_DOA_H_ */ diff --git a/include/esp32p4/esp_wn_models.h b/include/esp32p4/esp_wn_models.h index 38972e7..3ac997a 100644 --- a/include/esp32p4/esp_wn_models.h +++ b/include/esp32p4/esp_wn_models.h @@ -11,7 +11,7 @@ extern "C" { /** * @brief Get the wakenet handle from model name * - * @param model_name The name of model + * @param model_name The name of model * @returns The handle of wakenet */ const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); @@ -19,87 +19,10 @@ const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); /** * @brief Get the wake word name from model name * - * @param model_name The name of model + * @param model_name The name of model * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue" */ -char* esp_wn_wakeword_from_name(const char *model_name); - -// /** -// * @brief Get the model coeff from model name -// * -// * @Warning: retuen model_coeff_getter_t, when chip is ESP32, -// * return string for other chips -// * -// * @param model_name The name of model -// * @returns The handle of wakenet -// */ -// void *esp_wn_coeff_from_name(char *model_name); - - -#if defined CONFIG_USE_WAKENET -/* - Configure wake word to use based on what's selected in menuconfig. 
-*/ -#if CONFIG_SR_WN_WN5_HILEXIN -#include "hilexin_wn5.h" -#define WAKENET_MODEL_NAME "wn5_hilexin" -#define WAKENET_COEFF get_coeff_hilexin_wn5 - -#elif CONFIG_SR_WN_WN5X2_HILEXIN -#include "hilexin_wn5X2.h" -#define WAKENET_MODEL_NAME "wn5_hilexinX2" -#define WAKENET_COEFF get_coeff_hilexin_wn5X2 - - -#elif CONFIG_SR_WN_WN5X3_HILEXIN -#include "hilexin_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_hilexinX3" -#define WAKENET_COEFF get_coeff_hilexin_wn5X3 - - -#elif CONFIG_SR_WN_WN5_NIHAOXIAOZHI -#include "nihaoxiaozhi_wn5.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhi" -#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5 - - -#elif CONFIG_SR_WN_WN5X2_NIHAOXIAOZHI -#include "nihaoxiaozhi_wn5X2.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhiX2" -#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X2 - - -#elif CONFIG_SR_WN_WN5X3_NIHAOXIAOZHI -#include "nihaoxiaozhi_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhiX3" -#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X3 - - -#elif CONFIG_SR_WN_WN5X3_NIHAOXIAOXIN -#include "nihaoxiaoxin_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaoxinX3" -#define WAKENET_COEFF get_coeff_nihaoxiaoxin_wn5X3 - - -#elif CONFIG_SR_WN_WN5X3_HIJESON -#include "hijeson_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_hijesonX3" -#define WAKENET_COEFF get_coeff_hijeson_wn5X3 - -#elif CONFIG_SR_WN_WN5_CUSTOMIZED_WORD -#include "customized_word_wn5.h" -#define WAKENET_MODEL_NAME "wn5_customizedword" -#define WAKENET_COEFF get_coeff_customizedword_wn5 - -#else -#define WAKENET_MODEL_NAME "NULL" -#define WAKENET_COEFF "COEFF_NULL" -#endif - -#else -#define WAKENET_MODEL_NAME "NULL" -#define WAKENET_COEFF "COEFF_NULL" -#endif +char *esp_wn_wakeword_from_name(const char *model_name); #ifdef __cplusplus } diff --git a/include/esp32s3/esp_afe_config.h b/include/esp32s3/esp_afe_config.h index 00ac15b..5bb8311 100644 --- a/include/esp32s3/esp_afe_config.h +++ b/include/esp32s3/esp_afe_config.h @@ -126,7 +126,7 @@ typedef struct { afe_agc_mode_t agc_mode; 
// The AGC mode for ASR. and the gain generated by AGC acts on the audio after far linear gain. int agc_compression_gain_db; // Compression gain in dB (default 9) - int agc_target_level_dbfs; // Target level in -dBfs of envelope (default -3) + int agc_target_level_dbfs; // Target level in -dBfs of envelope (default 3, means target level is -3 dBFS) /********** General AFE(Audio Front End) parameter **********/ afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. diff --git a/include/esp32s3/esp_doa.h b/include/esp32s3/esp_doa.h new file mode 100644 index 0000000..a47cb10 --- /dev/null +++ b/include/esp32s3/esp_doa.h @@ -0,0 +1,41 @@ +#ifndef _ESP_DOA_H_ +#define _ESP_DOA_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct doa_handle_t doa_handle_t; +/** + * @brief Initialize SRP-PHAT processor + * @param fs Sampling rate (Hz), e.g., 16000 + * @param resolution Angular search resolution (degrees), e.g., 20 + * @param d_mics Microphone spacing (meters), e.g., 0.06 + * @param input_timedate_samples input timedate samples, e.g., 1024 + * @return Initialized doa_handle_t object pointer, Recommend using the above configuration for better performance + */ +doa_handle_t *esp_doa_create(int fs, float resolution, float d_mics, int input_timedate_samples); + +/** + * @brief Release all allocated resources + * @param doa doa_handle_t instance pointer to be freed + */ +void esp_doa_destroy(doa_handle_t *doa); + +/** + * @brief Process audio frame for direction estimation + * @param doa doa_handle_t instance pointer + * @param left Left channel 16-bit PCM data + * @param right Right channel 16-bit PCM data + * @return Estimated sound direction in degrees, e.g., 0-180 + */ +float esp_doa_process(doa_handle_t *doa, int16_t* left, int16_t* right); + + +#ifdef __cplusplus +} +#endif + +#endif /* _ESP_DOA_H_ */ diff --git a/include/esp32s3/esp_wn_models.h b/include/esp32s3/esp_wn_models.h index 
38972e7..3ac997a 100644 --- a/include/esp32s3/esp_wn_models.h +++ b/include/esp32s3/esp_wn_models.h @@ -11,7 +11,7 @@ extern "C" { /** * @brief Get the wakenet handle from model name * - * @param model_name The name of model + * @param model_name The name of model * @returns The handle of wakenet */ const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); @@ -19,87 +19,10 @@ const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); /** * @brief Get the wake word name from model name * - * @param model_name The name of model + * @param model_name The name of model * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue" */ -char* esp_wn_wakeword_from_name(const char *model_name); - -// /** -// * @brief Get the model coeff from model name -// * -// * @Warning: retuen model_coeff_getter_t, when chip is ESP32, -// * return string for other chips -// * -// * @param model_name The name of model -// * @returns The handle of wakenet -// */ -// void *esp_wn_coeff_from_name(char *model_name); - - -#if defined CONFIG_USE_WAKENET -/* - Configure wake word to use based on what's selected in menuconfig. 
-*/ -#if CONFIG_SR_WN_WN5_HILEXIN -#include "hilexin_wn5.h" -#define WAKENET_MODEL_NAME "wn5_hilexin" -#define WAKENET_COEFF get_coeff_hilexin_wn5 - -#elif CONFIG_SR_WN_WN5X2_HILEXIN -#include "hilexin_wn5X2.h" -#define WAKENET_MODEL_NAME "wn5_hilexinX2" -#define WAKENET_COEFF get_coeff_hilexin_wn5X2 - - -#elif CONFIG_SR_WN_WN5X3_HILEXIN -#include "hilexin_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_hilexinX3" -#define WAKENET_COEFF get_coeff_hilexin_wn5X3 - - -#elif CONFIG_SR_WN_WN5_NIHAOXIAOZHI -#include "nihaoxiaozhi_wn5.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhi" -#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5 - - -#elif CONFIG_SR_WN_WN5X2_NIHAOXIAOZHI -#include "nihaoxiaozhi_wn5X2.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhiX2" -#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X2 - - -#elif CONFIG_SR_WN_WN5X3_NIHAOXIAOZHI -#include "nihaoxiaozhi_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhiX3" -#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X3 - - -#elif CONFIG_SR_WN_WN5X3_NIHAOXIAOXIN -#include "nihaoxiaoxin_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_nihaoxiaoxinX3" -#define WAKENET_COEFF get_coeff_nihaoxiaoxin_wn5X3 - - -#elif CONFIG_SR_WN_WN5X3_HIJESON -#include "hijeson_wn5X3.h" -#define WAKENET_MODEL_NAME "wn5_hijesonX3" -#define WAKENET_COEFF get_coeff_hijeson_wn5X3 - -#elif CONFIG_SR_WN_WN5_CUSTOMIZED_WORD -#include "customized_word_wn5.h" -#define WAKENET_MODEL_NAME "wn5_customizedword" -#define WAKENET_COEFF get_coeff_customizedword_wn5 - -#else -#define WAKENET_MODEL_NAME "NULL" -#define WAKENET_COEFF "COEFF_NULL" -#endif - -#else -#define WAKENET_MODEL_NAME "NULL" -#define WAKENET_COEFF "COEFF_NULL" -#endif +char *esp_wn_wakeword_from_name(const char *model_name); #ifdef __cplusplus } diff --git a/lib/esp32/libc_speech_features.a b/lib/esp32/libc_speech_features.a index e820484..238381d 100644 Binary files a/lib/esp32/libc_speech_features.a and b/lib/esp32/libc_speech_features.a differ diff --git a/lib/esp32/libdl_lib.a 
b/lib/esp32/libdl_lib.a index ebe280d..019d95b 100644 Binary files a/lib/esp32/libdl_lib.a and b/lib/esp32/libdl_lib.a differ diff --git a/lib/esp32/libesp_audio_front_end.a b/lib/esp32/libesp_audio_front_end.a index a07e725..e2f5407 100644 Binary files a/lib/esp32/libesp_audio_front_end.a and b/lib/esp32/libesp_audio_front_end.a differ diff --git a/lib/esp32/libesp_audio_processor.a b/lib/esp32/libesp_audio_processor.a index deceb7d..24f3ed1 100644 Binary files a/lib/esp32/libesp_audio_processor.a and b/lib/esp32/libesp_audio_processor.a differ diff --git a/lib/esp32/libflite_g2p.a b/lib/esp32/libflite_g2p.a index ecd3b94..7d66485 100644 Binary files a/lib/esp32/libflite_g2p.a and b/lib/esp32/libflite_g2p.a differ diff --git a/lib/esp32/libfst.a b/lib/esp32/libfst.a index 66de7bf..435488d 100644 Binary files a/lib/esp32/libfst.a and b/lib/esp32/libfst.a differ diff --git a/lib/esp32/libhufzip.a b/lib/esp32/libhufzip.a index 2160f4e..f12cc97 100644 Binary files a/lib/esp32/libhufzip.a and b/lib/esp32/libhufzip.a differ diff --git a/lib/esp32/libmultinet.a b/lib/esp32/libmultinet.a index b41b492..b0951cb 100644 Binary files a/lib/esp32/libmultinet.a and b/lib/esp32/libmultinet.a differ diff --git a/lib/esp32/libnsnet.a b/lib/esp32/libnsnet.a index 9a2ca12..669ee03 100644 Binary files a/lib/esp32/libnsnet.a and b/lib/esp32/libnsnet.a differ diff --git a/lib/esp32/libvadnet.a b/lib/esp32/libvadnet.a index dcccb75..36538b9 100644 Binary files a/lib/esp32/libvadnet.a and b/lib/esp32/libvadnet.a differ diff --git a/lib/esp32/libwakenet.a b/lib/esp32/libwakenet.a index 4d3fdce..d3c9043 100644 Binary files a/lib/esp32/libwakenet.a and b/lib/esp32/libwakenet.a differ diff --git a/lib/esp32p4/libc_speech_features.a b/lib/esp32p4/libc_speech_features.a index c913ab1..c757463 100644 Binary files a/lib/esp32p4/libc_speech_features.a and b/lib/esp32p4/libc_speech_features.a differ diff --git a/lib/esp32p4/libdl_lib.a b/lib/esp32p4/libdl_lib.a index e9a21be..9492590 100644 Binary 
files a/lib/esp32p4/libdl_lib.a and b/lib/esp32p4/libdl_lib.a differ diff --git a/lib/esp32p4/libesp_audio_front_end.a b/lib/esp32p4/libesp_audio_front_end.a index bea907d..adc0143 100644 Binary files a/lib/esp32p4/libesp_audio_front_end.a and b/lib/esp32p4/libesp_audio_front_end.a differ diff --git a/lib/esp32p4/libesp_audio_processor.a b/lib/esp32p4/libesp_audio_processor.a index accd23b..6f1e79c 100644 Binary files a/lib/esp32p4/libesp_audio_processor.a and b/lib/esp32p4/libesp_audio_processor.a differ diff --git a/lib/esp32p4/libmultinet.a b/lib/esp32p4/libmultinet.a index 19b8a22..def5ffa 100644 Binary files a/lib/esp32p4/libmultinet.a and b/lib/esp32p4/libmultinet.a differ diff --git a/lib/esp32p4/libnsnet.a b/lib/esp32p4/libnsnet.a index 39ba6ff..a39288b 100644 Binary files a/lib/esp32p4/libnsnet.a and b/lib/esp32p4/libnsnet.a differ diff --git a/lib/esp32p4/libvadnet.a b/lib/esp32p4/libvadnet.a index 4a16559..ee16727 100644 Binary files a/lib/esp32p4/libvadnet.a and b/lib/esp32p4/libvadnet.a differ diff --git a/lib/esp32p4/libwakenet.a b/lib/esp32p4/libwakenet.a index 7551e17..678743f 100644 Binary files a/lib/esp32p4/libwakenet.a and b/lib/esp32p4/libwakenet.a differ diff --git a/lib/esp32s3/libc_speech_features.a b/lib/esp32s3/libc_speech_features.a index 31549e0..0af9791 100644 Binary files a/lib/esp32s3/libc_speech_features.a and b/lib/esp32s3/libc_speech_features.a differ diff --git a/lib/esp32s3/libdl_lib.a b/lib/esp32s3/libdl_lib.a index f27412e..dd37082 100644 Binary files a/lib/esp32s3/libdl_lib.a and b/lib/esp32s3/libdl_lib.a differ diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index 4b7dc42..191300b 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libesp_audio_processor.a b/lib/esp32s3/libesp_audio_processor.a index 924a046..45376f7 100644 Binary files a/lib/esp32s3/libesp_audio_processor.a and 
b/lib/esp32s3/libesp_audio_processor.a differ diff --git a/lib/esp32s3/libflite_g2p.a b/lib/esp32s3/libflite_g2p.a index 6a99a57..b321195 100644 Binary files a/lib/esp32s3/libflite_g2p.a and b/lib/esp32s3/libflite_g2p.a differ diff --git a/lib/esp32s3/libfst.a b/lib/esp32s3/libfst.a index a2dd373..f970624 100644 Binary files a/lib/esp32s3/libfst.a and b/lib/esp32s3/libfst.a differ diff --git a/lib/esp32s3/libhufzip.a b/lib/esp32s3/libhufzip.a index c0465b1..ff953a2 100644 Binary files a/lib/esp32s3/libhufzip.a and b/lib/esp32s3/libhufzip.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index 469514e..3723e0e 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libnsnet.a b/lib/esp32s3/libnsnet.a index b206f41..0cae3ef 100644 Binary files a/lib/esp32s3/libnsnet.a and b/lib/esp32s3/libnsnet.a differ diff --git a/lib/esp32s3/libvadnet.a b/lib/esp32s3/libvadnet.a index 96fc6fb..cdf2a82 100644 Binary files a/lib/esp32s3/libvadnet.a and b/lib/esp32s3/libvadnet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index b1c33bf..b418c45 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ diff --git a/src/model_path.c b/src/model_path.c index acf90b8..77eb9de 100644 --- a/src/model_path.c +++ b/src/model_path.c @@ -1,19 +1,19 @@ -#include "stdio.h" -#include -#include -#include "string.h" #include "model_path.h" #include "esp_wn_models.h" +#include "stdio.h" +#include "string.h" +#include +#include #ifndef CONFIG_IDF_TARGET_ESP32P4 #include "esp_mn_models.h" #endif #ifdef ESP_PLATFORM -#include -#include "sdkconfig.h" +#include "esp_idf_version.h" #include "esp_log.h" #include "esp_spiffs.h" -#include "esp_idf_version.h" +#include "sdkconfig.h" +#include #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) #include "spi_flash_mmap.h" #endif @@ -31,20 +31,20 @@ void set_model_base_path(const char *base_path) char *get_model_info(char 
*data, int size) { char *model_info = NULL; - //Prase - //if the line starts with '#', the line is a comment - //else the line is model information - while(size > 0) { + // Parse + // if the line starts with '#', the line is a comment + // else the line is model information + while (size > 0) { if (*data == '#') { while (*data != '\n' && size > 1) { - data ++; - size --; + data++; + size--; } - data ++; - size --; + data++; + size--; continue; } else if (data != NULL && size > 0) { - model_info = (char*)malloc((size + 1) * sizeof(char)); + model_info = (char *)malloc((size + 1) * sizeof(char)); memcpy(model_info, data, size); if (model_info[size - 1] == '\n') { model_info[size - 1] = '\0'; @@ -63,7 +63,7 @@ char *get_wake_words_from_info(char *model_info) return NULL; int info_len = strlen(model_info); - char *temp = (char *) malloc(info_len + 1); + char *temp = (char *)malloc(info_len + 1); memcpy(temp, model_info, info_len); temp[info_len] = '\0'; char *token = strtok(temp, "_"); @@ -75,15 +75,15 @@ char *get_wake_words_from_info(char *model_info) // find all valid wake word token if (wake_words == NULL) { word_len = strlen(token) + 1; - wake_words = (char*) malloc(word_len); - memcpy(wake_words, token, word_len-1); - wake_words[word_len-1] = '\0'; + wake_words = (char *)malloc(word_len); + memcpy(wake_words, token, word_len - 1); + wake_words[word_len - 1] = '\0'; } else { word_len += strlen(token) + 1; - wake_words = (char*) realloc(wake_words, word_len); + wake_words = (char *)realloc(wake_words, word_len); strcat(wake_words, ";"); strcat(wake_words, token); - wake_words[word_len-1] = '\0'; + wake_words[word_len - 1] = '\0'; } } token = strtok(NULL, "_"); @@ -96,7 +96,7 @@ char *get_wake_words_from_info(char *model_info) static srmodel_list_t *srmodel_list_alloc(void) { - srmodel_list_t *models = (srmodel_list_t *) malloc(sizeof(srmodel_list_t)); + srmodel_list_t *models = (srmodel_list_t *)malloc(sizeof(srmodel_list_t)); models->model_data = NULL; 
models->model_name = NULL; models->model_info = NULL; @@ -139,7 +139,7 @@ srmodel_list_t *read_models_form_spiffs(esp_vfs_spiffs_conf_t *conf) char *suffix = ret->d_name + len - 12; if (strcmp(suffix, "_MODEL_INFO_") == 0) { - model_num ++; + model_num++; } } @@ -150,7 +150,7 @@ srmodel_list_t *read_models_form_spiffs(esp_vfs_spiffs_conf_t *conf) models->num = model_num; models->model_name = malloc(models->num * sizeof(char *)); for (int i = 0; i < models->num; i++) { - models->model_name[i] = (char *) calloc(MODEL_NAME_MAX_LENGTH, sizeof(char)); + models->model_name[i] = (char *)calloc(MODEL_NAME_MAX_LENGTH, sizeof(char)); } } @@ -168,29 +168,23 @@ srmodel_list_t *read_models_form_spiffs(esp_vfs_spiffs_conf_t *conf) char *suffix = ret->d_name + len - 12; if (strcmp(suffix, "_MODEL_INFO_") == 0) { - memcpy(models->model_name[idx], ret->d_name, (len - 13)*sizeof(char)); + memcpy(models->model_name[idx], ret->d_name, (len - 13) * sizeof(char)); // models->model_name[idx][len-13] = '\0'; - idx ++; + idx++; } } closedir(dir); dir = NULL; - } return models; } - srmodel_list_t *srmodel_spiffs_init(const esp_partition_t *part) { ESP_LOGI(TAG, "\nInitializing models from SPIFFS, partition label: %s\n", part->label); esp_vfs_spiffs_conf_t conf = { - .base_path = SRMODE_BASE_PATH, - .partition_label = part->label, - .max_files = 5, - .format_if_mount_failed = true - }; + .base_path = SRMODE_BASE_PATH, .partition_label = part->label, .max_files = 5, .format_if_mount_failed = true}; // Use settings defined above to initialize and mount SPIFFS filesystem. // Note: esp_vfs_spiffs_register is an all-in-one convenience function. 
@@ -221,7 +215,6 @@ srmodel_list_t *srmodel_spiffs_init(const esp_partition_t *part) return models; } - void srmodel_spiffs_deinit(srmodel_list_t *models) { if (models->partition != NULL) { @@ -243,15 +236,13 @@ void srmodel_spiffs_deinit(srmodel_list_t *models) free(models); } models = NULL; - } - static uint32_t read_int32(char *data) { uint32_t value = 0; - value |= data[0] << 0; - value |= data[1] << 8; + value |= data[0] << 0; + value |= data[1] << 8; value |= data[2] << 16; value |= data[3] << 24; return value; @@ -269,7 +260,7 @@ srmodel_list_t *srmodel_load(const void *root) char *data = (char *)root; int str_len = SRMODEL_STRING_LENGTH; int int_len = 4; - //read model number + // read model number models->num = read_int32(data); data += int_len; models->model_data = (srmodel_data_t **)malloc(sizeof(srmodel_data_t *) * models->num); @@ -277,29 +268,29 @@ srmodel_list_t *srmodel_load(const void *root) models->model_info = (char **)malloc(sizeof(char *) * models->num); for (int i = 0; i < models->num; i++) { - srmodel_data_t *model_data = (srmodel_data_t *) malloc(sizeof(srmodel_data_t)); + srmodel_data_t *model_data = (srmodel_data_t *)malloc(sizeof(srmodel_data_t)); models->model_info[i] = NULL; // read model name models->model_name[i] = (char *)malloc((strlen(data) + 1) * sizeof(char)); strcpy(models->model_name[i], data); data += str_len; - //read model number + // read model number int file_num = read_int32(data); model_data->num = file_num; data += int_len; - model_data->files = (char **) malloc(sizeof(char *)*file_num); - model_data->data = (char **) malloc(sizeof(void *)*file_num); - model_data->sizes = (int *) malloc(sizeof(int) * file_num); + model_data->files = (char **)malloc(sizeof(char *) * file_num); + model_data->data = (char **)malloc(sizeof(void *) * file_num); + model_data->sizes = (int *)malloc(sizeof(int) * file_num); for (int j = 0; j < file_num; j++) { - //read file name + // read file name model_data->files[j] = data; data += str_len; 
- //read file start index + // read file start index int index = read_int32(data); data += int_len; model_data->data[j] = start + index; - //read file size + // read file size int size = read_int32(data); data += int_len; model_data->sizes[j] = size; @@ -335,8 +326,9 @@ srmodel_list_t *srmodel_mmap_init(const esp_partition_t *partition) if (storage_size < partition->size) { ESP_LOGE(TAG, "The storage free size of this board is less than %s partition required size", partition->label); } - models->mmap_handle = (esp_partition_mmap_handle_t*)malloc(sizeof(esp_partition_mmap_handle_t)); - ESP_ERROR_CHECK(esp_partition_mmap(partition, 0, partition->size, ESP_PARTITION_MMAP_DATA, &root, models->mmap_handle)); + models->mmap_handle = (esp_partition_mmap_handle_t *)malloc(sizeof(esp_partition_mmap_handle_t)); + ESP_ERROR_CHECK( + esp_partition_mmap(partition, 0, partition->size, ESP_PARTITION_MMAP_DATA, &root, models->mmap_handle)); #else int free_pages = spi_flash_mmap_get_free_pages(SPI_FLASH_MMAP_DATA); uint32_t storage_size = free_pages * 64 * 1024; // Byte @@ -345,10 +337,9 @@ srmodel_list_t *srmodel_mmap_init(const esp_partition_t *partition) if (storage_size < partition->size) { ESP_LOGE(TAG, "The storage free size of board is less than %s partition size", partition->label); } - models->mmap_handle = (spi_flash_mmap_handle_t*)malloc(sizeof(spi_flash_mmap_handle_t)); + models->mmap_handle = (spi_flash_mmap_handle_t *)malloc(sizeof(spi_flash_mmap_handle_t)); ESP_ERROR_CHECK(esp_partition_mmap(partition, 0, partition->size, SPI_FLASH_MMAP_DATA, &root, models->mmap_handle)); #endif - models->partition = (esp_partition_t *)partition; srmodel_load(root); @@ -362,11 +353,10 @@ void srmodel_mmap_deinit(srmodel_list_t *models) #if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) esp_partition_munmap(*(esp_partition_mmap_handle_t *)models->mmap_handle); // support esp-idf v5 #else - spi_flash_munmap(*(spi_flash_mmap_handle_t *)models->mmap_handle); // support esp-idf v4 
-#endif + spi_flash_munmap(*(spi_flash_mmap_handle_t *)models->mmap_handle); // support esp-idf v4 +#endif } - if (models->num > 0) { for (int i = 0; i < models->num; i++) { free(models->model_data[i]->files); @@ -449,7 +439,7 @@ srmodel_list_t *srmodel_sdcard_init(const char *base_path) char *info_file = join_path(sub_path, "_MODEL_INFO_"); fp = fopen(info_file, "r"); if (fp != NULL) { - model_num ++; // If _MODLE_INFO_ file exists, model_num ++ + model_num++; // If _MODEL_INFO_ file exists, model_num ++ } printf("%s -> %s\n", sub_path, info_file); fclose(fp); @@ -470,11 +460,10 @@ srmodel_list_t *srmodel_sdcard_init(const char *base_path) models->model_name = malloc(models->num * sizeof(char *)); models->model_info = malloc(models->num * sizeof(char *)); for (int i = 0; i < models->num; i++) { - models->model_name[i] = (char *) calloc(MODEL_NAME_MAX_LENGTH, sizeof(char)); + models->model_name[i] = (char *)calloc(MODEL_NAME_MAX_LENGTH, sizeof(char)); } } - // read & save model names dir = opendir(base_path); while ((ret = readdir(dir)) != NULL) { @@ -506,7 +495,6 @@ srmodel_list_t *srmodel_sdcard_init(const char *base_path) return models; } - void srmodel_sdcard_deinit(srmodel_list_t *models) { if (models != NULL) { @@ -521,23 +509,18 @@ void srmodel_sdcard_deinit(srmodel_list_t *models) models = NULL; } - - srmodel_list_t *esp_srmodel_init(const char *partition_label) { #ifdef ESP_PLATFORM - #ifdef CONFIG_MODEL_IN_SDCARD - // Read model data from SD card + // Read model data from SD card return srmodel_sdcard_init(partition_label); #else // Read model data from flash partition const esp_partition_t *partition = NULL; // find spiffs partition - partition = esp_partition_find_first( - ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, partition_label - ); + partition = esp_partition_find_first(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, partition_label); if (partition) { return srmodel_mmap_init(partition); @@ -580,7 +563,6 @@ char 
*esp_srmodel_filter(srmodel_list_t *models, const char *keyword1, const cha // return the first model name including specific keyword for (int i = 0; i < models->num; i++) { - if (esp_strstr(models->model_name[i], keyword1) != NULL) { if (esp_strstr(models->model_name[i], keyword2) != NULL) { return models->model_name[i]; @@ -618,4 +600,4 @@ char *esp_srmodel_get_wake_words(srmodel_list_t *models, char *model_name) } } return NULL; -} \ No newline at end of file +} diff --git a/test_apps/esp-sr/main/test_afe.cpp b/test_apps/esp-sr/main/test_afe.cpp index 50c6c43..9a52a87 100644 --- a/test_apps/esp-sr/main/test_afe.cpp +++ b/test_apps/esp-sr/main/test_afe.cpp @@ -9,6 +9,7 @@ #include "audio_test_file.h" #include "dl_lib_convq_queue.h" #include "esp_afe_aec.h" +#include "esp_doa.h" #include "esp_afe_sr_models.h" #include "esp_heap_caps.h" #include "esp_log.h" @@ -25,7 +26,7 @@ #include #include -#define ARRAY_SIZE_OFFSET 8 // Increase this if audio_sys_get_real_time_stats returns ESP_ERR_INVALID_SIZE +#define ARRAY_SIZE_OFFSET 8 // Increase this if audio_sys_get_real_time_stats returns ESP_ERR_INVALID_SIZE #define AUDIO_SYS_TASKS_ELAPSED_TIME_MS 1000 // Period of stats measurement static const char *TAG = "AFE_TEST"; @@ -381,3 +382,70 @@ TEST_CASE("test afe aec interface", "[afe]") int end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); TEST_ASSERT_EQUAL(true, end_size == start_size); } + +void generate_test_frame(int16_t *left, int16_t *right, int frame_size, float angle_deg, int sample_rate) +{ + int TEST_FREQ = 1000; + static float phase = 0.0f; + const float d = 0.06f; + const float c = 343.0f; + + float theta = angle_deg * M_PI / 180.0f; + float tau = d * cosf(theta) / c; + + int delay_samples = (int)roundf(tau * sample_rate); + printf("Angle: %f, Delay: %d samples\n", angle_deg, delay_samples); + + for (int i = 0; i < frame_size; i++) { + float t = (float)(i + phase) / sample_rate; + left[i] = (int16_t)(sinf(2 * M_PI * TEST_FREQ * t) * 32767); + + int 
delayed_index = i - delay_samples; + right[i] = (int16_t)(sinf(2 * M_PI * TEST_FREQ * (delayed_index + phase) / sample_rate) * 32767); + } + phase += frame_size; +} + +TEST_CASE("test doa interface", "[afe]") +{ + // Initialize the DOA estimator + int frame_samples = 1024; + int sample_rate = 16000; + int16_t *left = (int16_t *)malloc(frame_samples * sizeof(int16_t)); + int16_t *right = (int16_t *)malloc(frame_samples * sizeof(int16_t)); + int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); + doa_handle_t *doa = esp_doa_create(sample_rate, 20.0f, 0.06f, frame_samples); + + uint32_t c0, c1, t_doa = 0; + int angle = 180; + for (int f = 0; f < angle; f++) { // one frame per test angle (0-179 degrees) + generate_test_frame(left, right, frame_samples, f*1.0, sample_rate); + c0 = esp_timer_get_time(); + float est_angle = esp_doa_process(doa, left, right); + c1 = esp_timer_get_time(); + t_doa += c1 - c0; + + printf("%.1f\t\t%.1f\n", f*1.0, est_angle); // true angle vs. estimated angle + } + int doa_mem_size = start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT); + printf("doa memory size:%d, cpu loading:%f\n", doa_mem_size, (t_doa * 1.0 / 1000000 * sample_rate) / (angle * frame_samples)); + + esp_doa_destroy(doa); + int end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); + + // create & destroy 5 times + for (int i = 0; i < 5; i++) { + doa = esp_doa_create(sample_rate, 20.0f, 0.06f, frame_samples); + esp_doa_process(doa, left, right); + esp_doa_destroy(doa); + } + + int last_end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT); + printf("memory leak:%d\n", start_size - end_size); + free(left); + free(right); + // return 0; + printf("TEST DONE\n\n"); + TEST_ASSERT_EQUAL(true, (start_size - end_size) < 300); + TEST_ASSERT_EQUAL(true, last_end_size == end_size); +}