bugfix(esp32): Fix the bug of memory leak

2025-09-15 15:28:44 +08:00 · 2021-12-10 20:58:37 +08:00 · 2021-12-10 20:58:37 +08:00 · 7f10b67368
commit 7f10b67368
parent 6f927f81fb
15 changed files with 203 additions and 18 deletions
--- a/include/esp32/dl_lib.h
+++ b/include/esp32/dl_lib.h
@ -18,13 +18,66 @@
 #include "dl_lib_matrixq.h"
 #include "dl_lib_matrixq8.h"

+#ifdef ESP_PLATFORM
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "freertos/queue.h"
+#include "esp_system.h"
+#include "esp_heap_caps.h"
+#include "sdkconfig.h"
+#define DL_SPIRAM_SUPPORT 1
+#endif
+
+#ifdef CONFIG_IDF_TARGET_ESP32S3
+#include "esp32s3/rom/cache.h"
+#endif
+
 typedef int padding_state;

+// /**
+//  * @brief Allocate a chunk of memory which has the given capabilities.
+//  *        Equivalent semantics to libc malloc(), for capability-aware memory.
+//  *        In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
+//  * 
+//  * @param size  In bytes, of the amount of memory to allocate
+//  * @param caps  Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
+//  *              MALLOC_CAP_SPIRAM:   Memory must be in SPI RAM
+//  *              MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
+//  *              MALLOC_CAP_DMA:      Memory must be able to be accessed by DMA
+//  *              MALLOC_CAP_DEFAULT:  Memory can be returned in a non-capability-specific memory allocation
+//  * @return Pointer to currently allocated heap memory
+//  **/
+// void *heap_caps_malloc(size_t size, uint32_t caps);
+
+/**
+ * @brief Allocate aligned memory from internal memory or external memory.
+ *        if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
+ *        else, allocate memory from PSRAM
+ *
+ * @param cnt    Number of contiguous chunks of memory to allocate
+ * @param size   Size, in bytes, of a chunk of memory to allocate     
+ * @param align  Aligned size, in bits
+ * @return Pointer to currently allocated heap memory
+ */
 void *dl_lib_calloc(int cnt, int size, int align);

+/**
+ * @brief Always allocate aligned memory from external memory.
+ *
+ * @param cnt    Number of contiguous chunks of memory to allocate
+ * @param size   Size, in bytes, of a chunk of memory to allocate     
+ * @param align  Aligned size, in bits
+ * @return Pointer to the allocated, aligned heap memory
+ */
 void *dl_lib_calloc_psram(int cnt, int size, int align);

-void dl_lib_free(void *d);
+/**
+ * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram` 
+ * 
+ * @param ptr    Pointer to free
+ */
+void dl_lib_free(void *ptr);
+
 /**
 * @brief Does a fast version of the exp() operation on a floating point number.
 *
@ -279,7 +332,8 @@ int16_t dl_tanh_op_q8(int16_t v);

 void load_mat_psram_mn4(void);
 void load_mat_psram_mn3(void);
-
+void free_mat_psram_mn4(void);
+void free_mat_psram_mn3(void);
 qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
 qtp_t dl_hard_tanh_op(qtp_t in, int exponent);

--- a/include/esp32/dl_lib_convq8_queue.h
+++ b/include/esp32/dl_lib_convq8_queue.h
@ -20,11 +20,12 @@
 #include "dl_lib_conv_queue.h"
 #include "dl_lib_convq_queue.h"

+//[nch, n, c]
 typedef struct {
    int n;           /*< the length of queue */
-    int c;           /*< the channel number of queue element*/
+    int c;           /*< the number of queue element*/
    int front;       /*< the front(top) position of queue */
-    int flag;        /*< not used */
+    int nch;         /*< the channel of queue */
    int exponent;    /*< The values in items should be multiplied by pow(2,exponent) 
                         to get the real values */
    q8tp_t *itemq;    /*< Pointer to item array */
@ -34,11 +35,21 @@ typedef struct {
 * @brief Allocate a fixed-point convolution queue
 *
 * @param n     The length of queue
- * @param c     The channel number of elements in the queue
+ * @param c     The number of elements in the queue
 * @return      The convolution queue, or NULL if out of memory
 */
 dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);

+/**
+ * @brief Allocate a fixed-point convolution queue
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @param nch   The channel of queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
+dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
+
 /**
 * @brief Free a fixed-point convolution queue
 *
@ -46,6 +57,13 @@ dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
 */
 void dl_convq8_queue_free(dl_convq8_queue_t *cq);

+/**
+ * @brief Set itemq of convolution queue to 0
+ *
+ * @param cqm    The fixed-point convolution queue whose itemq is set to 0
+ */
+void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
+
 /**
 * @brief  Insert the float-point element at the end of queue.
 *         The precision of fixed-point numbers is described by the Qm.f notation,  
@ -66,6 +84,16 @@ void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit,
 */
 q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);

+/**
+ * @brief   Get the pointer of element in the queue by offset
+ *
+ * @param cq      Input fixed-point convolution queue
+ * @param offset  Offset from the front of the queue
+ * @param ch      Channel index of queue
+ * @return        Pointer of the element
+ */
+q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
+
 /**
 * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
 *        based on convolution queue.
@ -120,7 +148,7 @@ void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, in

 dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);

-
+int8_t dl_sigmoid_lutq8(int in);
 /**
 * @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
 *
@ -216,6 +244,8 @@ void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **ou
                                    dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
                                    int offset, int prenum);    

+void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
+
 void print_convq8(dl_convq8_queue_t *cq, int offset);
 void print_convq(dl_convq_queue_t *cq, int offset);
 #endif
--- a/include/esp32/dl_lib_convq_queue.h
+++ b/include/esp32/dl_lib_convq_queue.h
@ -20,11 +20,12 @@


 //fixed-point convolution FIFO queue. 
+//[nch, n, c]
 typedef struct {
    int n;           /*< the length of queue */
-    int c;           /*< the channel number of queue element*/
+    int c;           /*< the number of queue element*/
    int front;       /*< the front(top) position of queue */
-    int flag;        /*< not used */
+    int nch;         /*< the channel of queue */
    int exponent;    /*< The values in items should be multiplied by pow(2,exponent) 
                         to get the real values */
    qtp_t *itemq;    /*< Pointer to item array */
@ -34,11 +35,41 @@ typedef struct {
 * @brief Allocate a fixed-point convolution queue
 *
 * @param n     The length of queue
- * @param c     The channel number of elements in the queue
+ * @param c     The number of elements in the queue
 * @return      The convolution queue, or NULL if out of memory
 */
 dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
+
+/**
+ * @brief Allocate a fixed-point convolution queue from PSRAM
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
 dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
+
+/**
+ * @brief Allocate a fixed-point multi-channel convolution queue
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @param nch   The channel of conv queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
+dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
+
+/**
+ * @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @param nch   The channel of conv queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
+dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
+
+
 void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);

 /**
@ -48,6 +79,13 @@ void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
 */
 void dl_convq_queue_free(dl_convq_queue_t *cq);

+/**
+ * @brief Set itemq of convolution queue to 0
+ *
+ * @param cq     Pointer to the fixed-point convolution queue
+ */
+void dl_convq_queue_bzero(dl_convq_queue_t *cq);
+
 /**
 * @brief Move the front pointer of queue forward, 
          the First(oldest) element become the last(newest) element, 
@ -89,6 +127,16 @@ dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
 */
 inline qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);

+/**
+ * @brief   Get the pointer of element in the queue by offset
+ *
+ * @param cq        Input fixed-point convolution queue
+ * @param offset    Offset from the front of the queue
+ * @param ch        Channel index of convolution queue 
+ * @return          Pointer of the element
+ */
+qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
+
 /**
 * @brief   Does a tanh operation on the one of element in the convolution queue.
 *          Gets the pointer of element in the convolution queue by offset, and does a 
@ -321,6 +369,7 @@ qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,

 void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
 void test_lstm_convq(int size, int in_dim, int lstm_cell);
-void dl_nn_tanh_i16(dl_convq_queue_t **cqm, int offset, int nch);
-
+void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
+void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
+void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
 #endif
--- a/include/esp32/dl_lib_matrix.h
+++ b/include/esp32/dl_lib_matrix.h
@ -14,10 +14,17 @@
 #ifndef DL_LIB_MATRIX_H
 #define DL_LIB_MATRIX_H

-#if CONFIG_BT_SHARE_MEM_REUSE
+#ifdef ESP_PLATFORM
 #include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "freertos/queue.h"
+#include "esp_system.h"
 #endif

+// #ifdef CONFIG_IDF_TARGET_ESP32S3
+// #include "dl_tie728_bzero.h"
+// #endif
+
 typedef float fptp_t;

 #if CONFIG_BT_SHARE_MEM_REUSE
--- a/include/esp32/dl_lib_matrixq8.h
+++ b/include/esp32/dl_lib_matrixq8.h
@ -16,6 +16,8 @@

 #include <stdint.h>
 #include "dl_lib_matrix.h"
+#include "dl_lib.h"
+#include "dl_lib_matrixq.h"

 typedef int8_t q8tp_t;

@ -48,6 +50,15 @@ dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
 * @param m     Matrix to free
 */
 void dl_matrixq8_free(dl_matrix2dq8_t *m);
+
+/**
+ * @brief Copy a quantized matrix
+ * Copy a quantized matrix from flash or iram/psram
+ *
+ * @param m     Matrix to copy
+ */
+dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
+
 /**
 * @brief Convert a floating-point matrix to a quantized matrix
 *
--- a/include/esp32/esp_afe_sr_iface.h
+++ b/include/esp32/esp_afe_sr_iface.h
@ -24,6 +24,12 @@ typedef enum {
    AFE_FETCH_WWE_DETECTED = 1        // wwe state: wake word is detected
 } afe_fetch_mode_t;

+typedef enum {
+    AFE_PSRAM_LOW_COST = 1,
+    AFE_PSRAM_MEDIA_COST = 2,
+    AFE_PSRAM_HIGH_COST = 3
+} afe_use_psram_mode_t;
+
 typedef struct {
    bool aec_init;
    bool se_init;
@ -73,7 +79,7 @@ typedef struct {
    .afe_perferred_core = 0, \
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
-    .alloc_from_psram = 2, \
+    .alloc_from_psram = AFE_PSRAM_MEDIA_COST, \
    .agc_mode = 2, \
 }
 #endif
--- a/include/esp32/esp_mn_iface.h
+++ b/include/esp32/esp_mn_iface.h
@ -40,9 +40,27 @@ typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
 * @brief Set the detection threshold to manually abjust the probability 
 *
 * @param model The model object to query
- * @param det_treshold The threshold to trigger speech commands, the range of det_threshold is 0.5~0.9999
+ * @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
 */
 typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
+/**
+ * @brief Set the detection threshold of one speech command phrase to manually adjust the probability 
+ *
+ * @param model The model object to query
+ * @param phrase_id The ID of speech command phrase
+ * @param det_threshold The threshold to trigger speech command phrases
+ */
+typedef void (*esp_mn_iface_op_set_command_det_threshold_t)(model_iface_data_t *model, int phrase_id, float det_threshold);
+
+/**
+ * @brief Get the detection threshold by phrase ID 
+ *
+ * @param model The model object to query
+ * @param phrase_id The ID of speech command phrase
+ * 
+ * @return The threshold of speech command phrases
+ */
+typedef float (*esp_mn_iface_op_get_command_det_threshold_t)(model_iface_data_t *model, int phrase_id);

 /**
 * @brief Get the sample rate of the samples to feed to the detect function
@ -74,7 +92,7 @@ typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
 * @brief Reset the speech commands recognition model
 *
 */
-typedef void (*esp_mn_iface_op_reset_t)(model_iface_data_t *model, char *command_str, char *err_phrase_id);
+typedef void (*esp_mn_iface_op_reset_t)(model_iface_data_t *model_data, char *command_str, char *err_phrase_id);


 typedef struct {
@ -83,6 +101,8 @@ typedef struct {
    esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
    esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
    esp_mn_iface_op_set_det_threshold_t set_det_threshold;
+    esp_mn_iface_op_set_command_det_threshold_t set_command_det_threshold;
+    esp_mn_iface_op_get_command_det_threshold_t get_command_det_threshold;
    esp_mn_iface_op_detect_t detect; 
    esp_mn_iface_op_destroy_t destroy;
    esp_mn_iface_op_reset_t reset;
--- a/include/esp32/esp_wn_iface.h
+++ b/include/esp32/esp_wn_iface.h
@ -9,8 +9,8 @@ typedef struct model_iface_data_t model_iface_data_t;
 //The probability of being wake words is increased with increasing mode, 
 //As a consequence also the false alarm rate goes up
 typedef enum {
-	DET_MODE_90 = 0,  //Normal, response accuracy rate about 90%
-	DET_MODE_95 = 1,       //Aggressive, response accuracy rate about 95%
+	DET_MODE_90 = 0,       // Normal
+	DET_MODE_95 = 1,       // Aggressive
    DET_MODE_2CH_90 = 2,
    DET_MODE_2CH_95 = 3,
    DET_MODE_3CH_90 = 4,
@ -129,6 +129,13 @@ typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float
 */
 typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);

+/**
+ * @brief Clean all states of model
+ *
+ * @param model The model object to query
+ */
+typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
+
 /**
 * @brief Destroy a speech recognition model
 *
@ -152,5 +159,6 @@ typedef struct {
    esp_wn_iface_op_get_triggered_channel_t  get_triggered_channel;
    esp_wn_iface_op_get_vol_gain_t get_vol_gain;
    esp_wn_iface_op_detect_t detect;
+    esp_wn_iface_op_clean_t clean;
    esp_wn_iface_op_destroy_t destroy;
 } esp_wn_iface_t;
--- a/lib/esp32/libc_speech_features.a
+++ b/lib/esp32/libc_speech_features.a
--- a/lib/esp32/libdl_lib.a
+++ b/lib/esp32/libdl_lib.a
--- a/lib/esp32/libesp_audio_front_end.a
+++ b/lib/esp32/libesp_audio_front_end.a
--- a/lib/esp32/libesp_audio_processor.a
+++ b/lib/esp32/libesp_audio_processor.a
--- a/lib/esp32/libmultinet.a
+++ b/lib/esp32/libmultinet.a
--- a/lib/esp32/libwakenet.a
+++ b/lib/esp32/libwakenet.a
--- a/2
+++ b/2
@ -1 +1 @@
-0b92ff7435549f1ad3443e45f5d6816efa851a59
+2a4b651c5cf88d88066f4eec3fc2d9f38b158988