diff --git a/include/esp32/dl_lib.h b/include/esp32/dl_lib.h
index 5afe563..d7b6d8f 100644
--- a/include/esp32/dl_lib.h
+++ b/include/esp32/dl_lib.h
@@ -18,13 +18,66 @@
 #include "dl_lib_matrixq.h"
 #include "dl_lib_matrixq8.h"
 
+#ifdef ESP_PLATFORM
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "freertos/queue.h"
+#include "esp_system.h"
+#include "esp_heap_caps.h"
+#include "sdkconfig.h"
+#define DL_SPIRAM_SUPPORT 1
+#endif
+
+#ifdef CONFIG_IDF_TARGET_ESP32S3
+#include "esp32s3/rom/cache.h"
+#endif
+
 typedef int padding_state;
 
+// /**
+//  * @brief Allocate a chunk of memory which has the given capabilities.
+//  *        Equivalent semantics to libc malloc(), for capability-aware memory.
+//  *        In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
+//  * 
+//  * @param size  In bytes, of the amount of memory to allocate
+//  * @param caps  Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
+//  *              MALLOC_CAP_SPIRAM:   Memory must be in SPI RAM
+//  *              MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
+//  *              MALLOC_CAP_DMA:      Memory must be able to be accessed by DMA
+//  *              MALLOC_CAP_DEFAULT:  Memory can be returned in a non-capability-specific memory allocation
+//  * @return Pointer to currently allocated heap memory
+//  **/
+// void *heap_caps_malloc(size_t size, uint32_t caps);
+
+/**
+ * @brief Allocate aligned memory from internal memory or external memory.
+ *        If cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, memory is allocated from internal RAM;
+ *        otherwise from PSRAM. NOTE(review): ESP-IDF keeps requests BELOW this threshold internal - confirm the direction.
+ *
+ * @param cnt    Number of contiguous chunks of memory to allocate
+ * @param size   Size, in bytes, of a chunk of memory to allocate
+ * @param align  Alignment size, in bits
+ * @return Pointer to the allocated heap memory
+ */
 void *dl_lib_calloc(int cnt, int size, int align);
 
+/**
+ * @brief Always allocate aligned memory from external memory.
+ *
+ * @param cnt    Number of contiguous chunks of memory to allocate
+ * @param size   Size, in bytes, of a chunk of memory to allocate
+ * @param align  Alignment size, in bits
+ * @return Pointer to the allocated, aligned heap memory
+ */
 void *dl_lib_calloc_psram(int cnt, int size, int align);
 
-void dl_lib_free(void *d);
+/**
+ * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
+ *
+ * @param ptr    Pointer to free
+ */
+void dl_lib_free(void *ptr);
+
 /**
  * @brief Does a fast version of the exp() operation on a floating point number.
  *
@@ -279,7 +332,8 @@ int16_t dl_tanh_op_q8(int16_t v);
 
 void load_mat_psram_mn4(void);
 void load_mat_psram_mn3(void);
-
+void free_mat_psram_mn4(void);
+void free_mat_psram_mn3(void);
 qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
 qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
 
diff --git a/include/esp32/dl_lib_convq8_queue.h b/include/esp32/dl_lib_convq8_queue.h
index c2df87f..c596419 100644
--- a/include/esp32/dl_lib_convq8_queue.h
+++ b/include/esp32/dl_lib_convq8_queue.h
@@ -20,11 +20,12 @@
 #include "dl_lib_conv_queue.h"
 #include "dl_lib_convq_queue.h"
 
+//[nch, n, c]
 typedef struct {
     int n;           /*< the length of queue */
-    int c;           /*< the channel number of queue element*/
+    int c;           /*< the number of queue element*/
     int front;       /*< the front(top) position of queue */
-    int flag;        /*< not used */
+    int nch;         /*< the channel of queue */
     int exponent;    /*< The values in items should be multiplied by pow(2,exponent) 
                          to get the real values */
     q8tp_t *itemq;    /*< Pointer to item array */
@@ -34,11 +35,21 @@ typedef struct {
  * @brief Allocate a fixed-point convolution queue
  *
  * @param n     The length of queue
- * @param c     The channel number of elements in the queue
+ * @param c     The number of elements in the queue
  * @return      The convolution queue, or NULL if out of memory
  */
 dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
 
+/**
+ * @brief Allocate a fixed-point multi-channel convolution queue
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @param nch   The number of channels in the queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
+dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
+
 /**
  * @brief Free a fixed-point convolution queue
  *
@@ -46,6 +57,13 @@ dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
  */
 void dl_convq8_queue_free(dl_convq8_queue_t *cq);
 
+/**
+ * @brief Set itemq of convolution queue to 0
+ *
+ * @param cqm    The fixed-point convolution queue to clear
+ */
+void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
+
 /**
  * @brief  Insert the float-point element at the end of queue.
  *         The precision of fixed-point numbers is described by the Qm.f notation,  
@@ -66,6 +84,16 @@ void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit,
  */
 q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
 
+/**
+ * @brief   Get the pointer of element in the queue by offset
+ *
+ * @param cq      Input fixed-point convolution queue
+ * @param offset  Offset from the front of the queue
+ * @param ch      Channel index of queue
+ * @return        Pointer of the element
+ */
+q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
+
 /**
  * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
  *        based on convolution queue.
@@ -120,7 +148,7 @@ void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, in
 
 dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
 
-
+int8_t dl_sigmoid_lutq8(int in);
 /**
  * @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
  *
@@ -216,6 +244,8 @@ void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **ou
                                     dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
                                     int offset, int prenum);    
 
+void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
+
 void print_convq8(dl_convq8_queue_t *cq, int offset);
 void print_convq(dl_convq_queue_t *cq, int offset);
 #endif
\ No newline at end of file
diff --git a/include/esp32/dl_lib_convq_queue.h b/include/esp32/dl_lib_convq_queue.h
index e9df1e5..8069371 100644
--- a/include/esp32/dl_lib_convq_queue.h
+++ b/include/esp32/dl_lib_convq_queue.h
@@ -20,11 +20,12 @@
 
 
 //fixed-point convolution FIFO queue. 
+//[nch, n, c]
 typedef struct {
     int n;           /*< the length of queue */
-    int c;           /*< the channel number of queue element*/
+    int c;           /*< the number of queue element*/
     int front;       /*< the front(top) position of queue */
-    int flag;        /*< not used */
+    int nch;         /*< the multiple of queue*/
     int exponent;    /*< The values in items should be multiplied by pow(2,exponent) 
                          to get the real values */
     qtp_t *itemq;    /*< Pointer to item array */
@@ -34,11 +35,41 @@ typedef struct {
  * @brief Allocate a fixed-point convolution queue
  *
  * @param n     The length of queue
- * @param c     The channel number of elements in the queue
+ * @param c     The number of elements in the queue
  * @return      The convolution queue, or NULL if out of memory
  */
 dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
+
+/**
+ * @brief Allocate a fixed-point convolution queue from PSRAM
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
 dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
+
+/**
+ * @brief Allocate a fixed-point multi-channel convolution queue
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @param nch   The channel of conv queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
+dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
+
+/**
+ * @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
+ *
+ * @param n     The length of queue
+ * @param c     The number of elements in the queue
+ * @param nch   The channel of conv queue
+ * @return      The convolution queue, or NULL if out of memory
+ */
+dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
+
+
 void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
 
 /**
@@ -48,6 +79,13 @@ void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
  */
 void dl_convq_queue_free(dl_convq_queue_t *cq);
 
+/**
+ * @brief Set itemq of convolution queue to 0
+ *
+ * @param cq     Pointer to the fixed-point convolution queue
+ */
+void dl_convq_queue_bzero(dl_convq_queue_t *cq);
+
 /**
  * @brief Move the front pointer of queue forward, 
           the First(oldest) element become the last(newest) element, 
@@ -89,6 +127,16 @@ dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
  */
 inline qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
 
+/**
+ * @brief   Get the pointer of element in the queue by offset
+ *
+ * @param cq        Input fixed-point convolution queue
+ * @param offset    Offset from the front of the queue
+ * @param ch        Channel index of convolution queue 
+ * @return          Pointer of the element
+ */
+qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
+
 /**
  * @brief   Does a tanh operation on the one of element in the convolution queue.
  *          Gets the pointer of element in the convolution queue by offset, and does a 
@@ -321,6 +369,7 @@ qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
 
 void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
 void test_lstm_convq(int size, int in_dim, int lstm_cell);
-void dl_nn_tanh_i16(dl_convq_queue_t **cqm, int offset, int nch);
-
+void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
+void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
+void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
 #endif
\ No newline at end of file
diff --git a/include/esp32/dl_lib_matrix.h b/include/esp32/dl_lib_matrix.h
index 9e25c84..d046e24 100644
--- a/include/esp32/dl_lib_matrix.h
+++ b/include/esp32/dl_lib_matrix.h
@@ -14,10 +14,17 @@
 #ifndef DL_LIB_MATRIX_H
 #define DL_LIB_MATRIX_H
 
-#if CONFIG_BT_SHARE_MEM_REUSE
+#ifdef ESP_PLATFORM
 #include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "freertos/queue.h"
+#include "esp_system.h"
 #endif
 
+// #ifdef CONFIG_IDF_TARGET_ESP32S3
+// #include "dl_tie728_bzero.h"
+// #endif
+
 typedef float fptp_t;
 
 #if CONFIG_BT_SHARE_MEM_REUSE
diff --git a/include/esp32/dl_lib_matrixq8.h b/include/esp32/dl_lib_matrixq8.h
index f7dc471..579b1c0 100644
--- a/include/esp32/dl_lib_matrixq8.h
+++ b/include/esp32/dl_lib_matrixq8.h
@@ -16,6 +16,8 @@
 
 #include <stdint.h>
 #include "dl_lib_matrix.h"
+#include "dl_lib.h"
+#include "dl_lib_matrixq.h"
 
 typedef int8_t q8tp_t;
 
@@ -48,6 +50,15 @@ dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
  * @param m     Matrix to free
  */
 void dl_matrixq8_free(dl_matrix2dq8_t *m);
+
+/**
+ * @brief Copy a quantized matrix to PSRAM
+ *        The source matrix may be in flash or iram/psram
+ *
+ * @param m     Matrix to copy
+ */
+dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
+
 /**
  * @brief Convert a floating-point matrix to a quantized matrix
  *
diff --git a/include/esp32/esp_afe_sr_iface.h b/include/esp32/esp_afe_sr_iface.h
index ce3c13d..4ba7fd5 100644
--- a/include/esp32/esp_afe_sr_iface.h
+++ b/include/esp32/esp_afe_sr_iface.h
@@ -24,6 +24,12 @@ typedef enum {
     AFE_FETCH_WWE_DETECTED = 1        // wwe state: wake word is detected
 } afe_fetch_mode_t;
 
+typedef enum {                        // named levels for the alloc_from_psram config field
+    AFE_PSRAM_LOW_COST = 1,           // presumably the least PSRAM usage - TODO confirm
+    AFE_PSRAM_MEDIA_COST = 2,         // value the config initializer macro in this header assigns to .alloc_from_psram
+    AFE_PSRAM_HIGH_COST = 3           // presumably the most PSRAM usage - TODO confirm
+} afe_use_psram_mode_t;
+
 typedef struct {
     bool aec_init;
     bool se_init;
@@ -73,7 +79,7 @@ typedef struct {
     .afe_perferred_core = 0, \
     .afe_perferred_priority = 5, \
     .afe_ringbuf_size = 50, \
-    .alloc_from_psram = 2, \
+    .alloc_from_psram = AFE_PSRAM_MEDIA_COST, \
     .agc_mode = 2, \
 }
 #endif
diff --git a/include/esp32/esp_mn_iface.h b/include/esp32/esp_mn_iface.h
index 4cbd53e..d4e5aa3 100644
--- a/include/esp32/esp_mn_iface.h
+++ b/include/esp32/esp_mn_iface.h
@@ -40,9 +40,27 @@ typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
  * @brief Set the detection threshold to manually abjust the probability 
  *
  * @param model The model object to query
- * @param det_treshold The threshold to trigger speech commands, the range of det_threshold is 0.5~0.9999
+ * @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
  */
 typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
+/**
+ * @brief Set the detection threshold of a speech command phrase to manually adjust the probability
+ *
+ * @param model The model object to query
+ * @param phrase_id The ID of speech command phrase
+ * @param det_threshold The threshold to trigger speech command phrases
+ */
+typedef void (*esp_mn_iface_op_set_command_det_threshold_t)(model_iface_data_t *model, int phrase_id, float det_threshold);
+
+/**
+ * @brief Get the detection threshold by phrase ID 
+ *
+ * @param model The model object to query
+ * @param phrase_id The ID of speech command phrase
+ * 
+ * @return The threshold of speech command phrases
+ */
+typedef float (*esp_mn_iface_op_get_command_det_threshold_t)(model_iface_data_t *model, int phrase_id);
 
 /**
  * @brief Get the sample rate of the samples to feed to the detect function
@@ -74,7 +92,7 @@ typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
  * @brief Reset the speech commands recognition model
  *
  */
-typedef void (*esp_mn_iface_op_reset_t)(model_iface_data_t *model, char *command_str, char *err_phrase_id);
+typedef void (*esp_mn_iface_op_reset_t)(model_iface_data_t *model_data, char *command_str, char *err_phrase_id);
 
 
 typedef struct {
@@ -83,6 +101,8 @@ typedef struct {
     esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
     esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
     esp_mn_iface_op_set_det_threshold_t set_det_threshold;
+    esp_mn_iface_op_set_command_det_threshold_t set_command_det_threshold;
+    esp_mn_iface_op_get_command_det_threshold_t get_command_det_threshold;
     esp_mn_iface_op_detect_t detect; 
     esp_mn_iface_op_destroy_t destroy;
     esp_mn_iface_op_reset_t reset;
diff --git a/include/esp32/esp_wn_iface.h b/include/esp32/esp_wn_iface.h
index 08c7494..6843af1 100644
--- a/include/esp32/esp_wn_iface.h
+++ b/include/esp32/esp_wn_iface.h
@@ -9,8 +9,8 @@ typedef struct model_iface_data_t model_iface_data_t;
 //The probability of being wake words is increased with increasing mode, 
 //As a consequence also the false alarm rate goes up
 typedef enum {
-	DET_MODE_90 = 0,  //Normal, response accuracy rate about 90%
-	DET_MODE_95 = 1,       //Aggressive, response accuracy rate about 95%
+	DET_MODE_90 = 0,       // Normal
+	DET_MODE_95 = 1,       // Aggressive
     DET_MODE_2CH_90 = 2,
     DET_MODE_2CH_95 = 3,
     DET_MODE_3CH_90 = 4,
@@ -129,6 +129,13 @@ typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float
  */
 typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
 
+/**
+ * @brief Clean all states of model
+ *
+ * @param model The model object to clean
+ */
+typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
+
 /**
  * @brief Destroy a speech recognition model
  *
@@ -152,5 +159,6 @@ typedef struct {
     esp_wn_iface_op_get_triggered_channel_t  get_triggered_channel;
     esp_wn_iface_op_get_vol_gain_t get_vol_gain;
     esp_wn_iface_op_detect_t detect;
+    esp_wn_iface_op_clean_t clean;
     esp_wn_iface_op_destroy_t destroy;
 } esp_wn_iface_t;
diff --git a/lib/esp32/libc_speech_features.a b/lib/esp32/libc_speech_features.a
index b120332..0baefde 100644
Binary files a/lib/esp32/libc_speech_features.a and b/lib/esp32/libc_speech_features.a differ
diff --git a/lib/esp32/libdl_lib.a b/lib/esp32/libdl_lib.a
index e6403c1..8da69ad 100644
Binary files a/lib/esp32/libdl_lib.a and b/lib/esp32/libdl_lib.a differ
diff --git a/lib/esp32/libesp_audio_front_end.a b/lib/esp32/libesp_audio_front_end.a
index c1fbc85..8e68bed 100644
Binary files a/lib/esp32/libesp_audio_front_end.a and b/lib/esp32/libesp_audio_front_end.a differ
diff --git a/lib/esp32/libesp_audio_processor.a b/lib/esp32/libesp_audio_processor.a
index 74c11d3..df49110 100644
Binary files a/lib/esp32/libesp_audio_processor.a and b/lib/esp32/libesp_audio_processor.a differ
diff --git a/lib/esp32/libmultinet.a b/lib/esp32/libmultinet.a
index 073b913..b67af4d 100644
Binary files a/lib/esp32/libmultinet.a and b/lib/esp32/libmultinet.a differ
diff --git a/lib/esp32/libwakenet.a b/lib/esp32/libwakenet.a
index c0af12e..674fe6f 100644
Binary files a/lib/esp32/libwakenet.a and b/lib/esp32/libwakenet.a differ
diff --git a/libversion b/libversion
index 9a7124f..fb83085 100644
--- a/libversion
+++ b/libversion
@@ -1 +1 @@
-0b92ff7435549f1ad3443e45f5d6816efa851a59
\ No newline at end of file
+2a4b651c5cf88d88066f4eec3fc2d9f38b158988
\ No newline at end of file