diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a649f77..16bef86 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -100,8 +100,6 @@ before_script:
   artifacts:
     when: always
     paths:
-      - "**/build*/size.json"
-      - "**/build*/build_log.txt"
       - "**/build*/*.bin"
       # upload to s3 server to save the artifacts size
       - "**/build*/*.map"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c405182..5f768f9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6 +74,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
 
     add_prebuilt_library(flite_g2p "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libflite_g2p.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libesp_audio_processor.a" PRIV_REQUIRES ${COMPONENT_NAME})
+    add_prebuilt_library(vadnet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libvadnet.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(multinet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libmultinet.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libesp_audio_front_end.a" PRIV_REQUIRES ${COMPONENT_NAME})
@@ -95,6 +96,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
         esp_tts_chinese
         voice_set_xiaole
         nsnet
+        vadnet
         wakenet
         "-Wl,--end-group")
 
@@ -153,6 +155,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32p4")
     add_prebuilt_library(flite_g2p "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libflite_g2p.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libesp_audio_processor.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
+    add_prebuilt_library(vadnet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libvadnet.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(multinet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libmultinet.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libesp_audio_front_end.a" PRIV_REQUIRES ${COMPONENT_NAME})
     add_prebuilt_library(hufzip "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libhufzip.a" PRIV_REQUIRES ${COMPONENT_NAME})
@@ -173,6 +176,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32p4")
         esp_tts_chinese
         voice_set_xiaole
         wakenet
+        vadnet
         nsnet
         "-Wl,--end-group")
 
diff --git a/Kconfig.projbuild b/Kconfig.projbuild
index dde6613..f6b3071 100644
--- a/Kconfig.projbuild
+++ b/Kconfig.projbuild
@@ -13,14 +13,9 @@ choice MODEL_DATA_PATH
 endchoice
 
 
-config USE_AFE
-	bool "use afe"
-	default "y"
-
 choice AFE_INTERFACE_SEL
 	prompt "Afe interface"
 	default AFE_INTERFACE_V1
-	depends on USE_AFE
 	help
 		Select the afe interface to be used.
 
@@ -29,306 +24,175 @@ choice AFE_INTERFACE_SEL
 
 endchoice
 
-config USE_NSNET
-    bool "use nsnet"
-    default "n"
-
 choice SR_NSN_MODEL_LOAD
-    prompt "Select deep noise suppression"
-    default SR_NSN_NSNET2
-    depends on USE_NSNET
+    prompt "Select noise suppression model"
+    default SR_NSN_WEBRTC
     help
-        Select the deep noise suppression to be loaded.
+        Select the noise suppression model to be loaded.
 
-    config SR_NSN_NONE
-        bool "None"
+    config SR_NSN_WEBRTC
+        bool "noise suppression (WebRTC)"
 
-    config SR_NSN_NSNET1
-        bool "Deep noise suppression v1 (nsnet1)"
-        depends on IDF_TARGET_ESP32S3
     config SR_NSN_NSNET2
         bool "Deep noise suppression v2 (nsnet2)"
-	depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+	    depends on IDF_TARGET_ESP32S3 ||  IDF_TARGET_ESP32P4
 endchoice
 
-config USE_WAKENET
-    bool "use wakenet"
-    default "y"
+choice SR_VADN_MODEL_LOAD
+    prompt "Select voice activity detection"
+    default SR_VADN_WEBRTC
+    help
+        Select the vad model to be loaded.
+
+    config SR_VADN_WEBRTC
+        bool "voice activity detection (WebRTC)"
+    
+    config SR_VADN_VADNET1_MEDIUM
+        bool "voice activity detection (vadnet1 medium)"
+	    depends on IDF_TARGET_ESP32S3 ||  IDF_TARGET_ESP32P4
+endchoice
 
 choice SR_WN_MODEL_LOAD
     prompt "Select wake words"
-    default SR_WN_WN9_HILEXIN
-    depends on USE_WAKENET
+    default SR_WN_WN5_HILEXIN
+    depends on IDF_TARGET_ESP32
     help
         Select the Wake Words to be loaded.
 
     config SR_WN_WN5_HILEXIN
-        bool "Hi,乐鑫 (wn5_hilexin)"
-        depends on IDF_TARGET_ESP32
+        bool "Hi,Lexin (wn5_hilexin)"
 
     config SR_WN_WN5X3_HILEXIN
-        bool "Hi,乐鑫 (wn5_hilexinX3)"
-        depends on IDF_TARGET_ESP32
+        bool "Hi,Lexin (wn5_hilexinX3)"
 
     config SR_WN_WN5_NIHAOXIAOZHI
-        bool "你好小智 (wn5_nihaoxiaozhi)"
-        depends on IDF_TARGET_ESP32
+        bool "nihaoxiaozhi (wn5_nihaoxiaozhi)"
 
     config SR_WN_WN5X3_NIHAOXIAOZHI
-        bool "你好小智 (wn5_nihaoxiaozhiX3)"
-        depends on IDF_TARGET_ESP32
+        bool "nihaoxiaozhi (wn5_nihaoxiaozhiX3)"
 
     config SR_WN_WN5X3_NIHAOXIAOXIN
-        bool "你好小鑫 (wn5_nihaoxiaoxinX3)"
-        depends on IDF_TARGET_ESP32
-
-    config SR_WN_WN8_ALEXA
-        bool "Alexa (wn8_alexa)"
-        depends on IDF_TARGET_ESP32S3
-
-    config SR_WN_WN9_HILEXIN
-        bool "Hi,乐鑫 (wn9_hilexin)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_XIAOAITONGXUE
-        bool "小爱同学 (wn9_xiaoaitongxue)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_ALEXA
-        bool "Alexa (wn9_alexa)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HIESP
-        bool "Hi,ESP (wn9_hiesp)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HIMFIVE
-        bool "Hi,M Five (wn9_himfive)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-    
-    config SR_WN_WN9_NIHAOXIAOZHI_TTS
-        bool "你好小智 (wn9_nihaoxiaozhi_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-    
-    config SR_WN_WN9_JARVIS_TTS
-        bool "Jarvis (wn9_jarvis_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-    
-    config SR_WN_WN9_COMPUTER_TTS
-        bool "computer (wn9_computer_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HEYWILLOW_TTS
-        bool "Hey,Willow (wn9_heywillow_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_SOPHIA_TTS
-        bool "Sophia (wn9_sophia_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_NIHAOXIAOXIN_TTS
-        bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_XIAOMEITONGXUE_TTS
-        bool "小美同学 (wn9_xiaomeitongxue_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HIXIAOXING_TTS
-        bool "Hi,小星 (wn9_hixiaoxing_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_MYCROFT_TTS
-        bool "Mycroft (wn9_mycroft_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HEYPRINTER_TTS
-        bool "Hey,Printer (wn9_heyprinter_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_XIAOLONGXIAOLONG_TTS
-        bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_MIAOMIAOTONGXUE_TTS
-        bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HIJOY_TTS
-        bool "Hi,Joy (wn9_hijoy_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HILILI_TTS
-        bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HITELLY_TTS
-        bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HEYWANDA_TTS
-        bool "Hey,Wanda (wn9_heywanda_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HIMIAOMIAO_TTS
-        bool "Hi,喵喵 (wn9_himiaomiao_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_XIAOBINXIAOBIN_TTS
-        bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_HAIXIAOWU_TTS
-        bool "Hi,小巫 (wn9_haixiaowu_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_ASTROLABE_TTS
-        bool "Astrolabe (wn9_astrolabe_tts)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_XIAOYAXIAOYA_TTS2
-        bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-    
-    config SR_WN_WN9_HIJASON_TTS2
-        bool "Hi,Jason (wn9_hijason_tts2)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_LINAIBAN_TTS2
-        bool "璃奈板 (wn9_linaiban_tts2)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_WN9_CUSTOMWORD
-        bool "customized word (wn9_customword)"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
-
-    config SR_WN_LOAD_MULIT_WORD
-        bool "Load Multiple Wake Words"
-        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+        bool "nihaoxiaoxin (wn5_nihaoxiaoxinX3)"
 
 endchoice
 
 menu "Load Multiple Wake Words"
-    depends on SR_WN_LOAD_MULIT_WORD
+    depends on IDF_TARGET_ESP32S3 ||  IDF_TARGET_ESP32P4
 
-    config SR_WN_WN9_HILEXIN_MULTI
+    config SR_WN_WN9_HILEXIN
     bool "Hi,乐鑫 (wn9_hilexin)"
     default False
 
-    config SR_WN_WN9_XIAOAITONGXUE_MULTI
+    config SR_WN_WN9_XIAOAITONGXUE
     bool "小爱同学 (wn9_xiaoaitongxue)"
     default False
 
-    config SR_WN_WN9_NIHAOXIAOZHI_TTS_MULTI
+    config SR_WN_WN9_NIHAOXIAOZHI_TTS
     bool "你好小智 (wn9_nihaoxiaozhi_tts)"
     default False
 
-    config SR_WN_WN9_ALEXA_MULTI
+    config SR_WN_WN9_ALEXA
     bool "Alexa (wn9_alexa)"
     default False
 
-    config SR_WN_WN9_HIESP_MULTI
+    config SR_WN_WN9_HIESP
     bool "Hi,ESP (wn9_hiesp)"
     default False
 
-    config SR_WN_WN9_JARVIS_TTS_MULTI
+    config SR_WN_WN9_JARVIS_TTS
     bool "Jarvis (wn9_jarvis_tts)"
     default False
     
-    config SR_WN_WN9_COMPUTER_TTS_MULTI
+    config SR_WN_WN9_COMPUTER_TTS
     bool "computer (wn9_computer_tts)"
     default False
 
-    config SR_WN_WN9_HEYWILLOW_TTS_MULTI
+    config SR_WN_WN9_HEYWILLOW_TTS
     bool "Hey,Willow (wn9_heywillow_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_SOPHIA_TTS_MULTI
+    config SR_WN_WN9_SOPHIA_TTS
     bool "Sophia (wn9_sophia_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_NIHAOXIAOXIN_TTS_MULTI
+    config SR_WN_WN9_NIHAOXIAOXIN_TTS
     bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_XIAOMEITONGXUE_TTS_MULTI
+    config SR_WN_WN9_XIAOMEITONGXUE_TTS
     bool "小美同学 (wn9_xiaomeitongxue_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HEYPRINTER_TTS_MULTI
+    config SR_WN_WN9_HEYPRINTER_TTS
     bool "Hey,Printer (wn9_heyprinter_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_XIAOLONGXIAOLONG_TTS_MULTI
+    config SR_WN_WN9_XIAOLONGXIAOLONG_TTS
     bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_MIAOMIAOTONGXUE_TTS_MULTI
+    config SR_WN_WN9_MIAOMIAOTONGXUE_TTS
     bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
 
-    config SR_WN_WN9_HEYWANDA_TTS_MULTI
+    config SR_WN_WN9_HEYWANDA_TTS
     bool "Hey,Wanda (wn9_heywanda_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HIMIAOMIAO_TTS_MULTI
+    config SR_WN_WN9_HIMIAOMIAO_TTS
     bool "Hi,喵喵 (wn9_himiaomiao_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
 
-    config SR_WN_WN9_MYCROFT_TTS_MULTI
+    config SR_WN_WN9_MYCROFT_TTS
     bool "Mycroft (wn9_mycroft_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HIJOY_TTS_MULTI
+    config SR_WN_WN9_HIJOY_TTS
     bool "Hi,Joy (wn9_hijoy_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HILILI_TTS_MULTI
+    config SR_WN_WN9_HILILI_TTS
     bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HITELLY_TTS_MULTI
+    config SR_WN_WN9_HITELLY_TTS
     bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_XIAOBINXIAOBIN_TTS_MULTI
+    config SR_WN_WN9_XIAOBINXIAOBIN_TTS
     bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HAIXIAOWU_TTS_MULTI
+    config SR_WN_WN9_HAIXIAOWU_TTS
     bool "Hi,小巫 (wn9_haixiaowu_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_ASTROLABE_TTS_MULTI
+    config SR_WN_WN9_ASTROLABE_TTS
     bool "Astrolabe (wn9_astrolabe_tts)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_XIAOYAXIAOYA_TTS2_MULTI
+    config SR_WN_WN9_XIAOYAXIAOYA_TTS2
     bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_HIJASON_TTS2_MULTI
+    config SR_WN_WN9_HIJASON_TTS2
     bool "Hi,Jason (wn9_hijason_tts2)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
-    config SR_WN_WN9_LINAIBAN_TTS2_MULTI
+    config SR_WN_WN9_LINAIBAN_TTS2
     bool "璃奈板 (wn9_linaiban_tts2)"
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
+    default False
 
 endmenu
 
-config USE_MULTINET
-    bool "use multinet"
-    default "y"
 
 choice CHINESE_SR_MN_MODEL_SEL
     prompt "Chinese Speech Commands Model"
-    default SR_MN_CN_MULTINET6_QUANT
-    depends on USE_MULTINET
+    default SR_MN_CN_NONE
     help
-        Select the Wake Word Engine to be used.
+        Select the Chinese Speech Commands Model.
 
     config SR_MN_CN_NONE
         bool "None"
@@ -362,9 +226,8 @@ endchoice
 choice ENGLISH_SR_MN_MODEL_SEL
     prompt "English Speech Commands Model"
     default SR_MN_EN_NONE
-    depends on USE_MULTINET
     help
-        Select the Wake Word Engine to be used.
+        Select the English Speech Commands Model.
 
     config SR_MN_EN_NONE
         bool "None"
diff --git a/conftest.py b/conftest.py
index 4c063b3..c6a3038 100644
--- a/conftest.py
+++ b/conftest.py
@@ -202,7 +202,7 @@ class IdfPytestEmbedded:
         for item in items:
             # default timeout 5 mins
             if 'timeout' not in item.keywords:
-                item.add_marker(pytest.mark.timeout(8 * 60))
+                item.add_marker(pytest.mark.timeout(500 * 60))
 
         # filter all the test cases with "--target"
         if self.target:
diff --git a/include/esp32/dl_lib.h b/include/esp32/dl_lib.h
index 63ba6da..47e7c86 100644
--- a/include/esp32/dl_lib.h
+++ b/include/esp32/dl_lib.h
@@ -78,7 +78,7 @@ void *dl_lib_calloc_psram(int cnt, int size, int align);
 /**
  * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram` 
  * 
- * @param prt    Pointer to free
+ * @param ptr    Pointer to free
  */
 void dl_lib_free(void *ptr);
 
@@ -415,4 +415,4 @@ dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, con
 }
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/esp32/dl_lib_convq8_queue.h b/include/esp32/dl_lib_convq8_queue.h
index 0e53902..28c5da7 100644
--- a/include/esp32/dl_lib_convq8_queue.h
+++ b/include/esp32/dl_lib_convq8_queue.h
@@ -292,6 +292,7 @@ qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, in
 
 void print_convq8(dl_convq8_queue_t *cq, int offset);
 void print_convq(dl_convq_queue_t *cq, int offset);
+void dl_relu_convq8(dl_convq8_queue_t *cq);
 
 void lstmq8_free(void);
 
diff --git a/include/esp32/dl_lib_convq_queue.h b/include/esp32/dl_lib_convq_queue.h
index c71d5ca..ff190fe 100644
--- a/include/esp32/dl_lib_convq_queue.h
+++ b/include/esp32/dl_lib_convq_queue.h
@@ -279,9 +279,9 @@ dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t
 dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
                                        const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
 
-dl_matrix2dq_t *dl_convq16_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
-                                       dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
-                                       const dl_matrix2dq_t *bias, int prenum);
+dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
+                                       dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
+                                       dl_matrix2dq_t *bias, int prenum);
 
 /**
  * @brief Allocate a fixed-point multi channel convolution queue 
diff --git a/include/esp32/dl_lib_matrix.h b/include/esp32/dl_lib_matrix.h
index b5fae74..59f7d79 100644
--- a/include/esp32/dl_lib_matrix.h
+++ b/include/esp32/dl_lib_matrix.h
@@ -25,10 +25,6 @@
 extern "C" {
 #endif
 
-// #ifdef CONFIG_IDF_TARGET_ESP32S3
-// #include "dl_tie728_bzero.h"
-// #endif
-
 typedef float fptp_t;
 
 #if CONFIG_BT_SHARE_MEM_REUSE
diff --git a/include/esp32/esp_aec.h b/include/esp32/esp_aec.h
index 03afc90..deb031c 100644
--- a/include/esp32/esp_aec.h
+++ b/include/esp32/esp_aec.h
@@ -23,7 +23,8 @@ extern "C" {
 #define USE_AEC_FFT                      // Not kiss_fft
 #define AEC_USE_SPIRAM      0
 #define AEC_SAMPLE_RATE     16000        // Only Support 16000Hz
-#define AEC_FRAME_LENGTH_MS 16
+//#define AEC_FRAME_LENGTH_MS 16
+#define AEC_FRAME_LENGTH_MS 32
 #define AEC_FILTER_LENGTH   1200         // Number of samples of echo to cancel
 
 typedef void* aec_handle_t;
diff --git a/include/esp32/esp_afe_config.h b/include/esp32/esp_afe_config.h
index 702d859..c32689d 100644
--- a/include/esp32/esp_afe_config.h
+++ b/include/esp32/esp_afe_config.h
@@ -90,6 +90,12 @@ typedef struct {
     afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
     afe_ns_mode_t afe_ns_mode;
     char *afe_ns_model_name;
+    bool fixed_first_channel;                // If true, the channel after first wake-up is fixed to raw data of microphone
+                                             // otherwise, select channel number by wakenet
+    char *vad_model_name;                    // The model name of vad, support vadnet1 and vadnet1_small
+    int vad_min_speech_ms;                   // The minimum duration of speech in ms. It should be bigger than 32 ms
+    int vad_min_noise_ms;                    // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
+    bool vad_mute_playback;                  // If true, the playback will be muted for vad detection
 } afe_config_t;
 
 
@@ -123,6 +129,47 @@ typedef struct {
     .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
+    .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
+}
+#elif CONFIG_IDF_TARGET_ESP32P4
+#define AFE_CONFIG_DEFAULT() { \
+    .aec_init = true, \
+    .se_init = true, \
+    .vad_init = true, \
+    .wakenet_init = true, \
+    .voice_communication_init = false, \
+    .voice_communication_agc_init = false, \
+    .voice_communication_agc_gain = 15, \
+    .vad_mode = VAD_MODE_3, \
+    .wakenet_model_name = NULL, \
+    .wakenet_model_name_2 = NULL, \
+    .wakenet_mode = DET_MODE_90, \
+    .afe_mode = SR_MODE_LOW_COST, \
+    .afe_perferred_core = 0, \
+    .afe_perferred_priority = 5, \
+    .afe_ringbuf_size = 50, \
+    .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
+    .afe_linear_gain = 1.0, \
+    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
+    .pcm_config = { \
+        .total_ch_num = 2, \
+        .mic_num = 1, \
+        .ref_num = 1, \
+        .sample_rate = 16000, \
+    }, \
+    .debug_init = false, \
+    .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
+    .afe_ns_mode = NS_MODE_SSP, \
+    .afe_ns_model_name = NULL, \
+    .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #elif CONFIG_IDF_TARGET_ESP32S3
 #define AFE_CONFIG_DEFAULT() { \
@@ -154,6 +201,11 @@ typedef struct {
     .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
+    .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #endif
 
diff --git a/include/esp32/esp_afe_sr_iface.h b/include/esp32/esp_afe_sr_iface.h
index daf5b92..84d7000 100644
--- a/include/esp32/esp_afe_sr_iface.h
+++ b/include/esp32/esp_afe_sr_iface.h
@@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
 {
     int16_t *data;                          // the data of audio.
     int data_size;                          // the size of data. The unit is byte.
+    int16_t *vad_cache;                     // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
+    int vad_cache_size;                     // the size of vad_cache. The unit is byte.
     float data_volume;                      // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
                                             // if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length. 
     wakenet_state_t wakeup_state;           // the value is wakenet_state_t
@@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
     int wakenet_model_index;                // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
     afe_vad_state_t vad_state;              // the value is afe_vad_state_t
     int trigger_channel_id;                 // the channel index of output
-    int wake_word_length;                   // the length of wake word. It's unit is the number of samples.
+    int wake_word_length;                   // the length of wake word. The unit is the number of samples.
     int ret_value;                          // the return state of fetch function
     void* reserved;                         // reserved for future use
 } afe_fetch_result_t;
@@ -112,7 +114,7 @@ typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *af
  * @brief reset ringbuf of AFE.
  *
  * @param afe          The AFE_SR object to query
- * @return             -1: fail, 0: success
+ * @return             -1: fail, 1: success
  */
 typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
 
@@ -122,7 +124,7 @@ typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
  *
  * @param afe                The AFE_SR object to query
  * @param wakenet_word       The wakenet word, should be DEFAULT_WAKE_WORD or EXTRA_WAKE_WORD
- * @return             0: fail, 1: success
+ * @return             -1: fail, 1: success
  */
 typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* model_name);
 
@@ -130,7 +132,7 @@ typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* m
  * @brief Disable wakenet model.
  *
  * @param afe          The AFE_SR object to query
- * @return             0: fail, 1: success
+ * @return             -1: fail, 0: disabled, 1: enabled
  */
 typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
 
@@ -138,7 +140,7 @@ typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
  * @brief Enable wakenet model.
  *
  * @param afe          The AFE_SR object to query
- * @return             0: fail, 1: success
+ * @return             -1: fail, 0: disabled, 1: enabled
  */
 typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
 
@@ -146,7 +148,7 @@ typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
  * @brief Disable AEC algorithm.
  *
  * @param afe          The AFE_SR object to query
- * @return             0: fail, 1: success
+ * @return             -1: fail, 0: disabled, 1: enabled
  */
 typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
 
@@ -154,7 +156,7 @@ typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
  * @brief Enable AEC algorithm.
  *
  * @param afe          The AFE_SR object to query
- * @return             0: fail, 1: success
+ * @return             -1: fail, 0: disabled, 1: enabled
  */
 typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
 
@@ -162,7 +164,7 @@ typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
  * @brief Disable SE algorithm.
  *
  * @param afe          The AFE_SR object to query
- * @return             0: fail, 1: success
+ * @return             -1: fail, 0: disabled, 1: enabled
  */
 typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
 
@@ -170,7 +172,7 @@ typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
  * @brief Enable SE algorithm.
  *
  * @param afe          The AFE_SR object to query
- * @return             0: fail, 1: success
+ * @return             -1: fail, 0: disabled, 1: enabled
  */
 typedef int (*esp_afe_sr_iface_op_enable_se_t)(esp_afe_sr_data_t *afe);
 
diff --git a/include/esp32/esp_afe_sr_models.h b/include/esp32/esp_afe_sr_models.h
index feaad43..39de63f 100644
--- a/include/esp32/esp_afe_sr_models.h
+++ b/include/esp32/esp_afe_sr_models.h
@@ -4,7 +4,6 @@
 extern "C" {
 #endif
 
-#if defined CONFIG_USE_AFE
 #include "esp_afe_sr_iface.h"
 
 
@@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
 #endif
 
 
-#else
-
-
-#include "esp_afe_sr_iface.h"
-extern const esp_afe_sr_iface_t esp_afe_sr_v1;
-extern const esp_afe_sr_iface_t esp_afe_vc_v1;
-#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
-#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
-
-#endif
-
 #ifdef __cplusplus
 }
 #endif
\ No newline at end of file
diff --git a/include/esp32/esp_nsn_models.h b/include/esp32/esp_nsn_models.h
index 0a7e334..8165e27 100644
--- a/include/esp32/esp_nsn_models.h
+++ b/include/esp32/esp_nsn_models.h
@@ -2,8 +2,16 @@
 
 #include "esp_nsn_iface.h"
 
-// The prefix of nsnet model name is used to filter all wakenet from availabel models.
+/*
+The prefix of nsnet model names.
+Currently there are nsnet1 and nsnet2.
+*/
 #define ESP_NSNET_PREFIX "nsnet"
 
-extern const esp_nsn_iface_t esp_nsnet1_quantized;
-#define ESP_NSN_HANDLE esp_nsnet1_quantized
\ No newline at end of file
+/**
+ * @brief Get the nsnet handle from model name
+ *
+ * @param model_name   The name of model 
+ * @returns The handle of multinet
+ */
+esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
diff --git a/include/esp32/esp_vad.h b/include/esp32/esp_vad.h
index 2440d39..90f8e20 100644
--- a/include/esp32/esp_vad.h
+++ b/include/esp32/esp_vad.h
@@ -25,22 +25,65 @@ extern "C" {
 
 /**
  * @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
- * restrictive in reporting speech.
+ * restrictive in reporting speech. To have more audio reported as speech, select a lower mode.
  */
 typedef enum {
-    VAD_MODE_0 = 0,
-    VAD_MODE_1,
-    VAD_MODE_2,
-    VAD_MODE_3,
-    VAD_MODE_4
+    VAD_MODE_0 = 0,  // Normal
+    VAD_MODE_1,      // Aggressive
+    VAD_MODE_2,      // Very Aggressive
+    VAD_MODE_3,      // Very Very Aggressive
+    VAD_MODE_4       // Very Very Very Aggressive
 } vad_mode_t;
 
 typedef enum {
     VAD_SILENCE = 0,
-    VAD_SPEECH
+    VAD_SPEECH = 1,
 } vad_state_t;
 
-typedef void* vad_handle_t;
+typedef struct vad_trigger_tag {
+    vad_state_t state;
+    unsigned int min_speech_len;
+    unsigned int noise_len;
+    unsigned int min_noise_len;
+    unsigned int speech_len;
+} vad_trigger_t;
+
+#define vad_MAX_LEN (INT32_MAX - 1)  // parenthesized so the macro expands safely inside larger expressions
+/**
+ * @brief Allocate vad trigger
+ * 
+ * @param min_speech_len  Minimum frame number of speech duration
+ * @param min_noise_len   Minimum frame number of noise duration
+ * 
+ * @return Trigger pointer
+ **/
+vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
+
+/**
+ * @brief Free vad trigger
+ **/
+void vad_trigger_free(vad_trigger_t *trigger);
+
+/**
+ * @brief Reset vad trigger
+ **/
+void vad_trigger_reset(vad_trigger_t *trigger);
+
+/**
+ * @brief Detect voice activity by trigger
+ **/
+vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
+
+
+typedef struct {
+    vad_trigger_t *trigger;
+    void *vad_inst;
+}vad_handle_with_trigger_t;
+
+typedef vad_handle_with_trigger_t* vad_handle_t;
+
+// typedef vad_handle_tag * vad_handle_t;
+
 
 /**
  * @brief Creates an instance to the VAD structure.
@@ -53,6 +96,18 @@ typedef void* vad_handle_t;
  */
 vad_handle_t vad_create(vad_mode_t vad_mode);
 
+/**
+ * @brief Creates an instance of the VAD structure with custom minimum speech/noise durations.
+ *
+ * @param vad_mode          Sets the VAD operating mode.
+ * @param min_speech_len    Minimum frame number of speech duration
+ * @param min_noise_len     Minimum frame number of noise duration
+ * @return
+ *         - NULL: Create failed
+ *         - Others: The instance of VAD
+ */
+vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
+
 /**
  * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
  *
diff --git a/include/esp32/flite_g2p.h b/include/esp32/flite_g2p.h
index 55aeaa6..e91425e 100644
--- a/include/esp32/flite_g2p.h
+++ b/include/esp32/flite_g2p.h
@@ -9,7 +9,7 @@ typedef struct {
 
 void flite_g2p_result_free(flite_g2p_result *result);
 
-flite_g2p_result *flite_g2p_get_result(char *grapheme);
+flite_g2p_result *flite_g2p_get_result(const char *grapheme);
 
 void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
 
diff --git a/include/esp32p4/esp_afe_config.h b/include/esp32p4/esp_afe_config.h
index 6cac4c6..c32689d 100644
--- a/include/esp32p4/esp_afe_config.h
+++ b/include/esp32p4/esp_afe_config.h
@@ -92,6 +92,10 @@ typedef struct {
     char *afe_ns_model_name;
     bool fixed_first_channel;                // If true, the channel after first wake-up is fixed to raw data of microphone
                                              // otherwise, select channel number by wakenet
+    char *vad_model_name;                    // The model name of vad, support vadnet1 and vadnet1_small
+    int vad_min_speech_ms;                   // The minimum duration of speech in ms. It should be bigger than 32 ms
+    int vad_min_noise_ms;                    // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
+    bool vad_mute_playback;                  // If true, the playback will be muted for vad detection
 } afe_config_t;
 
 
@@ -126,6 +130,10 @@ typedef struct {
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
     .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #elif CONFIG_IDF_TARGET_ESP32P4
 #define AFE_CONFIG_DEFAULT() { \
@@ -158,6 +166,10 @@ typedef struct {
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
     .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #elif CONFIG_IDF_TARGET_ESP32S3
 #define AFE_CONFIG_DEFAULT() { \
@@ -190,6 +202,10 @@ typedef struct {
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
     .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #endif
 
diff --git a/include/esp32p4/esp_afe_sr_iface.h b/include/esp32p4/esp_afe_sr_iface.h
index 0b52ea4..84d7000 100644
--- a/include/esp32p4/esp_afe_sr_iface.h
+++ b/include/esp32p4/esp_afe_sr_iface.h
@@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
 {
     int16_t *data;                          // the data of audio.
     int data_size;                          // the size of data. The unit is byte.
+    int16_t *vad_cache;                     // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
+    int vad_cache_size;                     // the size of vad_cache. The unit is byte.
     float data_volume;                      // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
                                             // if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length. 
     wakenet_state_t wakeup_state;           // the value is wakenet_state_t
@@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
     int wakenet_model_index;                // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
     afe_vad_state_t vad_state;              // the value is afe_vad_state_t
     int trigger_channel_id;                 // the channel index of output
-    int wake_word_length;                   // the length of wake word. It's unit is the number of samples.
+    int wake_word_length;                   // the length of wake word. The unit is the number of samples.
     int ret_value;                          // the return state of fetch function
     void* reserved;                         // reserved for future use
 } afe_fetch_result_t;
diff --git a/include/esp32p4/esp_afe_sr_models.h b/include/esp32p4/esp_afe_sr_models.h
index feaad43..39de63f 100644
--- a/include/esp32p4/esp_afe_sr_models.h
+++ b/include/esp32p4/esp_afe_sr_models.h
@@ -4,7 +4,6 @@
 extern "C" {
 #endif
 
-#if defined CONFIG_USE_AFE
 #include "esp_afe_sr_iface.h"
 
 
@@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
 #endif
 
 
-#else
-
-
-#include "esp_afe_sr_iface.h"
-extern const esp_afe_sr_iface_t esp_afe_sr_v1;
-extern const esp_afe_sr_iface_t esp_afe_vc_v1;
-#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
-#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
-
-#endif
-
 #ifdef __cplusplus
 }
 #endif
\ No newline at end of file
diff --git a/include/esp32p4/esp_vad.h b/include/esp32p4/esp_vad.h
index 2440d39..90f8e20 100644
--- a/include/esp32p4/esp_vad.h
+++ b/include/esp32p4/esp_vad.h
@@ -25,22 +25,65 @@ extern "C" {
 
 /**
  * @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
- * restrictive in reporting speech.
+ * restrictive in reporting speech. To have more audio reported as speech, select a lower mode.
  */
 typedef enum {
-    VAD_MODE_0 = 0,
-    VAD_MODE_1,
-    VAD_MODE_2,
-    VAD_MODE_3,
-    VAD_MODE_4
+    VAD_MODE_0 = 0,  // Normal
+    VAD_MODE_1,      // Aggressive
+    VAD_MODE_2,      // Very Aggressive
+    VAD_MODE_3,      // Very Very Aggressive
+    VAD_MODE_4       // Very Very Very Aggressive
 } vad_mode_t;
 
 typedef enum {
     VAD_SILENCE = 0,
-    VAD_SPEECH
+    VAD_SPEECH = 1,
 } vad_state_t;
 
-typedef void* vad_handle_t;
+typedef struct vad_trigger_tag {
+    vad_state_t state;
+    unsigned int min_speech_len;
+    unsigned int noise_len;
+    unsigned int min_noise_len;
+    unsigned int speech_len;
+} vad_trigger_t;
+
+#define vad_MAX_LEN (INT32_MAX - 1)  // parenthesized so the macro expands safely inside larger expressions
+/**
+ * @brief Allocate a VAD trigger
+ * 
+ * @param min_speech_len  Minimum frame number of speech duration
+ * @param min_noise_len   Minimum frame number of noise duration
+ * 
+ * @return Trigger pointer
+ **/
+vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
+
+/**
+ * @brief Free a VAD trigger
+ **/
+void vad_trigger_free(vad_trigger_t *trigger);
+
+/**
+ * @brief Reset a VAD trigger
+ **/
+void vad_trigger_reset(vad_trigger_t *trigger);
+
+/**
+ * @brief Detect voice activity using the trigger
+ **/
+vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
+
+
+typedef struct {
+    vad_trigger_t *trigger;
+    void *vad_inst;
+}vad_handle_with_trigger_t;
+
+typedef vad_handle_with_trigger_t* vad_handle_t;
+
+// typedef vad_handle_tag * vad_handle_t;
+
 
 /**
  * @brief Creates an instance to the VAD structure.
@@ -53,6 +96,18 @@ typedef void* vad_handle_t;
  */
 vad_handle_t vad_create(vad_mode_t vad_mode);
 
+/**
+ * @brief Creates an instance of the VAD structure with custom minimum speech/noise durations.
+ *
+ * @param vad_mode          Sets the VAD operating mode.
+ * @param min_speech_len    Minimum frame number of speech duration
+ * @param min_noise_len     Minimum frame number of noise duration
+ * @return
+ *         - NULL: Create failed
+ *         - Others: The instance of VAD
+ */
+vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
+
 /**
  * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
  *
diff --git a/include/esp32p4/esp_vadn_iface.h b/include/esp32p4/esp_vadn_iface.h
new file mode 100644
index 0000000..1ec8bb9
--- /dev/null
+++ b/include/esp32p4/esp_vadn_iface.h
@@ -0,0 +1,142 @@
+#pragma once
+#include "esp_vad.h"
+#include "stdint.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque model data container
+typedef struct model_iface_data_t model_iface_data_t;
+
+// /**
+//  * @brief The state of vad
+//  */
+// typedef enum {
+//     VAD_NOISE = -1,  // Noise
+//     VADNET_STATE_SILENCE = 0, // Silence
+//     VAD_SPEECH = 1   // Speech
+// } vad_state_t;
+
+/**
+ * @brief Easy function type to initialize a model instance with a detection mode
+ * and specified model name
+ *
+ * @param model_name  The specified model name
+ * @param mode        The voice activity detection mode
+ * @param channel_num The number of input audio channels
+ * @param min_speech_ms  The minimum duration of speech in ms to trigger vad
+ * speech
+ * @param min_noise_ms   The minimum duration of noise in ms to trigger vad
+ * noise
+ * @returns Handle to the model data
+ */
+typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
+    const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
+
+/**
+ * @brief Get the amount of samples that need to be passed to the detect
+ * function
+ *
+ * Every speech recognition model processes a certain number of samples at the
+ * same time. This function can be used to query that amount. Note that the
+ * returned amount is in 16-bit samples, not in bytes.
+ *
+ * @param model The model object to query
+ * @return The amount of samples to feed the detect function
+ */
+typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
+
+/**
+ * @brief Get the number of audio channels that need to be passed to the detect
+ * function
+ *
+ * The model is created for a given number of input audio channels (see the
+ * channel_num argument of the create function). This function can be used to
+ * query the channel number of the given model.
+ *
+ * @param model The model object to query
+ * @return The number of channels to feed the detect function
+ */
+typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
+
+/**
+ * @brief Get the sample rate of the samples to feed to the detect function
+ *
+ * @param model The model object to query
+ * @return The sample rate, in hz
+ */
+typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
+
+/**
+ * @brief Set the detection threshold to manually adjust the probability
+ *
+ * @param model The model object to query
+ * @param det_threshold The threshold to trigger speech detection, the range of
+ * det_threshold is 0.5~0.9999
+ * @return 0: setting failed, 1: setting success
+ */
+typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
+
+/**
+ * @brief Get the voice activity detection threshold
+ *
+ * @param model The model object to query
+ * @returns the detection threshold
+ */
+typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
+
+/**
+ * @brief Feed samples of an audio stream to the vad model and detect whether is
+ * voice.
+ *
+ * @param model The model object to query
+ * @param samples An array of 16-bit signed audio samples. The array size used
+ * can be queried by the get_samp_chunksize function.
+ * @return The detected state: VAD_SPEECH if voice activity is detected,
+ * VAD_SILENCE otherwise.
+ */
+typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
+
+/**
+ * @brief Get the triggered channel index. Channel index starts from zero
+ *
+ * @param model The model object to query
+ * @return The channel index
+ */
+typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
+
+/**
+ * @brief Clean all states of model
+ *
+ * @param model The model object to query
+ */
+typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
+
+/**
+ * @brief Destroy a model object
+ *
+ * @param model Model object to destroy
+ */
+typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
+
+/**
+ * This structure contains the functions used to do operations on a voice
+ * activity detection model.
+ */
+typedef struct {
+    esp_vadn_iface_op_create_t create;
+    esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
+    esp_vadn_iface_op_get_channel_num_t get_channel_num;
+    esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
+    esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
+    esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
+    esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
+    esp_vadn_iface_op_detect_t detect;
+    esp_vadn_iface_op_clean_t clean;
+    esp_vadn_iface_op_destroy_t destroy;
+} esp_vadn_iface_t;
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/include/esp32p4/esp_vadn_models.h b/include/esp32p4/esp_vadn_models.h
new file mode 100644
index 0000000..eadc55f
--- /dev/null
+++ b/include/esp32p4/esp_vadn_models.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "esp_vadn_iface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The prefix of vadnet model names, used to filter all vadnet models from the available models.
+#define ESP_VADN_PREFIX "vadnet"
+
+/**
+ * @brief Get the vadnet handle from model name
+ *
+ * @param model_name   The name of model 
+ * @returns The handle of vadnet
+ */
+const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/include/esp32s3/esp_afe_config.h b/include/esp32s3/esp_afe_config.h
index 6cac4c6..5f70735 100644
--- a/include/esp32s3/esp_afe_config.h
+++ b/include/esp32s3/esp_afe_config.h
@@ -92,6 +92,10 @@ typedef struct {
     char *afe_ns_model_name;
     bool fixed_first_channel;                // If true, the channel after first wake-up is fixed to raw data of microphone
                                              // otherwise, select channel number by wakenet
+    char *vad_model_name;                    // The model name of vad, support vadnet1 and vadnet1_small
+    int vad_min_speech_ms;                   // The minimum duration of speech in ms. It should be bigger than 32 ms
+    int vad_min_noise_ms;                    // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
+    bool vad_mute_playback;                  // If true, the playback will be muted for vad detection
 } afe_config_t;
 
 
@@ -104,7 +108,7 @@ typedef struct {
     .voice_communication_init = false, \
     .voice_communication_agc_init = false, \
     .voice_communication_agc_gain = 15, \
-    .vad_mode = VAD_MODE_3, \
+    .vad_mode = VAD_MODE_0, \
     .wakenet_model_name = NULL, \
     .wakenet_model_name_2 = NULL, \
     .wakenet_mode = DET_MODE_90, \
@@ -126,6 +130,10 @@ typedef struct {
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
     .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #elif CONFIG_IDF_TARGET_ESP32P4
 #define AFE_CONFIG_DEFAULT() { \
@@ -136,7 +144,7 @@ typedef struct {
     .voice_communication_init = false, \
     .voice_communication_agc_init = false, \
     .voice_communication_agc_gain = 15, \
-    .vad_mode = VAD_MODE_3, \
+    .vad_mode = VAD_MODE_0, \
     .wakenet_model_name = NULL, \
     .wakenet_model_name_2 = NULL, \
     .wakenet_mode = DET_MODE_90, \
@@ -158,6 +166,10 @@ typedef struct {
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
     .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #elif CONFIG_IDF_TARGET_ESP32S3
 #define AFE_CONFIG_DEFAULT() { \
@@ -168,7 +180,7 @@ typedef struct {
     .voice_communication_init = false, \
     .voice_communication_agc_init = false, \
     .voice_communication_agc_gain = 15, \
-    .vad_mode = VAD_MODE_3, \
+    .vad_mode = VAD_MODE_0, \
     .wakenet_model_name = NULL, \
     .wakenet_model_name_2 = NULL, \
     .wakenet_mode = DET_MODE_2CH_90, \
@@ -190,6 +202,10 @@ typedef struct {
     .afe_ns_mode = NS_MODE_SSP, \
     .afe_ns_model_name = NULL, \
     .fixed_first_channel = true, \
+    .vad_model_name = NULL, \
+    .vad_min_speech_ms = 64, \
+    .vad_min_noise_ms = 256, \
+    .vad_mute_playback = false, \
 }
 #endif
 
diff --git a/include/esp32s3/esp_afe_sr_iface.h b/include/esp32s3/esp_afe_sr_iface.h
index 0b52ea4..84d7000 100644
--- a/include/esp32s3/esp_afe_sr_iface.h
+++ b/include/esp32s3/esp_afe_sr_iface.h
@@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
 {
     int16_t *data;                          // the data of audio.
     int data_size;                          // the size of data. The unit is byte.
+    int16_t *vad_cache;                     // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
+    int vad_cache_size;                     // the size of vad_cache. The unit is byte.
     float data_volume;                      // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
                                             // if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length. 
     wakenet_state_t wakeup_state;           // the value is wakenet_state_t
@@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
     int wakenet_model_index;                // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
     afe_vad_state_t vad_state;              // the value is afe_vad_state_t
     int trigger_channel_id;                 // the channel index of output
-    int wake_word_length;                   // the length of wake word. It's unit is the number of samples.
+    int wake_word_length;                   // the length of wake word. The unit is the number of samples.
     int ret_value;                          // the return state of fetch function
     void* reserved;                         // reserved for future use
 } afe_fetch_result_t;
diff --git a/include/esp32s3/esp_afe_sr_models.h b/include/esp32s3/esp_afe_sr_models.h
index feaad43..39de63f 100644
--- a/include/esp32s3/esp_afe_sr_models.h
+++ b/include/esp32s3/esp_afe_sr_models.h
@@ -4,7 +4,6 @@
 extern "C" {
 #endif
 
-#if defined CONFIG_USE_AFE
 #include "esp_afe_sr_iface.h"
 
 
@@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
 #endif
 
 
-#else
-
-
-#include "esp_afe_sr_iface.h"
-extern const esp_afe_sr_iface_t esp_afe_sr_v1;
-extern const esp_afe_sr_iface_t esp_afe_vc_v1;
-#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
-#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
-
-#endif
-
 #ifdef __cplusplus
 }
 #endif
\ No newline at end of file
diff --git a/include/esp32s3/esp_vad.h b/include/esp32s3/esp_vad.h
index 2440d39..90f8e20 100644
--- a/include/esp32s3/esp_vad.h
+++ b/include/esp32s3/esp_vad.h
@@ -25,22 +25,65 @@ extern "C" {
 
 /**
  * @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
- * restrictive in reporting speech.
+ * restrictive in reporting speech. To have more audio reported as speech, select a lower mode.
  */
 typedef enum {
-    VAD_MODE_0 = 0,
-    VAD_MODE_1,
-    VAD_MODE_2,
-    VAD_MODE_3,
-    VAD_MODE_4
+    VAD_MODE_0 = 0,  // Normal
+    VAD_MODE_1,      // Aggressive
+    VAD_MODE_2,      // Very Aggressive
+    VAD_MODE_3,      // Very Very Aggressive
+    VAD_MODE_4       // Very Very Very Aggressive
 } vad_mode_t;
 
 typedef enum {
     VAD_SILENCE = 0,
-    VAD_SPEECH
+    VAD_SPEECH = 1,
 } vad_state_t;
 
-typedef void* vad_handle_t;
+typedef struct vad_trigger_tag {
+    vad_state_t state;
+    unsigned int min_speech_len;
+    unsigned int noise_len;
+    unsigned int min_noise_len;
+    unsigned int speech_len;
+} vad_trigger_t;
+
+#define vad_MAX_LEN (INT32_MAX - 1)  // parenthesized so the macro expands safely inside larger expressions
+/**
+ * @brief Allocate a VAD trigger
+ * 
+ * @param min_speech_len  Minimum frame number of speech duration
+ * @param min_noise_len   Minimum frame number of noise duration
+ * 
+ * @return Trigger pointer
+ **/
+vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
+
+/**
+ * @brief Free a VAD trigger
+ **/
+void vad_trigger_free(vad_trigger_t *trigger);
+
+/**
+ * @brief Reset a VAD trigger
+ **/
+void vad_trigger_reset(vad_trigger_t *trigger);
+
+/**
+ * @brief Detect voice activity using the trigger
+ **/
+vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
+
+
+typedef struct {
+    vad_trigger_t *trigger;
+    void *vad_inst;
+}vad_handle_with_trigger_t;
+
+typedef vad_handle_with_trigger_t* vad_handle_t;
+
+// typedef vad_handle_tag * vad_handle_t;
+
 
 /**
  * @brief Creates an instance to the VAD structure.
@@ -53,6 +96,18 @@ typedef void* vad_handle_t;
  */
 vad_handle_t vad_create(vad_mode_t vad_mode);
 
+/**
+ * @brief Creates an instance of the VAD structure with custom minimum speech/noise durations.
+ *
+ * @param vad_mode          Sets the VAD operating mode.
+ * @param min_speech_len    Minimum frame number of speech duration
+ * @param min_noise_len     Minimum frame number of noise duration
+ * @return
+ *         - NULL: Create failed
+ *         - Others: The instance of VAD
+ */
+vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
+
 /**
  * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
  *
diff --git a/include/esp32s3/esp_vadn_iface.h b/include/esp32s3/esp_vadn_iface.h
new file mode 100644
index 0000000..1ec8bb9
--- /dev/null
+++ b/include/esp32s3/esp_vadn_iface.h
@@ -0,0 +1,142 @@
+#pragma once
+#include "esp_vad.h"
+#include "stdint.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque model data container
+typedef struct model_iface_data_t model_iface_data_t;
+
+// /**
+//  * @brief The state of vad
+//  */
+// typedef enum {
+//     VAD_NOISE = -1,  // Noise
+//     VADNET_STATE_SILENCE = 0, // Silence
+//     VAD_SPEECH = 1   // Speech
+// } vad_state_t;
+
+/**
+ * @brief Easy function type to initialize a model instance with a detection mode
+ * and specified model name
+ *
+ * @param model_name  The specified model name
+ * @param mode        The voice activity detection mode
+ * @param channel_num The number of input audio channels
+ * @param min_speech_ms  The minimum duration of speech in ms to trigger vad
+ * speech
+ * @param min_noise_ms   The minimum duration of noise in ms to trigger vad
+ * noise
+ * @returns Handle to the model data
+ */
+typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
+    const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
+
+/**
+ * @brief Get the amount of samples that need to be passed to the detect
+ * function
+ *
+ * Every speech recognition model processes a certain number of samples at the
+ * same time. This function can be used to query that amount. Note that the
+ * returned amount is in 16-bit samples, not in bytes.
+ *
+ * @param model The model object to query
+ * @return The amount of samples to feed the detect function
+ */
+typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
+
+/**
+ * @brief Get the number of audio channels that need to be passed to the detect
+ * function
+ *
+ * The model is created for a given number of input audio channels (see the
+ * channel_num argument of the create function). This function can be used to
+ * query the channel number of the given model.
+ *
+ * @param model The model object to query
+ * @return The number of channels to feed the detect function
+ */
+typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
+
+/**
+ * @brief Get the sample rate of the samples to feed to the detect function
+ *
+ * @param model The model object to query
+ * @return The sample rate, in hz
+ */
+typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
+
+/**
+ * @brief Set the detection threshold to manually adjust the probability
+ *
+ * @param model The model object to query
+ * @param det_threshold The threshold to trigger speech detection, the range of
+ * det_threshold is 0.5~0.9999
+ * @return 0: setting failed, 1: setting success
+ */
+typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
+
+/**
+ * @brief Get the voice activity detection threshold
+ *
+ * @param model The model object to query
+ * @returns the detection threshold
+ */
+typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
+
+/**
+ * @brief Feed samples of an audio stream to the vad model and detect whether is
+ * voice.
+ *
+ * @param model The model object to query
+ * @param samples An array of 16-bit signed audio samples. The array size used
+ * can be queried by the get_samp_chunksize function.
+ * @return The detected state: VAD_SPEECH if voice activity is detected,
+ * VAD_SILENCE otherwise.
+ */
+typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
+
+/**
+ * @brief Get the triggered channel index. Channel index starts from zero
+ *
+ * @param model The model object to query
+ * @return The channel index
+ */
+typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
+
+/**
+ * @brief Clean all states of model
+ *
+ * @param model The model object to query
+ */
+typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
+
+/**
+ * @brief Destroy a model object
+ *
+ * @param model Model object to destroy
+ */
+typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
+
+/**
+ * This structure contains the functions used to do operations on a voice
+ * activity detection model.
+ */
+typedef struct {
+    esp_vadn_iface_op_create_t create;
+    esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
+    esp_vadn_iface_op_get_channel_num_t get_channel_num;
+    esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
+    esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
+    esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
+    esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
+    esp_vadn_iface_op_detect_t detect;
+    esp_vadn_iface_op_clean_t clean;
+    esp_vadn_iface_op_destroy_t destroy;
+} esp_vadn_iface_t;
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/include/esp32s3/esp_vadn_models.h b/include/esp32s3/esp_vadn_models.h
new file mode 100644
index 0000000..eadc55f
--- /dev/null
+++ b/include/esp32s3/esp_vadn_models.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "esp_vadn_iface.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The prefix of vadnet model names, used to filter all vadnet models from the available models.
+#define ESP_VADN_PREFIX "vadnet"
+
+/**
+ * @brief Get the vadnet handle from model name
+ *
+ * @param model_name   The name of model 
+ * @returns The handle of vadnet
+ */
+const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/lib/esp32/libc_speech_features.a b/lib/esp32/libc_speech_features.a
index 4cfc32c..a105141 100644
Binary files a/lib/esp32/libc_speech_features.a and b/lib/esp32/libc_speech_features.a differ
diff --git a/lib/esp32/libdl_lib.a b/lib/esp32/libdl_lib.a
index 173deb4..97717e0 100644
Binary files a/lib/esp32/libdl_lib.a and b/lib/esp32/libdl_lib.a differ
diff --git a/lib/esp32/libesp_audio_front_end.a b/lib/esp32/libesp_audio_front_end.a
index cd719c4..41d45b4 100644
Binary files a/lib/esp32/libesp_audio_front_end.a and b/lib/esp32/libesp_audio_front_end.a differ
diff --git a/lib/esp32/libesp_audio_processor.a b/lib/esp32/libesp_audio_processor.a
index 71d2ef1..8cdf8cf 100644
Binary files a/lib/esp32/libesp_audio_processor.a and b/lib/esp32/libesp_audio_processor.a differ
diff --git a/lib/esp32/libmultinet.a b/lib/esp32/libmultinet.a
index 526f735..024f5c9 100644
Binary files a/lib/esp32/libmultinet.a and b/lib/esp32/libmultinet.a differ
diff --git a/lib/esp32/libwakenet.a b/lib/esp32/libwakenet.a
index 81960a7..cee6bd3 100644
Binary files a/lib/esp32/libwakenet.a and b/lib/esp32/libwakenet.a differ
diff --git a/lib/esp32/libwakeword_model.a b/lib/esp32/libwakeword_model.a
index 44714b8..b17e140 100644
Binary files a/lib/esp32/libwakeword_model.a and b/lib/esp32/libwakeword_model.a differ
diff --git a/lib/esp32p4/libdl_lib.a b/lib/esp32p4/libdl_lib.a
index f6c1fda..664b727 100644
Binary files a/lib/esp32p4/libdl_lib.a and b/lib/esp32p4/libdl_lib.a differ
diff --git a/lib/esp32p4/libesp_audio_front_end.a b/lib/esp32p4/libesp_audio_front_end.a
index 6ea5129..0bcdd96 100644
Binary files a/lib/esp32p4/libesp_audio_front_end.a and b/lib/esp32p4/libesp_audio_front_end.a differ
diff --git a/lib/esp32p4/libesp_audio_processor.a b/lib/esp32p4/libesp_audio_processor.a
index 730fb62..a4b6de2 100644
Binary files a/lib/esp32p4/libesp_audio_processor.a and b/lib/esp32p4/libesp_audio_processor.a differ
diff --git a/lib/esp32p4/libmultinet.a b/lib/esp32p4/libmultinet.a
index e31499d..1c73d70 100644
Binary files a/lib/esp32p4/libmultinet.a and b/lib/esp32p4/libmultinet.a differ
diff --git a/lib/esp32p4/libvadnet.a b/lib/esp32p4/libvadnet.a
new file mode 100644
index 0000000..b654035
Binary files /dev/null and b/lib/esp32p4/libvadnet.a differ
diff --git a/lib/esp32p4/libwakenet.a b/lib/esp32p4/libwakenet.a
index a10f40a..4080869 100644
Binary files a/lib/esp32p4/libwakenet.a and b/lib/esp32p4/libwakenet.a differ
diff --git a/lib/esp32s3/libc_speech_features.a b/lib/esp32s3/libc_speech_features.a
index 1cd372e..108af2e 100644
Binary files a/lib/esp32s3/libc_speech_features.a and b/lib/esp32s3/libc_speech_features.a differ
diff --git a/lib/esp32s3/libdl_lib.a b/lib/esp32s3/libdl_lib.a
index 21626fa..29525a6 100644
Binary files a/lib/esp32s3/libdl_lib.a and b/lib/esp32s3/libdl_lib.a differ
diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a
index ed917cb..7c1a1cc 100644
Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ
diff --git a/lib/esp32s3/libesp_audio_processor.a b/lib/esp32s3/libesp_audio_processor.a
index 8e8db4e..a444b22 100644
Binary files a/lib/esp32s3/libesp_audio_processor.a and b/lib/esp32s3/libesp_audio_processor.a differ
diff --git a/lib/esp32s3/libflite_g2p.a b/lib/esp32s3/libflite_g2p.a
index 76538e2..6a99a57 100644
Binary files a/lib/esp32s3/libflite_g2p.a and b/lib/esp32s3/libflite_g2p.a differ
diff --git a/lib/esp32s3/libfst.a b/lib/esp32s3/libfst.a
index 086a928..a2dd373 100644
Binary files a/lib/esp32s3/libfst.a and b/lib/esp32s3/libfst.a differ
diff --git a/lib/esp32s3/libhufzip.a b/lib/esp32s3/libhufzip.a
index b790f14..c0465b1 100644
Binary files a/lib/esp32s3/libhufzip.a and b/lib/esp32s3/libhufzip.a differ
diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a
index b7418f8..319a43c 100644
Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ
diff --git a/lib/esp32s3/libnsnet.a b/lib/esp32s3/libnsnet.a
index 3b00050..7cca9b0 100644
Binary files a/lib/esp32s3/libnsnet.a and b/lib/esp32s3/libnsnet.a differ
diff --git a/lib/esp32s3/libvadnet.a b/lib/esp32s3/libvadnet.a
new file mode 100644
index 0000000..e07fec7
Binary files /dev/null and b/lib/esp32s3/libvadnet.a differ
diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a
index 82c5c27..16d6ec9 100644
Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ
diff --git a/model/movemodel.py b/model/movemodel.py
index b49aa8a..e3bb4e0 100644
--- a/model/movemodel.py
+++ b/model/movemodel.py
@@ -28,6 +28,8 @@ def copy_wakenet_from_sdkconfig(model_path, sdkconfig_path, target_path):
         for label in f:
             label = label.strip("\n")
             if 'CONFIG_SR_WN' in label and  '#' not in label[0]:
+                if '_NONE' in label:
+                    continue
                 if '=' in label:
                     label = label.split("=")[0]
                 if '_MULTI' in label:
@@ -113,13 +115,13 @@ def copy_vadnet_from_sdkconfig(model_path, sdkconfig_path, target_path):
         models_string = ''
         for label in f:
             label = label.strip("\n")
-            if 'CONFIG_SR_VADNET' in label and label[0] != '#':
+            if 'CONFIG_SR_VADN' in label and label[0] != '#':
                 models_string += label
 
     models = []
-    if "CONFIG_SR_VADNET_MODLE_SMALL" in models_string:
+    if "CONFIG_SR_VADN_VADNET1_SMALL" in models_string:
         models.append('vadnet1_small')
-    elif "CONFIG_SR_VADNET_MODLE_MEDIUM" in models_string:
+    elif "CONFIG_SR_VADN_VADNET1_MEDIUM" in models_string:
         models.append('vadnet1_medium')
     
     for item in models:
diff --git a/model/vadnet_model/vadnet1_medium/_MODEL_INFO_ b/model/vadnet_model/vadnet1_medium/_MODEL_INFO_
new file mode 100644
index 0000000..5ba7d5f
--- /dev/null
+++ b/model/vadnet_model/vadnet1_medium/_MODEL_INFO_
@@ -0,0 +1 @@
+vadnet1_mediumv1_Speech_3_0.5_0.1
\ No newline at end of file
diff --git a/model/vadnet_model/vadnet1_medium/vadn1_data b/model/vadnet_model/vadnet1_medium/vadn1_data
new file mode 100644
index 0000000..55c694e
Binary files /dev/null and b/model/vadnet_model/vadnet1_medium/vadn1_data differ
diff --git a/model/vadnet_model/vadnet1_medium/vadn1_index b/model/vadnet_model/vadnet1_medium/vadn1_index
new file mode 100644
index 0000000..9ce8fa7
Binary files /dev/null and b/model/vadnet_model/vadnet1_medium/vadn1_index differ
diff --git a/model/wakenet_model/wn9_nihaodameng.zip b/model/wakenet_model/wn9_nihaodameng.zip
new file mode 100644
index 0000000..e33779c
Binary files /dev/null and b/model/wakenet_model/wn9_nihaodameng.zip differ
diff --git a/model/wakenet_model/wn9_nihaodameng/_MODEL_INFO_ b/model/wakenet_model/wn9_nihaodameng/_MODEL_INFO_
new file mode 100644
index 0000000..6d28a56
--- /dev/null
+++ b/model/wakenet_model/wn9_nihaodameng/_MODEL_INFO_
@@ -0,0 +1 @@
+wakenet9l_tts2h12_你好达蒙_3_0.634_0.640
diff --git a/model/wakenet_model/wn9_nihaodameng/wn9_data b/model/wakenet_model/wn9_nihaodameng/wn9_data
new file mode 100644
index 0000000..1b13ec6
Binary files /dev/null and b/model/wakenet_model/wn9_nihaodameng/wn9_data differ
diff --git a/model/wakenet_model/wn9_nihaodameng/wn9_index b/model/wakenet_model/wn9_nihaodameng/wn9_index
new file mode 100644
index 0000000..5e7c881
Binary files /dev/null and b/model/wakenet_model/wn9_nihaodameng/wn9_index differ
diff --git a/src/esp_process_sdkconfig.c b/src/esp_process_sdkconfig.c
index 1fe6d09..626e195 100644
--- a/src/esp_process_sdkconfig.c
+++ b/src/esp_process_sdkconfig.c
@@ -57,8 +57,16 @@ void check_chip_config(void)
     ESP_LOGW(TAG, "PSRAM freq should be 200MHz");
 #endif
 
+#ifdef CONFIG_ESP32P4_DATA_CACHE_128KB
+    ESP_LOGW(TAG, "Recommend data cache larger than 128KB");
+#endif
+
+#ifdef CONFIG_ESP32P4_DATA_CACHE_LINE_64B
+    ESP_LOGW(TAG, "Recommend data cache line larger than 64B");
+#endif
+
 #else
-    ESP_LOGW(TAG, "ESP-SR-AFE only support ESP32/ESP32S3");
+    ESP_LOGW(TAG, "ESP-SR-AFE only support ESP32/ESP32S3/ESP32P4");
 #endif
 }
 
@@ -476,7 +484,7 @@ char *get_id_name_cn(int i)
 
 char *get_id_name_en(int i)
 {
-#if defined CONFIG_USE_MULTINET && defined CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
+#if CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
     if (i == 0) {
         return CONFIG_EN_SPEECH_COMMAND_ID0;
     } else if (i == 1) {
diff --git a/test_apps/esp-sr/main/test_afe.cpp b/test_apps/esp-sr/main/test_afe.cpp
index 35fa630..975d5fd 100644
--- a/test_apps/esp-sr/main/test_afe.cpp
+++ b/test_apps/esp-sr/main/test_afe.cpp
@@ -23,6 +23,8 @@
 #if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
 #include "esp_nsn_models.h"
 #include "esp_nsn_iface.h"
+#include "esp_vadn_models.h"
+#include "esp_vadn_iface.h"
 #endif
 
 #define ARRAY_SIZE_OFFSET                   8       // Increase this if audio_sys_get_real_time_stats returns ESP_ERR_INVALID_SIZE
@@ -69,6 +71,10 @@ TEST_CASE(">>>>>>>> audio_front_end SR create/destroy API & memory leak <<<<<<<<
                     int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
                     srmodel_list_t *models = esp_srmodel_init("model");
                     char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
+                    char *vad_model_name = NULL;
+#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
+                    vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);
+#endif
 
                     esp_afe_sr_iface_t *afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;
                     afe_config_t afe_config = AFE_CONFIG_DEFAULT();
@@ -79,7 +85,10 @@ TEST_CASE(">>>>>>>> audio_front_end SR create/destroy API & memory leak <<<<<<<<
                     afe_config.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM;
                     afe_config.wakenet_model_name = model_name;
                     afe_config.voice_communication_init = false;
-
+                    afe_config.vad_model_name = vad_model_name;
+                    if (vad_model_name) {
+                        printf("vad_model_name:%s\n", vad_model_name);
+                    }
 
                     // test model loading time
                     struct timeval tv_start, tv_end;
@@ -106,7 +115,11 @@ TEST_CASE(">>>>>>>> audio_front_end SR create/destroy API & memory leak <<<<<<<<
                         printf("init partition ...\n");
                         models = esp_srmodel_init("model");
                         model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
+#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
+                        vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);
+#endif
                         afe_config.wakenet_model_name = model_name;
+                        afe_config.vad_model_name = vad_model_name;
 
                         printf("create ...\n");
                         afe_data = afe_handle->create_from_config(&afe_config);
diff --git a/test_apps/esp-sr/main/test_multinet.cpp b/test_apps/esp-sr/main/test_multinet.cpp
index 49778f0..9db0a1c 100644
--- a/test_apps/esp-sr/main/test_multinet.cpp
+++ b/test_apps/esp-sr/main/test_multinet.cpp
@@ -93,6 +93,7 @@ TEST_CASE("multinet cpu loading", "[mn]")
     struct timeval tv_start, tv_end;
     gettimeofday(&tv_start, NULL);
     esp_mn_state_t mn_state;
+    multinet->print_active_speech_commands(model_data);
 
     while (1) {
         if ((chunks + 1)*audio_chunksize <= data_size) {
diff --git a/test_apps/esp-sr/pytest_esp_sr.py b/test_apps/esp-sr/pytest_esp_sr.py
index 53de838..cfd3e41 100644
--- a/test_apps/esp-sr/pytest_esp_sr.py
+++ b/test_apps/esp-sr/pytest_esp_sr.py
@@ -15,10 +15,7 @@ from pytest_embedded import Dut
     ],
 )
 def test_multinet_s3(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="mn")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[mn]')
-    dut.expect_unity_test_output(timeout = 1000)
+    dut.run_all_single_board_cases(group="mn")
 
 @pytest.mark.target('esp32p4')
 @pytest.mark.env('esp32p4')
@@ -30,10 +27,7 @@ def test_multinet_s3(dut: Dut)-> None:
     ],
 )
 def test_multinet_p4(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="mn")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[mn]')
-    dut.expect_unity_test_output(timeout = 1000)
+    dut.run_all_single_board_cases(group="mn")
 
 
 @pytest.mark.target('esp32s3')
@@ -46,10 +40,7 @@ def test_multinet_p4(dut: Dut)-> None:
     ],
 )
 def test_wakenet(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="wn")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[wn]')
-    dut.expect_unity_test_output(timeout = 1000)
+    dut.run_all_single_board_cases(group="wn")
 
 @pytest.mark.target('esp32p4')
 @pytest.mark.env('esp32p4')
@@ -61,10 +52,7 @@ def test_wakenet(dut: Dut)-> None:
     ],
 )
 def test_wakenet_p4(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="wn")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[wn]')
-    dut.expect_unity_test_output(timeout = 1000)
+    dut.run_all_single_board_cases(group="wn")
 
 @pytest.mark.target('esp32s3')
 @pytest.mark.env('esp32s3')
@@ -72,13 +60,11 @@ def test_wakenet_p4(dut: Dut)-> None:
     'config',
     [
         'wn9_hilexin',
+        'vadnet',
     ],
 )
 def test_sr_afe(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="afe")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[afe_sr]')
-    dut.expect_unity_test_output(timeout = 1000)
+    dut.run_all_single_board_cases(group="afe_sr", timeout=100000)
 
 @pytest.mark.target('esp32p4')
 @pytest.mark.env('esp32p4')
@@ -89,10 +75,7 @@ def test_sr_afe(dut: Dut)-> None:
     ],
 )
 def test_sr_afe_p4(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="afe")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[afe_sr]')
-    dut.expect_unity_test_output(timeout = 1000)
+    dut.run_all_single_board_cases(group="afe_sr", timeout=100000)
 
 
 @pytest.mark.target('esp32s3')
@@ -104,10 +87,7 @@ def test_sr_afe_p4(dut: Dut)-> None:
     ],
 )
 def test_vc_afe(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="afe")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[afe_vc]')
-    dut.expect_unity_test_output(timeout = 100000)
+    dut.run_all_single_board_cases(group="afe_vc", timeout=100000)
 
 
 @pytest.mark.target('esp32p4')
@@ -119,7 +99,4 @@ def test_vc_afe(dut: Dut)-> None:
     ],
 )
 def test_vc_afe_p4(dut: Dut)-> None:
-    # dut.run_all_single_board_cases(group="afe")
-    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[afe_vc]')
-    dut.expect_unity_test_output(timeout = 100000)
\ No newline at end of file
+    dut.run_all_single_board_cases(group="afe_vc", timeout=100000)
\ No newline at end of file
diff --git a/test_apps/esp-sr/sdkconfig.ci.mn5q8_cn b/test_apps/esp-sr/sdkconfig.ci.mn5q8_cn
index b83bd36..ac866c2 100644
--- a/test_apps/esp-sr/sdkconfig.ci.mn5q8_cn
+++ b/test_apps/esp-sr/sdkconfig.ci.mn5q8_cn
@@ -6,6 +6,7 @@ CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_SR_WN_WN9_HILEXIN=y
 CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8=y
 CONFIG_SPIRAM=y
 CONFIG_SPIRAM_MODE_OCT=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.mn5q8_en b/test_apps/esp-sr/sdkconfig.ci.mn5q8_en
index fe47163..37eb87f 100644
--- a/test_apps/esp-sr/sdkconfig.ci.mn5q8_en
+++ b/test_apps/esp-sr/sdkconfig.ci.mn5q8_en
@@ -1,5 +1,5 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
@@ -7,7 +7,6 @@ CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
 CONFIG_SR_WN_WN9_HIESP=y
-CONFIG_SR_MN_CN_NONE=y
 CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8=y
 CONFIG_ESP_PHY_REDUCE_TX_POWER=y
 CONFIG_SPIRAM=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.mn6_cn b/test_apps/esp-sr/sdkconfig.ci.mn6_cn
index 1b40076..d8705c2 100644
--- a/test_apps/esp-sr/sdkconfig.ci.mn6_cn
+++ b/test_apps/esp-sr/sdkconfig.ci.mn6_cn
@@ -1,11 +1,13 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_SR_WN_WN9_HILEXIN=y
+CONFIG_SR_MN_CN_MULTINET6_QUANT=y
 CONFIG_SPIRAM=y
 CONFIG_SPIRAM_MODE_OCT=y
 CONFIG_SPIRAM_SPEED_80M=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.mn6_en b/test_apps/esp-sr/sdkconfig.ci.mn6_en
index 0e8fbe2..bef8835 100644
--- a/test_apps/esp-sr/sdkconfig.ci.mn6_en
+++ b/test_apps/esp-sr/sdkconfig.ci.mn6_en
@@ -1,12 +1,12 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
-CONFIG_SR_MN_CN_NONE=y
+CONFIG_SR_WN_WN9_HIESP=y
 CONFIG_SR_MN_EN_MULTINET6_QUANT=y
 CONFIG_ESP_PHY_REDUCE_TX_POWER=y
 CONFIG_SPIRAM=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.mn7_en b/test_apps/esp-sr/sdkconfig.ci.mn7_en
index 3231fe7..fe88911 100644
--- a/test_apps/esp-sr/sdkconfig.ci.mn7_en
+++ b/test_apps/esp-sr/sdkconfig.ci.mn7_en
@@ -1,12 +1,12 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
-CONFIG_SR_MN_CN_NONE=y
+CONFIG_SR_WN_WN9_HIESP=y
 CONFIG_SR_MN_EN_MULTINET7_QUANT=y
 CONFIG_SPIRAM=y
 CONFIG_SPIRAM_MODE_OCT=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.nsnet2 b/test_apps/esp-sr/sdkconfig.ci.nsnet2
index bfce388..e421ae1 100644
--- a/test_apps/esp-sr/sdkconfig.ci.nsnet2
+++ b/test_apps/esp-sr/sdkconfig.ci.nsnet2
@@ -1,13 +1,12 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
-CONFIG_USE_NSNET=y
-CONFIG_USE_MULTINET=n
+CONFIG_SR_NSN_NSNET2=y
 CONFIG_SPIRAM=y
 CONFIG_SPIRAM_MODE_OCT=y
 CONFIG_SPIRAM_SPEED_80M=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.p4_mn7_cn b/test_apps/esp-sr/sdkconfig.ci.p4_mn7_cn
index 32a2bbf..36f99e6 100644
--- a/test_apps/esp-sr/sdkconfig.ci.p4_mn7_cn
+++ b/test_apps/esp-sr/sdkconfig.ci.p4_mn7_cn
@@ -5,6 +5,7 @@ CONFIG_IDF_TARGET="esp32p4"
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_SR_WN_WN9_HILEXIN=y
 CONFIG_SR_MN_CN_MULTINET7_QUANT=y
 CONFIG_COMPILER_OPTIMIZATION_PERF=y
 CONFIG_ESP32P4_REV_MIN_0=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.p4_nsnet2 b/test_apps/esp-sr/sdkconfig.ci.p4_nsnet2
index f7c4e63..f9e7d8d 100644
--- a/test_apps/esp-sr/sdkconfig.ci.p4_nsnet2
+++ b/test_apps/esp-sr/sdkconfig.ci.p4_nsnet2
@@ -5,9 +5,8 @@ CONFIG_IDF_TARGET="esp32p4"
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
-CONFIG_USE_NSNET=y
 CONFIG_SR_WN_WN9_HIESP=y
-CONFIG_USE_MULTINET=n
+CONFIG_SR_NSN_NSNET2=y
 CONFIG_COMPILER_OPTIMIZATION_PERF=y
 CONFIG_ESP32P4_REV_MIN_0=y
 CONFIG_SPIRAM=y
diff --git a/test_apps/esp-sr/sdkconfig.ci.vadnet b/test_apps/esp-sr/sdkconfig.ci.vadnet
new file mode 100644
index 0000000..235a812
--- /dev/null
+++ b/test_apps/esp-sr/sdkconfig.ci.vadnet
@@ -0,0 +1,24 @@
+# This file was generated using idf.py save-defconfig. It can be edited manually.
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
+#
+CONFIG_IDF_TARGET="esp32s3"
+CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
+CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
+CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
+CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_SR_VADN_VADNET1_MEDIUM=y
+CONFIG_SR_WN_WN9_HILEXIN=y
+CONFIG_SPIRAM=y
+CONFIG_SPIRAM_MODE_OCT=y
+CONFIG_SPIRAM_SPEED_80M=y
+CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
+CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
+CONFIG_ESP32S3_DATA_CACHE_64KB=y
+CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
+CONFIG_ESP_MAIN_TASK_STACK_SIZE=8192
+CONFIG_ESP_WIFI_GMAC_SUPPORT=n
+CONFIG_FREERTOS_VTASKLIST_INCLUDE_COREID=y
+CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y
+CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
+CONFIG_LWIP_TCP_WND_DEFAULT=5744
+CONFIG_UNITY_CRITICAL_LEAK_LEVEL_GENERAL=1024
diff --git a/test_apps/esp-sr/sdkconfig.ci.wn9_hilexin b/test_apps/esp-sr/sdkconfig.ci.wn9_hilexin
index 1ea1ba0..e002d55 100644
--- a/test_apps/esp-sr/sdkconfig.ci.wn9_hilexin
+++ b/test_apps/esp-sr/sdkconfig.ci.wn9_hilexin
@@ -1,13 +1,12 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
-CONFIG_USE_NSNET=y
-CONFIG_USE_MULTINET=n
+CONFIG_SR_WN_WN9_HILEXIN=y
 CONFIG_ESP_PHY_REDUCE_TX_POWER=y
 CONFIG_SPIRAM=y
 CONFIG_SPIRAM_MODE_OCT=y
diff --git a/test_apps/esp-tts/sdkconfig.ci.p4 b/test_apps/esp-tts/sdkconfig.ci.p4
index 7fdc121..b35b0b5 100644
--- a/test_apps/esp-tts/sdkconfig.ci.p4
+++ b/test_apps/esp-tts/sdkconfig.ci.p4
@@ -1,13 +1,10 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.3.1 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32p4"
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
 CONFIG_PARTITION_TABLE_CUSTOM=y
-CONFIG_USE_AFE=n
-CONFIG_USE_WAKENET=n
-CONFIG_USE_MULTINET=n
 CONFIG_COMPILER_OPTIMIZATION_PERF=y
 CONFIG_ESP32P4_REV_MIN_0=y
 CONFIG_SPIRAM=y
diff --git a/test_apps/esp-tts/sdkconfig.ci.s3 b/test_apps/esp-tts/sdkconfig.ci.s3
index 1b40076..67f900f 100644
--- a/test_apps/esp-tts/sdkconfig.ci.s3
+++ b/test_apps/esp-tts/sdkconfig.ci.s3
@@ -1,5 +1,5 @@
 # This file was generated using idf.py save-defconfig. It can be edited manually.
-# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
+# Espressif IoT Development Framework (ESP-IDF) 5.3.1 Project Minimal Configuration
 #
 CONFIG_IDF_TARGET="esp32s3"
 CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
@@ -13,6 +13,9 @@ CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
 CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
 CONFIG_ESP32S3_DATA_CACHE_64KB=y
 CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
+CONFIG_ESP_SYSTEM_ALLOW_RTC_FAST_MEM_AS_HEAP=n
+CONFIG_ESP_INT_WDT=n
+CONFIG_ESP_TASK_WDT_EN=n
 CONFIG_ESP_WIFI_GMAC_SUPPORT=n
 CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
 CONFIG_LWIP_TCP_WND_DEFAULT=5744