mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
Merge branch 'feat/vadnet' into 'master'
Feat/vadnet See merge request speech-recognition-framework/esp-sr!127
This commit is contained in:
commit
1379c906b9
@ -100,8 +100,6 @@ before_script:
|
|||||||
artifacts:
|
artifacts:
|
||||||
when: always
|
when: always
|
||||||
paths:
|
paths:
|
||||||
- "**/build*/size.json"
|
|
||||||
- "**/build*/build_log.txt"
|
|
||||||
- "**/build*/*.bin"
|
- "**/build*/*.bin"
|
||||||
# upload to s3 server to save the artifacts size
|
# upload to s3 server to save the artifacts size
|
||||||
- "**/build*/*.map"
|
- "**/build*/*.map"
|
||||||
|
|||||||
@ -74,6 +74,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
|
|||||||
|
|
||||||
add_prebuilt_library(flite_g2p "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libflite_g2p.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(flite_g2p "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libflite_g2p.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libesp_audio_processor.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libesp_audio_processor.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
|
add_prebuilt_library(vadnet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libvadnet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(multinet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libmultinet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(multinet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libmultinet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libesp_audio_front_end.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32s3/libesp_audio_front_end.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
@ -95,6 +96,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
|
|||||||
esp_tts_chinese
|
esp_tts_chinese
|
||||||
voice_set_xiaole
|
voice_set_xiaole
|
||||||
nsnet
|
nsnet
|
||||||
|
vadnet
|
||||||
wakenet
|
wakenet
|
||||||
"-Wl,--end-group")
|
"-Wl,--end-group")
|
||||||
|
|
||||||
@ -153,6 +155,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32p4")
|
|||||||
add_prebuilt_library(flite_g2p "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libflite_g2p.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(flite_g2p "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libflite_g2p.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libesp_audio_processor.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libesp_audio_processor.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
|
add_prebuilt_library(vadnet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libvadnet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(multinet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libmultinet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(multinet "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libmultinet.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libesp_audio_front_end.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libesp_audio_front_end.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
add_prebuilt_library(hufzip "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libhufzip.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
add_prebuilt_library(hufzip "${CMAKE_CURRENT_SOURCE_DIR}/lib/esp32p4/libhufzip.a" PRIV_REQUIRES ${COMPONENT_NAME})
|
||||||
@ -173,6 +176,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32p4")
|
|||||||
esp_tts_chinese
|
esp_tts_chinese
|
||||||
voice_set_xiaole
|
voice_set_xiaole
|
||||||
wakenet
|
wakenet
|
||||||
|
vadnet
|
||||||
nsnet
|
nsnet
|
||||||
"-Wl,--end-group")
|
"-Wl,--end-group")
|
||||||
|
|
||||||
|
|||||||
@ -13,14 +13,9 @@ choice MODEL_DATA_PATH
|
|||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
|
|
||||||
config USE_AFE
|
|
||||||
bool "use afe"
|
|
||||||
default "y"
|
|
||||||
|
|
||||||
choice AFE_INTERFACE_SEL
|
choice AFE_INTERFACE_SEL
|
||||||
prompt "Afe interface"
|
prompt "Afe interface"
|
||||||
default AFE_INTERFACE_V1
|
default AFE_INTERFACE_V1
|
||||||
depends on USE_AFE
|
|
||||||
help
|
help
|
||||||
Select the afe interface to be used.
|
Select the afe interface to be used.
|
||||||
|
|
||||||
@ -29,306 +24,175 @@ choice AFE_INTERFACE_SEL
|
|||||||
|
|
||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
config USE_NSNET
|
|
||||||
bool "use nsnet"
|
|
||||||
default "n"
|
|
||||||
|
|
||||||
choice SR_NSN_MODEL_LOAD
|
choice SR_NSN_MODEL_LOAD
|
||||||
prompt "Select deep noise suppression"
|
prompt "Select noise suppression model"
|
||||||
default SR_NSN_NSNET2
|
default SR_NSN_WEBRTC
|
||||||
depends on USE_NSNET
|
|
||||||
help
|
help
|
||||||
Select the deep noise suppression to be loaded.
|
Select the noise suppression model to be loaded.
|
||||||
|
|
||||||
config SR_NSN_NONE
|
config SR_NSN_WEBRTC
|
||||||
bool "None"
|
bool "noise suppression (WebRTC)"
|
||||||
|
|
||||||
config SR_NSN_NSNET1
|
|
||||||
bool "Deep noise suppression v1 (nsnet1)"
|
|
||||||
depends on IDF_TARGET_ESP32S3
|
|
||||||
config SR_NSN_NSNET2
|
config SR_NSN_NSNET2
|
||||||
bool "Deep noise suppression v2 (nsnet2)"
|
bool "Deep noise suppression v2 (nsnet2)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
config USE_WAKENET
|
choice SR_VADN_MODEL_LOAD
|
||||||
bool "use wakenet"
|
prompt "Select voice activity detection"
|
||||||
default "y"
|
default SR_VADN_WEBRTC
|
||||||
|
help
|
||||||
|
Select the vad model to be loaded.
|
||||||
|
|
||||||
|
config SR_VADN_WEBRTC
|
||||||
|
bool "voice activity detection (WebRTC)"
|
||||||
|
|
||||||
|
config SR_VADN_VADNET1_MEDIUM
|
||||||
|
bool "voice activity detection (vadnet1 medium)"
|
||||||
|
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
||||||
|
endchoice
|
||||||
|
|
||||||
choice SR_WN_MODEL_LOAD
|
choice SR_WN_MODEL_LOAD
|
||||||
prompt "Select wake words"
|
prompt "Select wake words"
|
||||||
default SR_WN_WN9_HILEXIN
|
default SR_WN_WN5_HILEXIN
|
||||||
depends on USE_WAKENET
|
depends on IDF_TARGET_ESP32
|
||||||
help
|
help
|
||||||
Select the Wake Words to be loaded.
|
Select the Wake Words to be loaded.
|
||||||
|
|
||||||
config SR_WN_WN5_HILEXIN
|
config SR_WN_WN5_HILEXIN
|
||||||
bool "Hi,乐鑫 (wn5_hilexin)"
|
bool "Hi,Lexin (wn5_hilexin)"
|
||||||
depends on IDF_TARGET_ESP32
|
|
||||||
|
|
||||||
config SR_WN_WN5X3_HILEXIN
|
config SR_WN_WN5X3_HILEXIN
|
||||||
bool "Hi,乐鑫 (wn5_hilexinX3)"
|
bool "Hi,Lexin (wn5_hilexinX3)"
|
||||||
depends on IDF_TARGET_ESP32
|
|
||||||
|
|
||||||
config SR_WN_WN5_NIHAOXIAOZHI
|
config SR_WN_WN5_NIHAOXIAOZHI
|
||||||
bool "你好小智 (wn5_nihaoxiaozhi)"
|
bool "nihaoxiaozhi (wn5_nihaoxiaozhi)"
|
||||||
depends on IDF_TARGET_ESP32
|
|
||||||
|
|
||||||
config SR_WN_WN5X3_NIHAOXIAOZHI
|
config SR_WN_WN5X3_NIHAOXIAOZHI
|
||||||
bool "你好小智 (wn5_nihaoxiaozhiX3)"
|
bool "nihaoxiaozhi (wn5_nihaoxiaozhiX3)"
|
||||||
depends on IDF_TARGET_ESP32
|
|
||||||
|
|
||||||
config SR_WN_WN5X3_NIHAOXIAOXIN
|
config SR_WN_WN5X3_NIHAOXIAOXIN
|
||||||
bool "你好小鑫 (wn5_nihaoxiaoxinX3)"
|
bool "nihaoxiaoxin (wn5_nihaoxiaoxinX3)"
|
||||||
depends on IDF_TARGET_ESP32
|
|
||||||
|
|
||||||
config SR_WN_WN8_ALEXA
|
|
||||||
bool "Alexa (wn8_alexa)"
|
|
||||||
depends on IDF_TARGET_ESP32S3
|
|
||||||
|
|
||||||
config SR_WN_WN9_HILEXIN
|
|
||||||
bool "Hi,乐鑫 (wn9_hilexin)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOAITONGXUE
|
|
||||||
bool "小爱同学 (wn9_xiaoaitongxue)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_ALEXA
|
|
||||||
bool "Alexa (wn9_alexa)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HIESP
|
|
||||||
bool "Hi,ESP (wn9_hiesp)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HIMFIVE
|
|
||||||
bool "Hi,M Five (wn9_himfive)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_NIHAOXIAOZHI_TTS
|
|
||||||
bool "你好小智 (wn9_nihaoxiaozhi_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_JARVIS_TTS
|
|
||||||
bool "Jarvis (wn9_jarvis_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_COMPUTER_TTS
|
|
||||||
bool "computer (wn9_computer_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HEYWILLOW_TTS
|
|
||||||
bool "Hey,Willow (wn9_heywillow_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_SOPHIA_TTS
|
|
||||||
bool "Sophia (wn9_sophia_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_NIHAOXIAOXIN_TTS
|
|
||||||
bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOMEITONGXUE_TTS
|
|
||||||
bool "小美同学 (wn9_xiaomeitongxue_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HIXIAOXING_TTS
|
|
||||||
bool "Hi,小星 (wn9_hixiaoxing_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_MYCROFT_TTS
|
|
||||||
bool "Mycroft (wn9_mycroft_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HEYPRINTER_TTS
|
|
||||||
bool "Hey,Printer (wn9_heyprinter_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOLONGXIAOLONG_TTS
|
|
||||||
bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_MIAOMIAOTONGXUE_TTS
|
|
||||||
bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HIJOY_TTS
|
|
||||||
bool "Hi,Joy (wn9_hijoy_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HILILI_TTS
|
|
||||||
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HITELLY_TTS
|
|
||||||
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HEYWANDA_TTS
|
|
||||||
bool "Hey,Wanda (wn9_heywanda_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HIMIAOMIAO_TTS
|
|
||||||
bool "Hi,喵喵 (wn9_himiaomiao_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOBINXIAOBIN_TTS
|
|
||||||
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HAIXIAOWU_TTS
|
|
||||||
bool "Hi,小巫 (wn9_haixiaowu_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_ASTROLABE_TTS
|
|
||||||
bool "Astrolabe (wn9_astrolabe_tts)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOYAXIAOYA_TTS2
|
|
||||||
bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_HIJASON_TTS2
|
|
||||||
bool "Hi,Jason (wn9_hijason_tts2)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_LINAIBAN_TTS2
|
|
||||||
bool "璃奈板 (wn9_linaiban_tts2)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_WN9_CUSTOMWORD
|
|
||||||
bool "customized word (wn9_customword)"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
config SR_WN_LOAD_MULIT_WORD
|
|
||||||
bool "Load Multiple Wake Words"
|
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
|
||||||
|
|
||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
menu "Load Multiple Wake Words"
|
menu "Load Multiple Wake Words"
|
||||||
depends on SR_WN_LOAD_MULIT_WORD
|
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
||||||
|
|
||||||
config SR_WN_WN9_HILEXIN_MULTI
|
config SR_WN_WN9_HILEXIN
|
||||||
bool "Hi,乐鑫 (wn9_hilexin)"
|
bool "Hi,乐鑫 (wn9_hilexin)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOAITONGXUE_MULTI
|
config SR_WN_WN9_XIAOAITONGXUE
|
||||||
bool "小爱同学 (wn9_xiaoaitongxue)"
|
bool "小爱同学 (wn9_xiaoaitongxue)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_NIHAOXIAOZHI_TTS_MULTI
|
config SR_WN_WN9_NIHAOXIAOZHI_TTS
|
||||||
bool "你好小智 (wn9_nihaoxiaozhi_tts)"
|
bool "你好小智 (wn9_nihaoxiaozhi_tts)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_ALEXA_MULTI
|
config SR_WN_WN9_ALEXA
|
||||||
bool "Alexa (wn9_alexa)"
|
bool "Alexa (wn9_alexa)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HIESP_MULTI
|
config SR_WN_WN9_HIESP
|
||||||
bool "Hi,ESP (wn9_hiesp)"
|
bool "Hi,ESP (wn9_hiesp)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_JARVIS_TTS_MULTI
|
config SR_WN_WN9_JARVIS_TTS
|
||||||
bool "Jarvis (wn9_jarvis_tts)"
|
bool "Jarvis (wn9_jarvis_tts)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_COMPUTER_TTS_MULTI
|
config SR_WN_WN9_COMPUTER_TTS
|
||||||
bool "computer (wn9_computer_tts)"
|
bool "computer (wn9_computer_tts)"
|
||||||
default False
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HEYWILLOW_TTS_MULTI
|
config SR_WN_WN9_HEYWILLOW_TTS
|
||||||
bool "Hey,Willow (wn9_heywillow_tts)"
|
bool "Hey,Willow (wn9_heywillow_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_SOPHIA_TTS_MULTI
|
config SR_WN_WN9_SOPHIA_TTS
|
||||||
bool "Sophia (wn9_sophia_tts)"
|
bool "Sophia (wn9_sophia_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_NIHAOXIAOXIN_TTS_MULTI
|
config SR_WN_WN9_NIHAOXIAOXIN_TTS
|
||||||
bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
|
bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOMEITONGXUE_TTS_MULTI
|
config SR_WN_WN9_XIAOMEITONGXUE_TTS
|
||||||
bool "小美同学 (wn9_xiaomeitongxue_tts)"
|
bool "小美同学 (wn9_xiaomeitongxue_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HEYPRINTER_TTS_MULTI
|
config SR_WN_WN9_HEYPRINTER_TTS
|
||||||
bool "Hey,Printer (wn9_heyprinter_tts)"
|
bool "Hey,Printer (wn9_heyprinter_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOLONGXIAOLONG_TTS_MULTI
|
config SR_WN_WN9_XIAOLONGXIAOLONG_TTS
|
||||||
bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
|
bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_MIAOMIAOTONGXUE_TTS_MULTI
|
config SR_WN_WN9_MIAOMIAOTONGXUE_TTS
|
||||||
bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
|
bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
|
|
||||||
config SR_WN_WN9_HEYWANDA_TTS_MULTI
|
config SR_WN_WN9_HEYWANDA_TTS
|
||||||
bool "Hey,Wanda (wn9_heywanda_tts)"
|
bool "Hey,Wanda (wn9_heywanda_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HIMIAOMIAO_TTS_MULTI
|
config SR_WN_WN9_HIMIAOMIAO_TTS
|
||||||
bool "Hi,喵喵 (wn9_himiaomiao_tts)"
|
bool "Hi,喵喵 (wn9_himiaomiao_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
|
|
||||||
config SR_WN_WN9_MYCROFT_TTS_MULTI
|
config SR_WN_WN9_MYCROFT_TTS
|
||||||
bool "Mycroft (wn9_mycroft_tts)"
|
bool "Mycroft (wn9_mycroft_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HIJOY_TTS_MULTI
|
config SR_WN_WN9_HIJOY_TTS
|
||||||
bool "Hi,Joy (wn9_hijoy_tts)"
|
bool "Hi,Joy (wn9_hijoy_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HILILI_TTS_MULTI
|
config SR_WN_WN9_HILILI_TTS
|
||||||
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
|
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HITELLY_TTS_MULTI
|
config SR_WN_WN9_HITELLY_TTS
|
||||||
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
|
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOBINXIAOBIN_TTS_MULTI
|
config SR_WN_WN9_XIAOBINXIAOBIN_TTS
|
||||||
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
|
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HAIXIAOWU_TTS_MULTI
|
config SR_WN_WN9_HAIXIAOWU_TTS
|
||||||
bool "Hi,小巫 (wn9_haixiaowu_tts)"
|
bool "Hi,小巫 (wn9_haixiaowu_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_ASTROLABE_TTS_MULTI
|
config SR_WN_WN9_ASTROLABE_TTS
|
||||||
bool "Astrolabe (wn9_astrolabe_tts)"
|
bool "Astrolabe (wn9_astrolabe_tts)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_XIAOYAXIAOYA_TTS2_MULTI
|
config SR_WN_WN9_XIAOYAXIAOYA_TTS2
|
||||||
bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
|
bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_HIJASON_TTS2_MULTI
|
config SR_WN_WN9_HIJASON_TTS2
|
||||||
bool "Hi,Jason (wn9_hijason_tts2)"
|
bool "Hi,Jason (wn9_hijason_tts2)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
config SR_WN_WN9_LINAIBAN_TTS2_MULTI
|
config SR_WN_WN9_LINAIBAN_TTS2
|
||||||
bool "璃奈板 (wn9_linaiban_tts2)"
|
bool "璃奈板 (wn9_linaiban_tts2)"
|
||||||
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
|
default False
|
||||||
|
|
||||||
endmenu
|
endmenu
|
||||||
|
|
||||||
config USE_MULTINET
|
|
||||||
bool "use multinet"
|
|
||||||
default "y"
|
|
||||||
|
|
||||||
choice CHINESE_SR_MN_MODEL_SEL
|
choice CHINESE_SR_MN_MODEL_SEL
|
||||||
prompt "Chinese Speech Commands Model"
|
prompt "Chinese Speech Commands Model"
|
||||||
default SR_MN_CN_MULTINET6_QUANT
|
default SR_MN_CN_NONE
|
||||||
depends on USE_MULTINET
|
|
||||||
help
|
help
|
||||||
Select the Wake Word Engine to be used.
|
Select the Chinese Speech Commands Model.
|
||||||
|
|
||||||
config SR_MN_CN_NONE
|
config SR_MN_CN_NONE
|
||||||
bool "None"
|
bool "None"
|
||||||
@ -362,9 +226,8 @@ endchoice
|
|||||||
choice ENGLISH_SR_MN_MODEL_SEL
|
choice ENGLISH_SR_MN_MODEL_SEL
|
||||||
prompt "English Speech Commands Model"
|
prompt "English Speech Commands Model"
|
||||||
default SR_MN_EN_NONE
|
default SR_MN_EN_NONE
|
||||||
depends on USE_MULTINET
|
|
||||||
help
|
help
|
||||||
Select the Wake Word Engine to be used.
|
Select the English Speech Commands Model.
|
||||||
|
|
||||||
config SR_MN_EN_NONE
|
config SR_MN_EN_NONE
|
||||||
bool "None"
|
bool "None"
|
||||||
|
|||||||
@ -202,7 +202,7 @@ class IdfPytestEmbedded:
|
|||||||
for item in items:
|
for item in items:
|
||||||
# default timeout 5 mins
|
# default timeout 5 mins
|
||||||
if 'timeout' not in item.keywords:
|
if 'timeout' not in item.keywords:
|
||||||
item.add_marker(pytest.mark.timeout(8 * 60))
|
item.add_marker(pytest.mark.timeout(500 * 60))
|
||||||
|
|
||||||
# filter all the test cases with "--target"
|
# filter all the test cases with "--target"
|
||||||
if self.target:
|
if self.target:
|
||||||
|
|||||||
@ -78,7 +78,7 @@ void *dl_lib_calloc_psram(int cnt, int size, int align);
|
|||||||
/**
|
/**
|
||||||
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
|
||||||
*
|
*
|
||||||
* @param prt Pointer to free
|
* @param ptr Pointer to free
|
||||||
*/
|
*/
|
||||||
void dl_lib_free(void *ptr);
|
void dl_lib_free(void *ptr);
|
||||||
|
|
||||||
|
|||||||
@ -292,6 +292,7 @@ qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, in
|
|||||||
|
|
||||||
void print_convq8(dl_convq8_queue_t *cq, int offset);
|
void print_convq8(dl_convq8_queue_t *cq, int offset);
|
||||||
void print_convq(dl_convq_queue_t *cq, int offset);
|
void print_convq(dl_convq_queue_t *cq, int offset);
|
||||||
|
void dl_relu_convq8(dl_convq8_queue_t *cq);
|
||||||
|
|
||||||
void lstmq8_free(void);
|
void lstmq8_free(void);
|
||||||
|
|
||||||
|
|||||||
@ -279,9 +279,9 @@ dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t
|
|||||||
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
|
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
|
||||||
|
|
||||||
dl_matrix2dq_t *dl_convq16_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
|
||||||
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
|
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
|
||||||
const dl_matrix2dq_t *bias, int prenum);
|
dl_matrix2dq_t *bias, int prenum);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Allocate a fixed-point multi channel convolution queue
|
* @brief Allocate a fixed-point multi channel convolution queue
|
||||||
|
|||||||
@ -25,10 +25,6 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// #ifdef CONFIG_IDF_TARGET_ESP32S3
|
|
||||||
// #include "dl_tie728_bzero.h"
|
|
||||||
// #endif
|
|
||||||
|
|
||||||
typedef float fptp_t;
|
typedef float fptp_t;
|
||||||
|
|
||||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||||
|
|||||||
@ -23,7 +23,8 @@ extern "C" {
|
|||||||
#define USE_AEC_FFT // Not kiss_fft
|
#define USE_AEC_FFT // Not kiss_fft
|
||||||
#define AEC_USE_SPIRAM 0
|
#define AEC_USE_SPIRAM 0
|
||||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||||
#define AEC_FRAME_LENGTH_MS 16
|
//#define AEC_FRAME_LENGTH_MS 16
|
||||||
|
#define AEC_FRAME_LENGTH_MS 32
|
||||||
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
|
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
|
||||||
|
|
||||||
typedef void* aec_handle_t;
|
typedef void* aec_handle_t;
|
||||||
|
|||||||
@ -90,6 +90,12 @@ typedef struct {
|
|||||||
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
|
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
|
||||||
afe_ns_mode_t afe_ns_mode;
|
afe_ns_mode_t afe_ns_mode;
|
||||||
char *afe_ns_model_name;
|
char *afe_ns_model_name;
|
||||||
|
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||||
|
// otherwise, select channel number by wakenet
|
||||||
|
char *vad_model_name; // The model name of vad, support vadnet1 and vadnet1_small
|
||||||
|
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms
|
||||||
|
int vad_min_noise_ms; // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
|
||||||
|
bool vad_mute_playback; // If true, the playback will be muted for vad detection
|
||||||
} afe_config_t;
|
} afe_config_t;
|
||||||
|
|
||||||
|
|
||||||
@ -123,6 +129,47 @@ typedef struct {
|
|||||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
|
}
|
||||||
|
#elif CONFIG_IDF_TARGET_ESP32P4
|
||||||
|
#define AFE_CONFIG_DEFAULT() { \
|
||||||
|
.aec_init = true, \
|
||||||
|
.se_init = true, \
|
||||||
|
.vad_init = true, \
|
||||||
|
.wakenet_init = true, \
|
||||||
|
.voice_communication_init = false, \
|
||||||
|
.voice_communication_agc_init = false, \
|
||||||
|
.voice_communication_agc_gain = 15, \
|
||||||
|
.vad_mode = VAD_MODE_3, \
|
||||||
|
.wakenet_model_name = NULL, \
|
||||||
|
.wakenet_model_name_2 = NULL, \
|
||||||
|
.wakenet_mode = DET_MODE_90, \
|
||||||
|
.afe_mode = SR_MODE_LOW_COST, \
|
||||||
|
.afe_perferred_core = 0, \
|
||||||
|
.afe_perferred_priority = 5, \
|
||||||
|
.afe_ringbuf_size = 50, \
|
||||||
|
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
|
||||||
|
.afe_linear_gain = 1.0, \
|
||||||
|
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||||
|
.pcm_config = { \
|
||||||
|
.total_ch_num = 2, \
|
||||||
|
.mic_num = 1, \
|
||||||
|
.ref_num = 1, \
|
||||||
|
.sample_rate = 16000, \
|
||||||
|
}, \
|
||||||
|
.debug_init = false, \
|
||||||
|
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||||
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
|
.afe_ns_model_name = NULL, \
|
||||||
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||||
#define AFE_CONFIG_DEFAULT() { \
|
#define AFE_CONFIG_DEFAULT() { \
|
||||||
@ -154,6 +201,11 @@ typedef struct {
|
|||||||
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
|
||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
|
|||||||
{
|
{
|
||||||
int16_t *data; // the data of audio.
|
int16_t *data; // the data of audio.
|
||||||
int data_size; // the size of data. The unit is byte.
|
int data_size; // the size of data. The unit is byte.
|
||||||
|
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
|
||||||
|
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
||||||
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
||||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||||
@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
|
|||||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
|
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
|
||||||
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
||||||
int trigger_channel_id; // the channel index of output
|
int trigger_channel_id; // the channel index of output
|
||||||
int wake_word_length; // the length of wake word. It's unit is the number of samples.
|
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||||
int ret_value; // the return state of fetch function
|
int ret_value; // the return state of fetch function
|
||||||
void* reserved; // reserved for future use
|
void* reserved; // reserved for future use
|
||||||
} afe_fetch_result_t;
|
} afe_fetch_result_t;
|
||||||
@ -112,7 +114,7 @@ typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *af
|
|||||||
* @brief reset ringbuf of AFE.
|
* @brief reset ringbuf of AFE.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return -1: fail, 0: success
|
* @return -1: fail, 1: success
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
@ -122,7 +124,7 @@ typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
|
|||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @param wakenet_word The wakenet word, should be DEFAULT_WAKE_WORD or EXTRA_WAKE_WORD
|
* @param wakenet_word The wakenet word, should be DEFAULT_WAKE_WORD or EXTRA_WAKE_WORD
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 1: success
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* model_name);
|
typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* model_name);
|
||||||
|
|
||||||
@ -130,7 +132,7 @@ typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* m
|
|||||||
* @brief Disable wakenet model.
|
* @brief Disable wakenet model.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 0: disabled, 1: enabled
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
@ -138,7 +140,7 @@ typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
|
|||||||
* @brief Enable wakenet model.
|
* @brief Enable wakenet model.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 0: disabled, 1: enabled
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
@ -146,7 +148,7 @@ typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
|
|||||||
* @brief Disable AEC algorithm.
|
* @brief Disable AEC algorithm.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 0: disabled, 1: enabled
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
@ -154,7 +156,7 @@ typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
|
|||||||
* @brief Enable AEC algorithm.
|
* @brief Enable AEC algorithm.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 0: disabled, 1: enabled
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
@ -162,7 +164,7 @@ typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
|
|||||||
* @brief Disable SE algorithm.
|
* @brief Disable SE algorithm.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 0: disabled, 1: enabled
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
@ -170,7 +172,7 @@ typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
|
|||||||
* @brief Enable SE algorithm.
|
* @brief Enable SE algorithm.
|
||||||
*
|
*
|
||||||
* @param afe The AFE_SR object to query
|
* @param afe The AFE_SR object to query
|
||||||
* @return 0: fail, 1: success
|
* @return -1: fail, 0: disabled, 1: enabled
|
||||||
*/
|
*/
|
||||||
typedef int (*esp_afe_sr_iface_op_enable_se_t)(esp_afe_sr_data_t *afe);
|
typedef int (*esp_afe_sr_iface_op_enable_se_t)(esp_afe_sr_data_t *afe);
|
||||||
|
|
||||||
|
|||||||
@ -4,7 +4,6 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined CONFIG_USE_AFE
|
|
||||||
#include "esp_afe_sr_iface.h"
|
#include "esp_afe_sr_iface.h"
|
||||||
|
|
||||||
|
|
||||||
@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
|
|
||||||
#include "esp_afe_sr_iface.h"
|
|
||||||
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
|
|
||||||
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
|
||||||
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
|
|
||||||
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -2,8 +2,16 @@
|
|||||||
|
|
||||||
#include "esp_nsn_iface.h"
|
#include "esp_nsn_iface.h"
|
||||||
|
|
||||||
// The prefix of nsnet model name is used to filter all wakenet from availabel models.
|
/*
|
||||||
|
The prefix of nsnet model names
|
||||||
|
Now there are nsnet1 and nsnet2
|
||||||
|
*/
|
||||||
#define ESP_NSNET_PREFIX "nsnet"
|
#define ESP_NSNET_PREFIX "nsnet"
|
||||||
|
|
||||||
extern const esp_nsn_iface_t esp_nsnet1_quantized;
|
/**
|
||||||
#define ESP_NSN_HANDLE esp_nsnet1_quantized
|
* @brief Get the nsnet handle from model name
|
||||||
|
*
|
||||||
|
* @param model_name The name of model
|
||||||
|
* @returns The handle of nsnet
|
||||||
|
*/
|
||||||
|
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);
|
||||||
|
|||||||
@ -25,22 +25,65 @@ extern "C" {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
||||||
* restrictive in reporting speech.
|
* restrictive in reporting speech. So if you want to trigger on more speech, please select a lower mode.
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VAD_MODE_0 = 0,
|
VAD_MODE_0 = 0, // Normal
|
||||||
VAD_MODE_1,
|
VAD_MODE_1, // Aggressive
|
||||||
VAD_MODE_2,
|
VAD_MODE_2, // Very Aggressive
|
||||||
VAD_MODE_3,
|
VAD_MODE_3, // Very Very Aggressive
|
||||||
VAD_MODE_4
|
VAD_MODE_4 // Very Very Very Aggressive
|
||||||
} vad_mode_t;
|
} vad_mode_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VAD_SILENCE = 0,
|
VAD_SILENCE = 0,
|
||||||
VAD_SPEECH
|
VAD_SPEECH = 1,
|
||||||
} vad_state_t;
|
} vad_state_t;
|
||||||
|
|
||||||
typedef void* vad_handle_t;
|
typedef struct vad_trigger_tag {
|
||||||
|
vad_state_t state;
|
||||||
|
unsigned int min_speech_len;
|
||||||
|
unsigned int noise_len;
|
||||||
|
unsigned int min_noise_len;
|
||||||
|
unsigned int speech_len;
|
||||||
|
} vad_trigger_t;
|
||||||
|
|
||||||
|
#define vad_MAX_LEN INT32_MAX - 1
|
||||||
|
/**
|
||||||
|
* @brief Allocate VAD trigger
|
||||||
|
*
|
||||||
|
* @param min_speech_len Minimum frame number of speech duration
|
||||||
|
* @param min_noise_len Minimum frame number of noise duration
|
||||||
|
*
|
||||||
|
* @return Trigger pointer
|
||||||
|
**/
|
||||||
|
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Free VAD trigger
|
||||||
|
**/
|
||||||
|
void vad_trigger_free(vad_trigger_t *trigger);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Reset VAD trigger
|
||||||
|
**/
|
||||||
|
void vad_trigger_reset(vad_trigger_t *trigger);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Detect voice activity by trigger
|
||||||
|
**/
|
||||||
|
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vad_trigger_t *trigger;
|
||||||
|
void *vad_inst;
|
||||||
|
}vad_handle_with_trigger_t;
|
||||||
|
|
||||||
|
typedef vad_handle_with_trigger_t* vad_handle_t;
|
||||||
|
|
||||||
|
// typedef vad_handle_tag * vad_handle_t;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates an instance to the VAD structure.
|
* @brief Creates an instance to the VAD structure.
|
||||||
@ -53,6 +96,18 @@ typedef void* vad_handle_t;
|
|||||||
*/
|
*/
|
||||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Creates an instance to the VAD structure.
|
||||||
|
*
|
||||||
|
* @param vad_mode Sets the VAD operating mode.
|
||||||
|
* @param min_speech_len Minimum frame number of speech duration
|
||||||
|
* @param min_noise_len Minimum frame number of noise duration
|
||||||
|
* @return
|
||||||
|
* - NULL: Create failed
|
||||||
|
* - Others: The instance of VAD
|
||||||
|
*/
|
||||||
|
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||||
*
|
*
|
||||||
|
|||||||
@ -9,7 +9,7 @@ typedef struct {
|
|||||||
|
|
||||||
void flite_g2p_result_free(flite_g2p_result *result);
|
void flite_g2p_result_free(flite_g2p_result *result);
|
||||||
|
|
||||||
flite_g2p_result *flite_g2p_get_result(char *grapheme);
|
flite_g2p_result *flite_g2p_get_result(const char *grapheme);
|
||||||
|
|
||||||
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
|
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
|
||||||
|
|
||||||
|
|||||||
@ -92,6 +92,10 @@ typedef struct {
|
|||||||
char *afe_ns_model_name;
|
char *afe_ns_model_name;
|
||||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||||
// otherwise, select channel number by wakenet
|
// otherwise, select channel number by wakenet
|
||||||
|
char *vad_model_name; // The model name of vad, support vadnet1 and vadnet1_small
|
||||||
|
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms
|
||||||
|
int vad_min_noise_ms; // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
|
||||||
|
bool vad_mute_playback; // If true, the playback will be muted for vad detection
|
||||||
} afe_config_t;
|
} afe_config_t;
|
||||||
|
|
||||||
|
|
||||||
@ -126,6 +130,10 @@ typedef struct {
|
|||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
.fixed_first_channel = true, \
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#elif CONFIG_IDF_TARGET_ESP32P4
|
#elif CONFIG_IDF_TARGET_ESP32P4
|
||||||
#define AFE_CONFIG_DEFAULT() { \
|
#define AFE_CONFIG_DEFAULT() { \
|
||||||
@ -158,6 +166,10 @@ typedef struct {
|
|||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
.fixed_first_channel = true, \
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||||
#define AFE_CONFIG_DEFAULT() { \
|
#define AFE_CONFIG_DEFAULT() { \
|
||||||
@ -190,6 +202,10 @@ typedef struct {
|
|||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
.fixed_first_channel = true, \
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
|
|||||||
{
|
{
|
||||||
int16_t *data; // the data of audio.
|
int16_t *data; // the data of audio.
|
||||||
int data_size; // the size of data. The unit is byte.
|
int data_size; // the size of data. The unit is byte.
|
||||||
|
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
|
||||||
|
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
||||||
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
||||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||||
@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
|
|||||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
|
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
|
||||||
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
||||||
int trigger_channel_id; // the channel index of output
|
int trigger_channel_id; // the channel index of output
|
||||||
int wake_word_length; // the length of wake word. It's unit is the number of samples.
|
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||||
int ret_value; // the return state of fetch function
|
int ret_value; // the return state of fetch function
|
||||||
void* reserved; // reserved for future use
|
void* reserved; // reserved for future use
|
||||||
} afe_fetch_result_t;
|
} afe_fetch_result_t;
|
||||||
|
|||||||
@ -4,7 +4,6 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined CONFIG_USE_AFE
|
|
||||||
#include "esp_afe_sr_iface.h"
|
#include "esp_afe_sr_iface.h"
|
||||||
|
|
||||||
|
|
||||||
@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
|
|
||||||
#include "esp_afe_sr_iface.h"
|
|
||||||
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
|
|
||||||
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
|
||||||
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
|
|
||||||
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -25,22 +25,65 @@ extern "C" {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
||||||
* restrictive in reporting speech.
|
* restrictive in reporting speech. So if you want to trigger on more speech, please select a lower mode.
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VAD_MODE_0 = 0,
|
VAD_MODE_0 = 0, // Normal
|
||||||
VAD_MODE_1,
|
VAD_MODE_1, // Aggressive
|
||||||
VAD_MODE_2,
|
VAD_MODE_2, // Very Aggressive
|
||||||
VAD_MODE_3,
|
VAD_MODE_3, // Very Very Aggressive
|
||||||
VAD_MODE_4
|
VAD_MODE_4 // Very Very Very Aggressive
|
||||||
} vad_mode_t;
|
} vad_mode_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VAD_SILENCE = 0,
|
VAD_SILENCE = 0,
|
||||||
VAD_SPEECH
|
VAD_SPEECH = 1,
|
||||||
} vad_state_t;
|
} vad_state_t;
|
||||||
|
|
||||||
typedef void* vad_handle_t;
|
typedef struct vad_trigger_tag {
|
||||||
|
vad_state_t state;
|
||||||
|
unsigned int min_speech_len;
|
||||||
|
unsigned int noise_len;
|
||||||
|
unsigned int min_noise_len;
|
||||||
|
unsigned int speech_len;
|
||||||
|
} vad_trigger_t;
|
||||||
|
|
||||||
|
#define vad_MAX_LEN INT32_MAX - 1
|
||||||
|
/**
|
||||||
|
* @brief Allocate VAD trigger
|
||||||
|
*
|
||||||
|
* @param min_speech_len Minimum frame number of speech duration
|
||||||
|
* @param min_noise_len Minimum frame number of noise duration
|
||||||
|
*
|
||||||
|
* @return Trigger pointer
|
||||||
|
**/
|
||||||
|
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Free VAD trigger
|
||||||
|
**/
|
||||||
|
void vad_trigger_free(vad_trigger_t *trigger);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Reset VAD trigger
|
||||||
|
**/
|
||||||
|
void vad_trigger_reset(vad_trigger_t *trigger);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Detect voice activity by trigger
|
||||||
|
**/
|
||||||
|
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vad_trigger_t *trigger;
|
||||||
|
void *vad_inst;
|
||||||
|
}vad_handle_with_trigger_t;
|
||||||
|
|
||||||
|
typedef vad_handle_with_trigger_t* vad_handle_t;
|
||||||
|
|
||||||
|
// typedef vad_handle_tag * vad_handle_t;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates an instance to the VAD structure.
|
* @brief Creates an instance to the VAD structure.
|
||||||
@ -53,6 +96,18 @@ typedef void* vad_handle_t;
|
|||||||
*/
|
*/
|
||||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Creates an instance to the VAD structure.
|
||||||
|
*
|
||||||
|
* @param vad_mode Sets the VAD operating mode.
|
||||||
|
* @param min_speech_len Minimum frame number of speech duration
|
||||||
|
* @param min_noise_len Minimum frame number of noise duration
|
||||||
|
* @return
|
||||||
|
* - NULL: Create failed
|
||||||
|
* - Others: The instance of VAD
|
||||||
|
*/
|
||||||
|
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||||
*
|
*
|
||||||
|
|||||||
142
include/esp32p4/esp_vadn_iface.h
Normal file
142
include/esp32p4/esp_vadn_iface.h
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "esp_vad.h"
|
||||||
|
#include "stdint.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Opaque model data container
|
||||||
|
typedef struct model_iface_data_t model_iface_data_t;
|
||||||
|
|
||||||
|
// /**
|
||||||
|
// * @brief The state of vad
|
||||||
|
// */
|
||||||
|
// typedef enum {
|
||||||
|
// VAD_NOISE = -1, // Noise
|
||||||
|
// VADNET_STATE_SILENCE = 0, // Silence
|
||||||
|
// VAD_SPEECH = 1 // Speech
|
||||||
|
// } vad_state_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Easy function type to initialize a model instance with a detection mode
|
||||||
|
* and specified model name
|
||||||
|
*
|
||||||
|
* @param model_name The specified model name
|
||||||
|
* @param mode The voice activity detection mode
|
||||||
|
* @param channel_num The number of input audio channels
|
||||||
|
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
|
||||||
|
* speech
|
||||||
|
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
|
||||||
|
* noise
|
||||||
|
* @returns Handle to the model data
|
||||||
|
*/
|
||||||
|
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
|
||||||
|
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the amount of samples that need to be passed to the detect
|
||||||
|
* function
|
||||||
|
*
|
||||||
|
* Every speech recognition model processes a certain number of samples at the
|
||||||
|
* same time. This function can be used to query that amount. Note that the
|
||||||
|
* returned amount is in 16-bit samples, not in bytes.
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The amount of samples to feed the detect function
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the channel number of samples that need to be passed to the detect
|
||||||
|
* function
|
||||||
|
*
|
||||||
|
* Every speech recognition model processes a certain number of samples at the
|
||||||
|
* same time. This function can be used to query that amount. Note that the
|
||||||
|
* returned amount is in 16-bit samples, not in bytes.
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The number of audio channels to feed the detect function
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the sample rate of the samples to feed to the detect function
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The sample rate, in hz
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the detection threshold to manually adjust the probability
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @param det_threshold The voice activity detection threshold, the range of
|
||||||
|
* det_threshold is 0.5~0.9999
|
||||||
|
* @return 0: setting failed, 1: setting success
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the voice activity detection threshold
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @returns the detection threshold
|
||||||
|
*/
|
||||||
|
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Feed samples of an audio stream to the vad model and detect whether is
|
||||||
|
* voice.
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @param samples An array of 16-bit signed audio samples. The array size used
|
||||||
|
* can be queried by the get_samp_chunksize function.
|
||||||
|
* @return The detected VAD state: VAD_SILENCE when no speech is detected,
|
||||||
|
* VAD_SPEECH when speech is detected.
|
||||||
|
*/
|
||||||
|
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the triggered channel index. Channel index starts from zero
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The channel index
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Clean all states of model
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
*/
|
||||||
|
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy a model object
|
||||||
|
*
|
||||||
|
* @param model Model object to destroy
|
||||||
|
*/
|
||||||
|
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This structure contains the functions used to do operations on a voice
|
||||||
|
* activity detection model.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
esp_vadn_iface_op_create_t create;
|
||||||
|
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||||
|
esp_vadn_iface_op_get_channel_num_t get_channel_num;
|
||||||
|
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
|
||||||
|
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
|
||||||
|
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||||
|
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||||
|
esp_vadn_iface_op_detect_t detect;
|
||||||
|
esp_vadn_iface_op_clean_t clean;
|
||||||
|
esp_vadn_iface_op_destroy_t destroy;
|
||||||
|
} esp_vadn_iface_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
22
include/esp32p4/esp_vadn_models.h
Normal file
22
include/esp32p4/esp_vadn_models.h
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "esp_vadn_iface.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The prefix of vadnet model name is used to filter all vadnet models from available models.
|
||||||
|
#define ESP_VADN_PREFIX "vadnet"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the vadnet handle from model name
|
||||||
|
*
|
||||||
|
* @param model_name The name of model
|
||||||
|
* @returns The handle of vadnet
|
||||||
|
*/
|
||||||
|
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
@ -92,6 +92,10 @@ typedef struct {
|
|||||||
char *afe_ns_model_name;
|
char *afe_ns_model_name;
|
||||||
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
|
||||||
// otherwise, select channel number by wakenet
|
// otherwise, select channel number by wakenet
|
||||||
|
char *vad_model_name; // The model name of vad, support vadnet1 and vadnet1_small
|
||||||
|
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms
|
||||||
|
int vad_min_noise_ms; // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
|
||||||
|
bool vad_mute_playback; // If true, the playback will be muted for vad detection
|
||||||
} afe_config_t;
|
} afe_config_t;
|
||||||
|
|
||||||
|
|
||||||
@ -104,7 +108,7 @@ typedef struct {
|
|||||||
.voice_communication_init = false, \
|
.voice_communication_init = false, \
|
||||||
.voice_communication_agc_init = false, \
|
.voice_communication_agc_init = false, \
|
||||||
.voice_communication_agc_gain = 15, \
|
.voice_communication_agc_gain = 15, \
|
||||||
.vad_mode = VAD_MODE_3, \
|
.vad_mode = VAD_MODE_0, \
|
||||||
.wakenet_model_name = NULL, \
|
.wakenet_model_name = NULL, \
|
||||||
.wakenet_model_name_2 = NULL, \
|
.wakenet_model_name_2 = NULL, \
|
||||||
.wakenet_mode = DET_MODE_90, \
|
.wakenet_mode = DET_MODE_90, \
|
||||||
@ -126,6 +130,10 @@ typedef struct {
|
|||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
.fixed_first_channel = true, \
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#elif CONFIG_IDF_TARGET_ESP32P4
|
#elif CONFIG_IDF_TARGET_ESP32P4
|
||||||
#define AFE_CONFIG_DEFAULT() { \
|
#define AFE_CONFIG_DEFAULT() { \
|
||||||
@ -136,7 +144,7 @@ typedef struct {
|
|||||||
.voice_communication_init = false, \
|
.voice_communication_init = false, \
|
||||||
.voice_communication_agc_init = false, \
|
.voice_communication_agc_init = false, \
|
||||||
.voice_communication_agc_gain = 15, \
|
.voice_communication_agc_gain = 15, \
|
||||||
.vad_mode = VAD_MODE_3, \
|
.vad_mode = VAD_MODE_0, \
|
||||||
.wakenet_model_name = NULL, \
|
.wakenet_model_name = NULL, \
|
||||||
.wakenet_model_name_2 = NULL, \
|
.wakenet_model_name_2 = NULL, \
|
||||||
.wakenet_mode = DET_MODE_90, \
|
.wakenet_mode = DET_MODE_90, \
|
||||||
@ -158,6 +166,10 @@ typedef struct {
|
|||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
.fixed_first_channel = true, \
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#elif CONFIG_IDF_TARGET_ESP32S3
|
#elif CONFIG_IDF_TARGET_ESP32S3
|
||||||
#define AFE_CONFIG_DEFAULT() { \
|
#define AFE_CONFIG_DEFAULT() { \
|
||||||
@ -168,7 +180,7 @@ typedef struct {
|
|||||||
.voice_communication_init = false, \
|
.voice_communication_init = false, \
|
||||||
.voice_communication_agc_init = false, \
|
.voice_communication_agc_init = false, \
|
||||||
.voice_communication_agc_gain = 15, \
|
.voice_communication_agc_gain = 15, \
|
||||||
.vad_mode = VAD_MODE_3, \
|
.vad_mode = VAD_MODE_0, \
|
||||||
.wakenet_model_name = NULL, \
|
.wakenet_model_name = NULL, \
|
||||||
.wakenet_model_name_2 = NULL, \
|
.wakenet_model_name_2 = NULL, \
|
||||||
.wakenet_mode = DET_MODE_2CH_90, \
|
.wakenet_mode = DET_MODE_2CH_90, \
|
||||||
@ -190,6 +202,10 @@ typedef struct {
|
|||||||
.afe_ns_mode = NS_MODE_SSP, \
|
.afe_ns_mode = NS_MODE_SSP, \
|
||||||
.afe_ns_model_name = NULL, \
|
.afe_ns_model_name = NULL, \
|
||||||
.fixed_first_channel = true, \
|
.fixed_first_channel = true, \
|
||||||
|
.vad_model_name = NULL, \
|
||||||
|
.vad_min_speech_ms = 64, \
|
||||||
|
.vad_min_noise_ms = 256, \
|
||||||
|
.vad_mute_playback = false, \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
|
|||||||
{
|
{
|
||||||
int16_t *data; // the data of audio.
|
int16_t *data; // the data of audio.
|
||||||
int data_size; // the size of data. The unit is byte.
|
int data_size; // the size of data. The unit is byte.
|
||||||
|
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
|
||||||
|
int vad_cache_size; // the size of vad_cache. The unit is byte.
|
||||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
||||||
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
||||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||||
@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
|
|||||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
|
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
|
||||||
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
afe_vad_state_t vad_state; // the value is afe_vad_state_t
|
||||||
int trigger_channel_id; // the channel index of output
|
int trigger_channel_id; // the channel index of output
|
||||||
int wake_word_length; // the length of wake word. It's unit is the number of samples.
|
int wake_word_length; // the length of wake word. The unit is the number of samples.
|
||||||
int ret_value; // the return state of fetch function
|
int ret_value; // the return state of fetch function
|
||||||
void* reserved; // reserved for future use
|
void* reserved; // reserved for future use
|
||||||
} afe_fetch_result_t;
|
} afe_fetch_result_t;
|
||||||
|
|||||||
@ -4,7 +4,6 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined CONFIG_USE_AFE
|
|
||||||
#include "esp_afe_sr_iface.h"
|
#include "esp_afe_sr_iface.h"
|
||||||
|
|
||||||
|
|
||||||
@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
|
|
||||||
#include "esp_afe_sr_iface.h"
|
|
||||||
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
|
|
||||||
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
|
|
||||||
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
|
|
||||||
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -25,22 +25,65 @@ extern "C" {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
|
||||||
* restrictive in reporting speech.
|
* restrictive in reporting speech. If you want to detect more speech, select a lower mode.
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VAD_MODE_0 = 0,
|
VAD_MODE_0 = 0, // Normal
|
||||||
VAD_MODE_1,
|
VAD_MODE_1, // Aggressive
|
||||||
VAD_MODE_2,
|
VAD_MODE_2, // Very Aggressive
|
||||||
VAD_MODE_3,
|
VAD_MODE_3, // Very Very Aggressive
|
||||||
VAD_MODE_4
|
VAD_MODE_4 // Very Very Very Aggressive
|
||||||
} vad_mode_t;
|
} vad_mode_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VAD_SILENCE = 0,
|
VAD_SILENCE = 0,
|
||||||
VAD_SPEECH
|
VAD_SPEECH = 1,
|
||||||
} vad_state_t;
|
} vad_state_t;
|
||||||
|
|
||||||
typedef void* vad_handle_t;
|
typedef struct vad_trigger_tag {
|
||||||
|
vad_state_t state;
|
||||||
|
unsigned int min_speech_len;
|
||||||
|
unsigned int noise_len;
|
||||||
|
unsigned int min_noise_len;
|
||||||
|
unsigned int speech_len;
|
||||||
|
} vad_trigger_t;
|
||||||
|
|
||||||
|
#define vad_MAX_LEN INT32_MAX - 1
|
||||||
|
/**
|
||||||
|
* @brief Allocate a VAD trigger
|
||||||
|
*
|
||||||
|
* @param min_speech_len Minimum frame number of speech duration
|
||||||
|
* @param min_noise_len Minimum frame number of noise duration
|
||||||
|
*
|
||||||
|
* @return Trigger pointer
|
||||||
|
**/
|
||||||
|
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Free a VAD trigger
|
||||||
|
**/
|
||||||
|
void vad_trigger_free(vad_trigger_t *trigger);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Reset a VAD trigger
|
||||||
|
**/
|
||||||
|
void vad_trigger_reset(vad_trigger_t *trigger);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Detect voice activity using the trigger
|
||||||
|
**/
|
||||||
|
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vad_trigger_t *trigger;
|
||||||
|
void *vad_inst;
|
||||||
|
}vad_handle_with_trigger_t;
|
||||||
|
|
||||||
|
typedef vad_handle_with_trigger_t* vad_handle_t;
|
||||||
|
|
||||||
|
// typedef vad_handle_tag * vad_handle_t;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates an instance to the VAD structure.
|
* @brief Creates an instance to the VAD structure.
|
||||||
@ -53,6 +96,18 @@ typedef void* vad_handle_t;
|
|||||||
*/
|
*/
|
||||||
vad_handle_t vad_create(vad_mode_t vad_mode);
|
vad_handle_t vad_create(vad_mode_t vad_mode);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Creates an instance to the VAD structure.
|
||||||
|
*
|
||||||
|
* @param vad_mode Sets the VAD operating mode.
|
||||||
|
* @param min_speech_len Minimum frame number of speech duration
|
||||||
|
* @param min_noise_len Minimum frame number of noise duration
|
||||||
|
* @return
|
||||||
|
* - NULL: Create failed
|
||||||
|
* - Others: The instance of VAD
|
||||||
|
*/
|
||||||
|
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||||
*
|
*
|
||||||
|
|||||||
142
include/esp32s3/esp_vadn_iface.h
Normal file
142
include/esp32s3/esp_vadn_iface.h
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "esp_vad.h"
|
||||||
|
#include "stdint.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Opaque model data container
|
||||||
|
typedef struct model_iface_data_t model_iface_data_t;
|
||||||
|
|
||||||
|
// /**
|
||||||
|
// * @brief The state of vad
|
||||||
|
// */
|
||||||
|
// typedef enum {
|
||||||
|
// VAD_NOISE = -1, // Noise
|
||||||
|
// VADNET_STATE_SILENCE = 0, // Silence
|
||||||
|
// VAD_SPEECH = 1 // Speech
|
||||||
|
// } vad_state_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Easy function type to initialize a model instance with a detection mode
|
||||||
|
* and specified model name
|
||||||
|
*
|
||||||
|
* @param model_name The specified model name
|
||||||
|
* @param mode The voice activity detection mode
|
||||||
|
* @param channel_num The number of input audio channels
|
||||||
|
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
|
||||||
|
* speech
|
||||||
|
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
|
||||||
|
* noise
|
||||||
|
* @returns Handle to the model data
|
||||||
|
*/
|
||||||
|
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
|
||||||
|
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the amount of samples that need to be passed to the detect
|
||||||
|
* function
|
||||||
|
*
|
||||||
|
* Every speech recognition model processes a certain number of samples at the
|
||||||
|
* same time. This function can be used to query that amount. Note that the
|
||||||
|
* returned amount is in 16-bit samples, not in bytes.
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The amount of samples to feed the detect function
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the channel number of samples that need to be passed to the detect
|
||||||
|
* function
|
||||||
|
*
|
||||||
|
* Every speech recognition model processes a certain number of samples at the
|
||||||
|
* same time. This function can be used to query that amount. Note that the
|
||||||
|
* returned amount is in 16-bit samples, not in bytes.
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The number of input audio channels
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the sample rate of the samples to feed to the detect function
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The sample rate, in hz
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the detection threshold to manually adjust the probability
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @param det_threshold The voice activity detection threshold, the range of
|
||||||
|
* det_threshold is 0.5~0.9999
|
||||||
|
* @return 0: setting failed, 1: setting success
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the voice activity detection threshold
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @returns the detection threshold
|
||||||
|
*/
|
||||||
|
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Feed samples of an audio stream to the vad model and detect whether is
|
||||||
|
* voice.
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @param samples An array of 16-bit signed audio samples. The array size used
|
||||||
|
* can be queried by the get_samp_chunksize function.
|
||||||
|
* @return The detected voice activity state, a vad_state_t value such as
|
||||||
|
* VAD_SILENCE or VAD_SPEECH.
|
||||||
|
*/
|
||||||
|
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the triggered channel index. Channel index starts from zero
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
* @return The channel index
|
||||||
|
*/
|
||||||
|
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Clean all states of model
|
||||||
|
*
|
||||||
|
* @param model The model object to query
|
||||||
|
*/
|
||||||
|
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy a model object
|
||||||
|
*
|
||||||
|
* @param model Model object to destroy
|
||||||
|
*/
|
||||||
|
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This structure contains the functions used to do operations on a voice
|
||||||
|
* activity detection model.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
esp_vadn_iface_op_create_t create;
|
||||||
|
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||||
|
esp_vadn_iface_op_get_channel_num_t get_channel_num;
|
||||||
|
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
|
||||||
|
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
|
||||||
|
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
|
||||||
|
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
|
||||||
|
esp_vadn_iface_op_detect_t detect;
|
||||||
|
esp_vadn_iface_op_clean_t clean;
|
||||||
|
esp_vadn_iface_op_destroy_t destroy;
|
||||||
|
} esp_vadn_iface_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
22
include/esp32s3/esp_vadn_models.h
Normal file
22
include/esp32s3/esp_vadn_models.h
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "esp_vadn_iface.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The prefix of the vadnet model name is used to filter all vadnet models from the available models.
|
||||||
|
#define ESP_VADN_PREFIX "vadnet"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the vadnet handle from the model name
|
||||||
|
*
|
||||||
|
* @param model_name The name of model
|
||||||
|
* @returns The handle of vadnet
|
||||||
|
*/
|
||||||
|
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
lib/esp32p4/libvadnet.a
Normal file
BIN
lib/esp32p4/libvadnet.a
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
lib/esp32s3/libvadnet.a
Normal file
BIN
lib/esp32s3/libvadnet.a
Normal file
Binary file not shown.
Binary file not shown.
@ -28,6 +28,8 @@ def copy_wakenet_from_sdkconfig(model_path, sdkconfig_path, target_path):
|
|||||||
for label in f:
|
for label in f:
|
||||||
label = label.strip("\n")
|
label = label.strip("\n")
|
||||||
if 'CONFIG_SR_WN' in label and '#' not in label[0]:
|
if 'CONFIG_SR_WN' in label and '#' not in label[0]:
|
||||||
|
if '_NONE' in label:
|
||||||
|
continue
|
||||||
if '=' in label:
|
if '=' in label:
|
||||||
label = label.split("=")[0]
|
label = label.split("=")[0]
|
||||||
if '_MULTI' in label:
|
if '_MULTI' in label:
|
||||||
@ -113,13 +115,13 @@ def copy_vadnet_from_sdkconfig(model_path, sdkconfig_path, target_path):
|
|||||||
models_string = ''
|
models_string = ''
|
||||||
for label in f:
|
for label in f:
|
||||||
label = label.strip("\n")
|
label = label.strip("\n")
|
||||||
if 'CONFIG_SR_VADNET' in label and label[0] != '#':
|
if 'CONFIG_SR_VADN' in label and label[0] != '#':
|
||||||
models_string += label
|
models_string += label
|
||||||
|
|
||||||
models = []
|
models = []
|
||||||
if "CONFIG_SR_VADNET_MODLE_SMALL" in models_string:
|
if "CONFIG_SR_VADN_VADNET1_SMALL" in models_string:
|
||||||
models.append('vadnet1_small')
|
models.append('vadnet1_small')
|
||||||
elif "CONFIG_SR_VADNET_MODLE_MEDIUM" in models_string:
|
elif "CONFIG_SR_VADN_VADNET1_MEDIUM" in models_string:
|
||||||
models.append('vadnet1_medium')
|
models.append('vadnet1_medium')
|
||||||
|
|
||||||
for item in models:
|
for item in models:
|
||||||
|
|||||||
1
model/vadnet_model/vadnet1_medium/_MODEL_INFO_
Normal file
1
model/vadnet_model/vadnet1_medium/_MODEL_INFO_
Normal file
@ -0,0 +1 @@
|
|||||||
|
vadnet1_mediumv1_Speech_3_0.5_0.1
|
||||||
BIN
model/vadnet_model/vadnet1_medium/vadn1_data
Normal file
BIN
model/vadnet_model/vadnet1_medium/vadn1_data
Normal file
Binary file not shown.
BIN
model/vadnet_model/vadnet1_medium/vadn1_index
Normal file
BIN
model/vadnet_model/vadnet1_medium/vadn1_index
Normal file
Binary file not shown.
BIN
model/wakenet_model/wn9_nihaodameng.zip
Normal file
BIN
model/wakenet_model/wn9_nihaodameng.zip
Normal file
Binary file not shown.
1
model/wakenet_model/wn9_nihaodameng/_MODEL_INFO_
Normal file
1
model/wakenet_model/wn9_nihaodameng/_MODEL_INFO_
Normal file
@ -0,0 +1 @@
|
|||||||
|
wakenet9l_tts2h12_你好达蒙_3_0.634_0.640
|
||||||
BIN
model/wakenet_model/wn9_nihaodameng/wn9_data
Normal file
BIN
model/wakenet_model/wn9_nihaodameng/wn9_data
Normal file
Binary file not shown.
BIN
model/wakenet_model/wn9_nihaodameng/wn9_index
Normal file
BIN
model/wakenet_model/wn9_nihaodameng/wn9_index
Normal file
Binary file not shown.
@ -57,8 +57,16 @@ void check_chip_config(void)
|
|||||||
ESP_LOGW(TAG, "PSRAM freq should be 200MHz");
|
ESP_LOGW(TAG, "PSRAM freq should be 200MHz");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_ESP32P4_DATA_CACHE_128KB
|
||||||
|
ESP_LOGW(TAG, "Recommend data cache larger than 128KB");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_ESP32P4_DATA_CACHE_LINE_64B
|
||||||
|
ESP_LOGW(TAG, "Recommend data cache line larger than 64B");
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
ESP_LOGW(TAG, "ESP-SR-AFE only support ESP32/ESP32S3");
|
ESP_LOGW(TAG, "ESP-SR-AFE only support ESP32/ESP32S3/ESP32P4");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -476,7 +484,7 @@ char *get_id_name_cn(int i)
|
|||||||
|
|
||||||
char *get_id_name_en(int i)
|
char *get_id_name_en(int i)
|
||||||
{
|
{
|
||||||
#if defined CONFIG_USE_MULTINET && defined CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
|
#if CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
return CONFIG_EN_SPEECH_COMMAND_ID0;
|
return CONFIG_EN_SPEECH_COMMAND_ID0;
|
||||||
} else if (i == 1) {
|
} else if (i == 1) {
|
||||||
|
|||||||
@ -23,6 +23,8 @@
|
|||||||
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
|
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
|
||||||
#include "esp_nsn_models.h"
|
#include "esp_nsn_models.h"
|
||||||
#include "esp_nsn_iface.h"
|
#include "esp_nsn_iface.h"
|
||||||
|
#include "esp_vadn_models.h"
|
||||||
|
#include "esp_vadn_iface.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ARRAY_SIZE_OFFSET 8 // Increase this if audio_sys_get_real_time_stats returns ESP_ERR_INVALID_SIZE
|
#define ARRAY_SIZE_OFFSET 8 // Increase this if audio_sys_get_real_time_stats returns ESP_ERR_INVALID_SIZE
|
||||||
@ -69,6 +71,10 @@ TEST_CASE(">>>>>>>> audio_front_end SR create/destroy API & memory leak <<<<<<<<
|
|||||||
int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
|
int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
|
||||||
srmodel_list_t *models = esp_srmodel_init("model");
|
srmodel_list_t *models = esp_srmodel_init("model");
|
||||||
char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
|
char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
|
||||||
|
char *vad_model_name = NULL;
|
||||||
|
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
|
||||||
|
vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
esp_afe_sr_iface_t *afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;
|
esp_afe_sr_iface_t *afe_handle = (esp_afe_sr_iface_t *)&ESP_AFE_SR_HANDLE;
|
||||||
afe_config_t afe_config = AFE_CONFIG_DEFAULT();
|
afe_config_t afe_config = AFE_CONFIG_DEFAULT();
|
||||||
@ -79,7 +85,10 @@ TEST_CASE(">>>>>>>> audio_front_end SR create/destroy API & memory leak <<<<<<<<
|
|||||||
afe_config.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM;
|
afe_config.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM;
|
||||||
afe_config.wakenet_model_name = model_name;
|
afe_config.wakenet_model_name = model_name;
|
||||||
afe_config.voice_communication_init = false;
|
afe_config.voice_communication_init = false;
|
||||||
|
afe_config.vad_model_name = vad_model_name;
|
||||||
|
if (vad_model_name) {
|
||||||
|
printf("vad_model_name:%s\n", vad_model_name);
|
||||||
|
}
|
||||||
|
|
||||||
// test model loading time
|
// test model loading time
|
||||||
struct timeval tv_start, tv_end;
|
struct timeval tv_start, tv_end;
|
||||||
@ -106,7 +115,11 @@ TEST_CASE(">>>>>>>> audio_front_end SR create/destroy API & memory leak <<<<<<<<
|
|||||||
printf("init partition ...\n");
|
printf("init partition ...\n");
|
||||||
models = esp_srmodel_init("model");
|
models = esp_srmodel_init("model");
|
||||||
model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
|
model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
|
||||||
|
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4)
|
||||||
|
vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);
|
||||||
|
#endif
|
||||||
afe_config.wakenet_model_name = model_name;
|
afe_config.wakenet_model_name = model_name;
|
||||||
|
afe_config.vad_model_name = vad_model_name;
|
||||||
|
|
||||||
printf("create ...\n");
|
printf("create ...\n");
|
||||||
afe_data = afe_handle->create_from_config(&afe_config);
|
afe_data = afe_handle->create_from_config(&afe_config);
|
||||||
|
|||||||
@ -93,6 +93,7 @@ TEST_CASE("multinet cpu loading", "[mn]")
|
|||||||
struct timeval tv_start, tv_end;
|
struct timeval tv_start, tv_end;
|
||||||
gettimeofday(&tv_start, NULL);
|
gettimeofday(&tv_start, NULL);
|
||||||
esp_mn_state_t mn_state;
|
esp_mn_state_t mn_state;
|
||||||
|
multinet->print_active_speech_commands(model_data);
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if ((chunks + 1)*audio_chunksize <= data_size) {
|
if ((chunks + 1)*audio_chunksize <= data_size) {
|
||||||
|
|||||||
@ -15,10 +15,7 @@ from pytest_embedded import Dut
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_multinet_s3(dut: Dut)-> None:
|
def test_multinet_s3(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="mn")
|
dut.run_all_single_board_cases(group="mn")
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[mn]')
|
|
||||||
dut.expect_unity_test_output(timeout = 1000)
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32p4')
|
@pytest.mark.target('esp32p4')
|
||||||
@pytest.mark.env('esp32p4')
|
@pytest.mark.env('esp32p4')
|
||||||
@ -30,10 +27,7 @@ def test_multinet_s3(dut: Dut)-> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_multinet_p4(dut: Dut)-> None:
|
def test_multinet_p4(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="mn")
|
dut.run_all_single_board_cases(group="mn")
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[mn]')
|
|
||||||
dut.expect_unity_test_output(timeout = 1000)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32s3')
|
@pytest.mark.target('esp32s3')
|
||||||
@ -46,10 +40,7 @@ def test_multinet_p4(dut: Dut)-> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_wakenet(dut: Dut)-> None:
|
def test_wakenet(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="wn")
|
dut.run_all_single_board_cases(group="wn")
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[wn]')
|
|
||||||
dut.expect_unity_test_output(timeout = 1000)
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32p4')
|
@pytest.mark.target('esp32p4')
|
||||||
@pytest.mark.env('esp32p4')
|
@pytest.mark.env('esp32p4')
|
||||||
@ -61,10 +52,7 @@ def test_wakenet(dut: Dut)-> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_wakenet_p4(dut: Dut)-> None:
|
def test_wakenet_p4(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="wn")
|
dut.run_all_single_board_cases(group="wn")
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[wn]')
|
|
||||||
dut.expect_unity_test_output(timeout = 1000)
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32s3')
|
@pytest.mark.target('esp32s3')
|
||||||
@pytest.mark.env('esp32s3')
|
@pytest.mark.env('esp32s3')
|
||||||
@ -72,13 +60,11 @@ def test_wakenet_p4(dut: Dut)-> None:
|
|||||||
'config',
|
'config',
|
||||||
[
|
[
|
||||||
'wn9_hilexin',
|
'wn9_hilexin',
|
||||||
|
'vadnet',
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_sr_afe(dut: Dut)-> None:
|
def test_sr_afe(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="afe")
|
dut.run_all_single_board_cases(group="afe_sr", timeout=100000)
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[afe_sr]')
|
|
||||||
dut.expect_unity_test_output(timeout = 1000)
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32p4')
|
@pytest.mark.target('esp32p4')
|
||||||
@pytest.mark.env('esp32p4')
|
@pytest.mark.env('esp32p4')
|
||||||
@ -89,10 +75,7 @@ def test_sr_afe(dut: Dut)-> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_sr_afe_p4(dut: Dut)-> None:
|
def test_sr_afe_p4(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="afe")
|
dut.run_all_single_board_cases(group="afe_sr", timeout=100000)
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[afe_sr]')
|
|
||||||
dut.expect_unity_test_output(timeout = 1000)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32s3')
|
@pytest.mark.target('esp32s3')
|
||||||
@ -104,10 +87,7 @@ def test_sr_afe_p4(dut: Dut)-> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_vc_afe(dut: Dut)-> None:
|
def test_vc_afe(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="afe")
|
dut.run_all_single_board_cases(group="afe_vc", timeout=100000)
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[afe_vc]')
|
|
||||||
dut.expect_unity_test_output(timeout = 100000)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.target('esp32p4')
|
@pytest.mark.target('esp32p4')
|
||||||
@ -119,7 +99,4 @@ def test_vc_afe(dut: Dut)-> None:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_vc_afe_p4(dut: Dut)-> None:
|
def test_vc_afe_p4(dut: Dut)-> None:
|
||||||
# dut.run_all_single_board_cases(group="afe")
|
dut.run_all_single_board_cases(group="afe_vc", timeout=100000)
|
||||||
dut.expect_exact('Press ENTER to see the list of tests.')
|
|
||||||
dut.write('[afe_vc]')
|
|
||||||
dut.expect_unity_test_output(timeout = 100000)
|
|
||||||
@ -6,6 +6,7 @@ CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
|||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
|
CONFIG_SR_WN_WN9_HILEXIN=y
|
||||||
CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8=y
|
CONFIG_SR_MN_CN_MULTINET5_RECOGNITION_QUANT8=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
CONFIG_SPIRAM_MODE_OCT=y
|
CONFIG_SPIRAM_MODE_OCT=y
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
@ -7,7 +7,6 @@ CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
|||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_SR_WN_WN9_HIESP=y
|
CONFIG_SR_WN_WN9_HIESP=y
|
||||||
CONFIG_SR_MN_CN_NONE=y
|
|
||||||
CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8=y
|
CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8=y
|
||||||
CONFIG_ESP_PHY_REDUCE_TX_POWER=y
|
CONFIG_ESP_PHY_REDUCE_TX_POWER=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
|
CONFIG_SR_WN_WN9_HILEXIN=y
|
||||||
|
CONFIG_SR_MN_CN_MULTINET6_QUANT=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
CONFIG_SPIRAM_MODE_OCT=y
|
CONFIG_SPIRAM_MODE_OCT=y
|
||||||
CONFIG_SPIRAM_SPEED_80M=y
|
CONFIG_SPIRAM_SPEED_80M=y
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_SR_MN_CN_NONE=y
|
CONFIG_SR_WN_WN9_HIESP=y
|
||||||
CONFIG_SR_MN_EN_MULTINET6_QUANT=y
|
CONFIG_SR_MN_EN_MULTINET6_QUANT=y
|
||||||
CONFIG_ESP_PHY_REDUCE_TX_POWER=y
|
CONFIG_ESP_PHY_REDUCE_TX_POWER=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_SR_MN_CN_NONE=y
|
CONFIG_SR_WN_WN9_HIESP=y
|
||||||
CONFIG_SR_MN_EN_MULTINET7_QUANT=y
|
CONFIG_SR_MN_EN_MULTINET7_QUANT=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
CONFIG_SPIRAM_MODE_OCT=y
|
CONFIG_SPIRAM_MODE_OCT=y
|
||||||
|
|||||||
@ -1,13 +1,12 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_USE_NSNET=y
|
CONFIG_SR_NSN_NSNET2=y
|
||||||
CONFIG_USE_MULTINET=n
|
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
CONFIG_SPIRAM_MODE_OCT=y
|
CONFIG_SPIRAM_MODE_OCT=y
|
||||||
CONFIG_SPIRAM_SPEED_80M=y
|
CONFIG_SPIRAM_SPEED_80M=y
|
||||||
|
|||||||
@ -5,6 +5,7 @@ CONFIG_IDF_TARGET="esp32p4"
|
|||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
|
CONFIG_SR_WN_WN9_HILEXIN=y
|
||||||
CONFIG_SR_MN_CN_MULTINET7_QUANT=y
|
CONFIG_SR_MN_CN_MULTINET7_QUANT=y
|
||||||
CONFIG_COMPILER_OPTIMIZATION_PERF=y
|
CONFIG_COMPILER_OPTIMIZATION_PERF=y
|
||||||
CONFIG_ESP32P4_REV_MIN_0=y
|
CONFIG_ESP32P4_REV_MIN_0=y
|
||||||
|
|||||||
@ -5,9 +5,8 @@ CONFIG_IDF_TARGET="esp32p4"
|
|||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_USE_NSNET=y
|
|
||||||
CONFIG_SR_WN_WN9_HIESP=y
|
CONFIG_SR_WN_WN9_HIESP=y
|
||||||
CONFIG_USE_MULTINET=n
|
CONFIG_SR_NSN_NSNET2=y
|
||||||
CONFIG_COMPILER_OPTIMIZATION_PERF=y
|
CONFIG_COMPILER_OPTIMIZATION_PERF=y
|
||||||
CONFIG_ESP32P4_REV_MIN_0=y
|
CONFIG_ESP32P4_REV_MIN_0=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
|
|||||||
24
test_apps/esp-sr/sdkconfig.ci.vadnet
Normal file
24
test_apps/esp-sr/sdkconfig.ci.vadnet
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
|
#
|
||||||
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
|
CONFIG_SR_VADN_VADNET1_MEDIUM=y
|
||||||
|
CONFIG_SR_WN_WN9_HILEXIN=y
|
||||||
|
CONFIG_SPIRAM=y
|
||||||
|
CONFIG_SPIRAM_MODE_OCT=y
|
||||||
|
CONFIG_SPIRAM_SPEED_80M=y
|
||||||
|
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
|
||||||
|
CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
|
||||||
|
CONFIG_ESP32S3_DATA_CACHE_64KB=y
|
||||||
|
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
|
||||||
|
CONFIG_ESP_MAIN_TASK_STACK_SIZE=8192
|
||||||
|
CONFIG_ESP_WIFI_GMAC_SUPPORT=n
|
||||||
|
CONFIG_FREERTOS_VTASKLIST_INCLUDE_COREID=y
|
||||||
|
CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y
|
||||||
|
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
|
||||||
|
CONFIG_LWIP_TCP_WND_DEFAULT=5744
|
||||||
|
CONFIG_UNITY_CRITICAL_LEAK_LEVEL_GENERAL=1024
|
||||||
@ -1,13 +1,12 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_USE_NSNET=y
|
CONFIG_SR_WN_WN9_HILEXIN=y
|
||||||
CONFIG_USE_MULTINET=n
|
|
||||||
CONFIG_ESP_PHY_REDUCE_TX_POWER=y
|
CONFIG_ESP_PHY_REDUCE_TX_POWER=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
CONFIG_SPIRAM_MODE_OCT=y
|
CONFIG_SPIRAM_MODE_OCT=y
|
||||||
|
|||||||
@ -1,13 +1,10 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.3.1 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32p4"
|
CONFIG_IDF_TARGET="esp32p4"
|
||||||
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
|
||||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
|
||||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||||
CONFIG_USE_AFE=n
|
|
||||||
CONFIG_USE_WAKENET=n
|
|
||||||
CONFIG_USE_MULTINET=n
|
|
||||||
CONFIG_COMPILER_OPTIMIZATION_PERF=y
|
CONFIG_COMPILER_OPTIMIZATION_PERF=y
|
||||||
CONFIG_ESP32P4_REV_MIN_0=y
|
CONFIG_ESP32P4_REV_MIN_0=y
|
||||||
CONFIG_SPIRAM=y
|
CONFIG_SPIRAM=y
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
# This file was generated using idf.py save-defconfig. It can be edited manually.
|
||||||
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
|
# Espressif IoT Development Framework (ESP-IDF) 5.3.1 Project Minimal Configuration
|
||||||
#
|
#
|
||||||
CONFIG_IDF_TARGET="esp32s3"
|
CONFIG_IDF_TARGET="esp32s3"
|
||||||
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
|
||||||
@ -13,6 +13,9 @@ CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
|
|||||||
CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
|
CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
|
||||||
CONFIG_ESP32S3_DATA_CACHE_64KB=y
|
CONFIG_ESP32S3_DATA_CACHE_64KB=y
|
||||||
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
|
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
|
||||||
|
CONFIG_ESP_SYSTEM_ALLOW_RTC_FAST_MEM_AS_HEAP=n
|
||||||
|
CONFIG_ESP_INT_WDT=n
|
||||||
|
CONFIG_ESP_TASK_WDT_EN=n
|
||||||
CONFIG_ESP_WIFI_GMAC_SUPPORT=n
|
CONFIG_ESP_WIFI_GMAC_SUPPORT=n
|
||||||
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
|
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
|
||||||
CONFIG_LWIP_TCP_WND_DEFAULT=5744
|
CONFIG_LWIP_TCP_WND_DEFAULT=5744
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user