feat(esp32p4): add vadnet1

This commit is contained in:
xysun 2024-12-31 20:26:04 +08:00
parent 404fa46e38
commit d72ed551cb
21 changed files with 296 additions and 208 deletions

View File

@ -13,14 +13,9 @@ choice MODEL_DATA_PATH
endchoice
config USE_AFE
bool "use afe"
default "y"
choice AFE_INTERFACE_SEL
prompt "Afe interface"
default AFE_INTERFACE_V1
depends on USE_AFE
help
Select the afe interface to be used.
@ -29,187 +24,60 @@ choice AFE_INTERFACE_SEL
endchoice
config USE_NSNET
bool "use nsnet"
default "n"
choice SR_NSN_MODEL_LOAD
prompt "Select deep noise suppression"
default SR_NSN_NSNET2
depends on USE_NSNET
prompt "Select noise suppression model"
default SR_NSN_WEBRTC
help
Select the deep noise suppression to be loaded.
Select the noise suppression model to be loaded.
config SR_NSN_NONE
bool "None"
config SR_NSN_WEBRTC
bool "noise suppression (WebRTC)"
config SR_NSN_NSNET1
bool "Deep noise suppression v1 (nsnet1)"
depends on IDF_TARGET_ESP32S3
config SR_NSN_NSNET2
bool "Deep noise suppression v2 (nsnet2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
endchoice
config USE_WAKENET
bool "use wakenet"
default "y"
# Voice activity detection model selection.
# The default must name a member of this choice; SR_VAD_WEBRTC is the WebRTC entry.
# NOTE: the spelling of SR_VADNET1_MODLE_MEDIUM is kept as-is because renaming
# it would change the generated CONFIG_ symbol that other code may test.
choice SR_VAD_MODEL_LOAD
    prompt "Select voice activity detection"
    default SR_VAD_WEBRTC
    help
        Select the vad model to be loaded.

    config SR_VAD_WEBRTC
        bool "voice activity detection (WebRTC)"
    config SR_VADNET1_MODLE_MEDIUM
        bool "voice activity detection (vadnet1 medium)"
        depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
endchoice
choice SR_WN_MODEL_LOAD
prompt "Select wake words"
default SR_WN_WN9_HILEXIN
depends on USE_WAKENET
default SR_WN_WN5_HILEXIN
depends on IDF_TARGET_ESP32
help
Select the Wake Words to be loaded.
config SR_WN_WN5_HILEXIN
bool "Hi,乐鑫 (wn5_hilexin)"
depends on IDF_TARGET_ESP32
bool "Hi,Lexin (wn5_hilexin)"
config SR_WN_WN5X3_HILEXIN
bool "Hi,乐鑫 (wn5_hilexinX3)"
depends on IDF_TARGET_ESP32
bool "Hi,Lexin (wn5_hilexinX3)"
config SR_WN_WN5_NIHAOXIAOZHI
bool "你好小智 (wn5_nihaoxiaozhi)"
depends on IDF_TARGET_ESP32
bool "nihaoxiaozhi (wn5_nihaoxiaozhi)"
config SR_WN_WN5X3_NIHAOXIAOZHI
bool "你好小智 (wn5_nihaoxiaozhiX3)"
depends on IDF_TARGET_ESP32
bool "nihaoxiaozhi (wn5_nihaoxiaozhiX3)"
config SR_WN_WN5X3_NIHAOXIAOXIN
bool "你好小鑫 (wn5_nihaoxiaoxinX3)"
depends on IDF_TARGET_ESP32
config SR_WN_WN8_ALEXA
bool "Alexa (wn8_alexa)"
depends on IDF_TARGET_ESP32S3
config SR_WN_WN9_HILEXIN
bool "Hi,乐鑫 (wn9_hilexin)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_XIAOAITONGXUE
bool "小爱同学 (wn9_xiaoaitongxue)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_ALEXA
bool "Alexa (wn9_alexa)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HIESP
bool "Hi,ESP (wn9_hiesp)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HIMFIVE
bool "Hi,M Five (wn9_himfive)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_NIHAOXIAOZHI_TTS
bool "你好小智 (wn9_nihaoxiaozhi_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_JARVIS_TTS
bool "Jarvis (wn9_jarvis_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_COMPUTER_TTS
bool "computer (wn9_computer_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HEYWILLOW_TTS
bool "Hey,Willow (wn9_heywillow_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_SOPHIA_TTS
bool "Sophia (wn9_sophia_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_NIHAOXIAOXIN_TTS
bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_XIAOMEITONGXUE_TTS
bool "小美同学 (wn9_xiaomeitongxue_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HIXIAOXING_TTS
bool "Hi,小星 (wn9_hixiaoxing_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_MYCROFT_TTS
bool "Mycroft (wn9_mycroft_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HEYPRINTER_TTS
bool "Hey,Printer (wn9_heyprinter_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_XIAOLONGXIAOLONG_TTS
bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_MIAOMIAOTONGXUE_TTS
bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HIJOY_TTS
bool "Hi,Joy (wn9_hijoy_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HILILI_TTS
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HITELLY_TTS
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HEYWANDA_TTS
bool "Hey,Wanda (wn9_heywanda_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HIMIAOMIAO_TTS
bool "Hi,喵喵 (wn9_himiaomiao_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_XIAOBINXIAOBIN_TTS
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HAIXIAOWU_TTS
bool "Hi,小巫 (wn9_haixiaowu_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_ASTROLABE_TTS
bool "Astrolabe (wn9_astrolabe_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_XIAOYAXIAOYA_TTS2
bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HIJASON_TTS2
bool "Hi,Jason (wn9_hijason_tts2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_LINAIBAN_TTS2
bool "璃奈板 (wn9_linaiban_tts2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_CUSTOMWORD
bool "customized word (wn9_customword)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_LOAD_MULIT_WORD
bool "Load Multiple Wake Words"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
bool "nihaoxiaoxin (wn5_nihaoxiaoxinX3)"
endchoice
menu "Load Multiple Wake Words"
depends on SR_WN_LOAD_MULIT_WORD
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
config SR_WN_WN9_HILEXIN_MULTI
bool "Hi,乐鑫 (wn9_hilexin)"
@ -241,94 +109,90 @@ menu "Load Multiple Wake Words"
config SR_WN_WN9_HEYWILLOW_TTS_MULTI
bool "Hey,Willow (wn9_heywillow_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_SOPHIA_TTS_MULTI
bool "Sophia (wn9_sophia_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_NIHAOXIAOXIN_TTS_MULTI
bool "你好小鑫 (wn9_nihaoxiaoxin_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_XIAOMEITONGXUE_TTS_MULTI
bool "小美同学 (wn9_xiaomeitongxue_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HEYPRINTER_TTS_MULTI
bool "Hey,Printer (wn9_heyprinter_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_XIAOLONGXIAOLONG_TTS_MULTI
bool "小龙小龙 (wn9_xiaolongxiaolong_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_MIAOMIAOTONGXUE_TTS_MULTI
bool "喵喵同学 (wn9_miaomiaotongxue_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HEYWANDA_TTS_MULTI
bool "Hey,Wanda (wn9_heywanda_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HIMIAOMIAO_TTS_MULTI
bool "Hi,喵喵 (wn9_himiaomiao_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_MYCROFT_TTS_MULTI
bool "Mycroft (wn9_mycroft_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HIJOY_TTS_MULTI
bool "Hi,Joy (wn9_hijoy_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HILILI_TTS_MULTI
bool "Hi,Lily/Hi,莉莉 (wn9_hilili_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HITELLY_TTS_MULTI
bool "Hi,Telly/Hi,泰力 (wn9_hitelly_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_XIAOBINXIAOBIN_TTS_MULTI
bool "小滨小滨/小冰小冰 (wn9_xiaobinxiaobin_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HAIXIAOWU_TTS_MULTI
bool "Hi,小巫 (wn9_haixiaowu_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_ASTROLABE_TTS_MULTI
bool "Astrolabe (wn9_astrolabe_tts)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_XIAOYAXIAOYA_TTS2_MULTI
bool "小鸭小鸭 (wn9_xiaoyaxiaoya_tts2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_HIJASON_TTS2_MULTI
bool "Hi,Jason (wn9_hijason_tts2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
config SR_WN_WN9_LINAIBAN_TTS2_MULTI
bool "璃奈板 (wn9_linaiban_tts2)"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
default False
endmenu
config USE_MULTINET
bool "use multinet"
default "y"
choice CHINESE_SR_MN_MODEL_SEL
prompt "Chinese Speech Commands Model"
default SR_MN_CN_MULTINET6_QUANT
depends on USE_MULTINET
default SR_MN_CN_NONE
help
Select the Wake Word Engine to be used.
Select the Chinese Speech Commands Model.
config SR_MN_CN_NONE
bool "None"
@ -362,9 +226,8 @@ endchoice
choice ENGLISH_SR_MN_MODEL_SEL
prompt "English Speech Commands Model"
default SR_MN_EN_NONE
depends on USE_MULTINET
help
Select the Wake Word Engine to be used.
Select the English Speech Commands Model.
config SR_MN_EN_NONE
bool "None"

View File

@ -92,6 +92,10 @@ typedef struct {
char *afe_ns_model_name;
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet
char *vad_model_name; // The model name of vad, support vadnet1 and vadnet1_small
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms
int vad_min_noise_ms; // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
bool vad_mute_playback; // If true, the playback will be muted for vad detection
} afe_config_t;
@ -126,6 +130,10 @@ typedef struct {
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
.vad_model_name = NULL, \
.vad_min_speech_ms = 64, \
.vad_min_noise_ms = 256, \
.vad_mute_playback = false, \
}
#elif CONFIG_IDF_TARGET_ESP32P4
#define AFE_CONFIG_DEFAULT() { \
@ -158,6 +166,10 @@ typedef struct {
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
.vad_model_name = NULL, \
.vad_min_speech_ms = 64, \
.vad_min_noise_ms = 256, \
.vad_mute_playback = false, \
}
#elif CONFIG_IDF_TARGET_ESP32S3
#define AFE_CONFIG_DEFAULT() { \
@ -190,6 +202,10 @@ typedef struct {
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
.vad_model_name = NULL, \
.vad_min_speech_ms = 64, \
.vad_min_noise_ms = 256, \
.vad_mute_playback = false, \
}
#endif

View File

@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
{
int16_t *data; // the data of audio.
int data_size; // the size of data. The unit is byte.
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
int vad_cache_size; // the size of vad_cache. The unit is byte.
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
wakenet_state_t wakeup_state; // the value is wakenet_state_t
@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
afe_vad_state_t vad_state; // the value is afe_vad_state_t
int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. It's unit is the number of samples.
int wake_word_length; // the length of wake word. The unit is the number of samples.
int ret_value; // the return state of fetch function
void* reserved; // reserved for future use
} afe_fetch_result_t;

View File

@ -4,7 +4,6 @@
extern "C" {
#endif
#if defined CONFIG_USE_AFE
#include "esp_afe_sr_iface.h"
@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
#endif
#else
#include "esp_afe_sr_iface.h"
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
#endif
#ifdef __cplusplus
}
#endif

View File

@ -25,22 +25,65 @@ extern "C" {
/**
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
* restrictive in reporting speech.
* restrictive in reporting speech. So if you want to trigger more speech, please select a lower mode.
*/
typedef enum {
VAD_MODE_0 = 0,
VAD_MODE_1,
VAD_MODE_2,
VAD_MODE_3,
VAD_MODE_4
VAD_MODE_0 = 0, // Normal
VAD_MODE_1, // Aggressive
VAD_MODE_2, // Very Aggressive
VAD_MODE_3, // Very Very Aggressive
VAD_MODE_4 // Very Very Very Aggressive
} vad_mode_t;
typedef enum {
VAD_SILENCE = 0,
VAD_SPEECH
VAD_SPEECH = 1,
} vad_state_t;
typedef void* vad_handle_t;
// Bookkeeping for a VAD trigger (see vad_trigger_alloc / vad_trigger_detect).
typedef struct vad_trigger_tag {
// VAD state stored in the trigger (presumably the most recent output state -- TODO confirm).
vad_state_t state;
// Minimum frame number of speech duration, as passed to vad_trigger_alloc.
unsigned int min_speech_len;
// Presumably the running count of noise frames -- TODO confirm against the implementation.
unsigned int noise_len;
// Minimum frame number of noise duration, as passed to vad_trigger_alloc.
unsigned int min_noise_len;
// Presumably the running count of speech frames -- TODO confirm against the implementation.
unsigned int speech_len;
} vad_trigger_t;
// Upper bound constant for VAD lengths (presumably caps the trigger's frame
// counters -- TODO confirm against the implementation).
// The replacement list is parenthesized so the macro expands as a single
// operand: without it, `-vad_MAX_LEN` or `x - vad_MAX_LEN` would bind to
// INT32_MAX only and then subtract 1 separately.
#define vad_MAX_LEN (INT32_MAX - 1)
/**
* @brief Allocate a VAD trigger
*
* @param min_speech_len Minimum frame number of speech duration
* @param min_noise_len Minimum frame number of noise duration
*
* @return Trigger pointer (presumably released with vad_trigger_free -- confirm)
**/
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
/**
* @brief Free a VAD trigger
*
* @param trigger The trigger to free
**/
void vad_trigger_free(vad_trigger_t *trigger);
/**
* @brief Reset a VAD trigger
*
* @param trigger The trigger to reset
**/
void vad_trigger_reset(vad_trigger_t *trigger);
/**
* @brief Detect voice activity using the trigger
*
* @param trigger The trigger to use
* @param state The input VAD state (presumably the raw per-frame decision --
* TODO confirm against the implementation)
*
* @return The VAD state reported by the trigger
**/
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
// VAD handle contents: a trigger bundled with an underlying VAD instance.
typedef struct {
// Trigger associated with this handle.
vad_trigger_t *trigger;
// Opaque pointer to the VAD instance; its concrete type is not visible in this header.
void *vad_inst;
}vad_handle_with_trigger_t;
// Handle type returned by vad_create() and vad_create_with_param().
typedef vad_handle_with_trigger_t* vad_handle_t;
/**
* @brief Creates an instance to the VAD structure.
@ -53,6 +96,18 @@ typedef void* vad_handle_t;
*/
vad_handle_t vad_create(vad_mode_t vad_mode);
/**
* @brief Creates an instance of the VAD structure with explicit minimum
* speech and noise durations.
*
* @param vad_mode Sets the VAD operating mode.
* @param min_speech_len Minimum frame number of speech duration
* @param min_noise_len Minimum frame number of noise duration
* @return
* - NULL: Create failed
* - Others: The instance of VAD
*/
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
/**
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
*

View File

@ -0,0 +1,142 @@
#pragma once
#include "esp_vad.h"
#include "stdint.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque model data container. Only a forward declaration is given here, so
// users of this header handle it through pointers.
typedef struct model_iface_data_t model_iface_data_t;
// /**
// * @brief The state of vad
// */
// typedef enum {
// VAD_NOISE = -1, // Noise
// VADNET_STATE_SILENCE = 0, // Silence
// VAD_SPEECH = 1 // Speech
// } vad_state_t;
/**
* @brief Easy function type to initialize a model instance with a detection mode
* and specified model name
*
* @param model_name The specified model name
* @param mode The voice activity detection mode
* @param channel_num The number of input audio channels
* @param min_speech_ms The minimum duration of speech in ms to trigger vad
* speech
* @param min_noise_ms The minimum duration of noise in ms to trigger vad
* noise
* @returns Handle to the model data
*/
typedef model_iface_data_t *(*esp_vadn_iface_op_create_t)(
const void *model_name, vad_mode_t mode, int channel_num, int min_speech_ms, int min_noise_ms);
/**
* @brief Get the amount of samples that need to be passed to the detect
* function
*
* Every model processes a certain number of samples at the same time. This
* function can be used to query that amount. Note that the returned amount is
* in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_vadn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Get the number of audio channels that need to be passed to the detect
* function
*
* @param model The model object to query
* @return The number of channels to feed the detect function
*/
typedef int (*esp_vadn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_vadn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Set the detection threshold to manually adjust the probability
*
* @param model The model object to modify
* @param det_threshold The detection threshold, the range of
* det_threshold is 0.5~0.9999
* @return 0: setting failed, 1: setting success
*/
typedef int (*esp_vadn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
* @brief Get the voice activity detection threshold
*
* @param model The model object to query
* @return The detection threshold
*/
typedef float (*esp_vadn_iface_op_get_det_threshold_t)(model_iface_data_t *model);
/**
* @brief Feed samples of an audio stream to the vad model and detect whether
* it is voice.
*
* @param model The model object to query
* @param samples An array of 16-bit signed audio samples. The array size used
* can be queried by the get_samp_chunksize function.
* @return The detected state, as a vad_state_t value
*/
typedef vad_state_t (*esp_vadn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Get the triggered channel index. The channel index starts from zero.
*
* @param model The model object to query
* @return The channel index
*/
typedef int (*esp_vadn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
* @brief Clean all states of the model
*
* @param model The model object whose states are cleaned
*/
typedef void (*esp_vadn_iface_op_clean_t)(model_iface_data_t *model);
/**
* @brief Destroy a model object
*
* @param model The model object to destroy
*/
typedef void (*esp_vadn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* This structure contains the functions used to do operations on a voice
* activity detection model. Each member is a function pointer of the matching
* esp_vadn_iface_op_*_t type declared above.
*/
typedef struct {
esp_vadn_iface_op_create_t create;
esp_vadn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_vadn_iface_op_get_channel_num_t get_channel_num;
esp_vadn_iface_op_get_samp_rate_t get_samp_rate;
esp_vadn_iface_op_set_det_threshold_t set_det_threshold;
esp_vadn_iface_op_get_det_threshold_t get_det_threshold;
esp_vadn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_vadn_iface_op_detect_t detect;
esp_vadn_iface_op_clean_t clean;
esp_vadn_iface_op_destroy_t destroy;
} esp_vadn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,22 @@
#pragma once
#include "esp_vadn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
// The prefix of vadnet model names, used to filter all vadnet models from the available models.
#define ESP_VADNET_PREFIX "vadnet"
/**
* @brief Get the vadnet handle from model name
*
* @param model_name The name of model
* @returns The handle of vadnet
*/
const esp_vadn_iface_t *esp_vadn_handle_from_name(const char *model_name);
#ifdef __cplusplus
}
#endif

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
lib/esp32p4/libvadnet.a Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
vadnet1_medium50k_Speech_5_0.849_0.573
vadnet1_50k_Speech_3_0.5_0.1