feat: update esp32 interface

This commit is contained in:
xysun 2025-01-02 20:34:03 +08:00
parent fd4165653f
commit 3d97b360aa
24 changed files with 160 additions and 65 deletions

View File

@ -230,7 +230,7 @@ build_esp_sr_html:
parallel:
matrix:
- DOCLANG: ["en", "zh_CN"]
DOCTGT: ["esp32", "esp32s3", "esp32p4"]
DOCTGT: ["esp32", "esp32s3"]
build_esp_sr_pdf:
extends:
@ -252,7 +252,7 @@ build_esp_sr_pdf:
parallel:
matrix:
- DOCLANG: ["en", "zh_CN"]
DOCTGT: ["esp32", "esp32s3", "esp32p4"]
DOCTGT: ["esp32", "esp32s3"]
.deploy_docs_template:

View File

@ -202,7 +202,7 @@ class IdfPytestEmbedded:
for item in items:
# default timeout, applied only when the test case sets no timeout marker of its own
if 'timeout' not in item.keywords:
item.add_marker(pytest.mark.timeout(8 * 60))
item.add_marker(pytest.mark.timeout(500 * 60))
# filter all the test cases with "--target"
if self.target:

View File

@ -1,7 +1,7 @@
from esp_docs.conf_docs import * # noqa: F403,F401
languages = ['en', 'zh_CN']
idf_targets = ['esp32', 'esp32s3', 'esp32p4']
idf_targets = ['esp32', 'esp32s3']
extensions += ['sphinx_copybutton',
'sphinxcontrib.wavedrom',

View File

@ -78,7 +78,7 @@ void *dl_lib_calloc_psram(int cnt, int size, int align);
/**
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
*
* @param prt Pointer to free
* @param ptr Pointer to free
*/
void dl_lib_free(void *ptr);
@ -415,4 +415,4 @@ dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, con
}
#endif
#endif
#endif

View File

@ -292,6 +292,7 @@ qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, in
void print_convq8(dl_convq8_queue_t *cq, int offset);
void print_convq(dl_convq_queue_t *cq, int offset);
void dl_relu_convq8(dl_convq8_queue_t *cq);
void lstmq8_free(void);

View File

@ -279,9 +279,9 @@ dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
dl_matrix2dq_t *dl_convq16_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
dl_matrix2dq_t *bias, int prenum);
/**
* @brief Allocate a fixed-point multi channel convolution queue

View File

@ -25,10 +25,6 @@
extern "C" {
#endif
// #ifdef CONFIG_IDF_TARGET_ESP32S3
// #include "dl_tie728_bzero.h"
// #endif
typedef float fptp_t;
#if CONFIG_BT_SHARE_MEM_REUSE

View File

@ -23,7 +23,8 @@ extern "C" {
#define USE_AEC_FFT // Not kiss_fft
#define AEC_USE_SPIRAM 0
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
#define AEC_FRAME_LENGTH_MS 16
//#define AEC_FRAME_LENGTH_MS 16
#define AEC_FRAME_LENGTH_MS 32
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
typedef void* aec_handle_t;

View File

@ -90,6 +90,12 @@ typedef struct {
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
afe_ns_mode_t afe_ns_mode;
char *afe_ns_model_name;
bool fixed_first_channel; // If true, the channel after first wake-up is fixed to raw data of microphone
// otherwise, select channel number by wakenet
char *vad_model_name; // The model name of vad, support vadnet1 and vadnet1_small
int vad_min_speech_ms; // The minimum duration of speech in ms. It should be bigger than 32 ms
int vad_min_noise_ms; // The minimum duration of noise/silence in ms. It should be bigger than 64 ms
bool vad_mute_playback; // If true, the playback will be muted for vad detection
} afe_config_t;
@ -123,6 +129,47 @@ typedef struct {
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
.vad_model_name = NULL, \
.vad_min_speech_ms = 64, \
.vad_min_noise_ms = 256, \
.vad_mute_playback = false, \
}
#elif CONFIG_IDF_TARGET_ESP32P4
/**
 * Default afe_config_t initializer for the ESP32-P4 target
 * (this variant is selected by CONFIG_IDF_TARGET_ESP32P4).
 *
 * Defaults set here:
 *  - AEC, SE, VAD and WakeNet are enabled; voice communication and its AGC are disabled.
 *  - PCM layout: 2 channels in total (1 microphone + 1 reference), sample_rate 16000.
 *  - All model names (wakenet, ns, vad) are NULL.
 *  - VAD timing: vad_min_speech_ms = 64, vad_min_noise_ms = 256; playback is not muted for VAD.
 *  - fixed_first_channel is true.
 */
#define AFE_CONFIG_DEFAULT() { \
.aec_init = true, \
.se_init = true, \
.vad_init = true, \
.wakenet_init = true, \
.voice_communication_init = false, \
.voice_communication_agc_init = false, \
.voice_communication_agc_gain = 15, \
.vad_mode = VAD_MODE_3, \
.wakenet_model_name = NULL, \
.wakenet_model_name_2 = NULL, \
.wakenet_mode = DET_MODE_90, \
.afe_mode = SR_MODE_LOW_COST, \
.afe_perferred_core = 0, \
.afe_perferred_priority = 5, \
.afe_ringbuf_size = 50, \
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
.afe_linear_gain = 1.0, \
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
.pcm_config = { \
.total_ch_num = 2, \
.mic_num = 1, \
.ref_num = 1, \
.sample_rate = 16000, \
}, \
.debug_init = false, \
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
.vad_model_name = NULL, \
.vad_min_speech_ms = 64, \
.vad_min_noise_ms = 256, \
.vad_mute_playback = false, \
}
#elif CONFIG_IDF_TARGET_ESP32S3
#define AFE_CONFIG_DEFAULT() { \
@ -154,6 +201,11 @@ typedef struct {
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
.fixed_first_channel = true, \
.vad_model_name = NULL, \
.vad_min_speech_ms = 64, \
.vad_min_noise_ms = 256, \
.vad_mute_playback = false, \
}
#endif

View File

@ -29,6 +29,8 @@ typedef struct afe_fetch_result_t
{
int16_t *data; // the data of audio.
int data_size; // the size of data. The unit is byte.
int16_t *vad_cache; // the cache data of vad. It's only valid when vad_cache_size > 0. It is used to complete the audio that was truncated.
int vad_cache_size; // the size of vad_cache. The unit is byte.
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
wakenet_state_t wakeup_state; // the value is wakenet_state_t
@ -36,7 +38,7 @@ typedef struct afe_fetch_result_t
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
afe_vad_state_t vad_state; // the value is afe_vad_state_t
int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. It's unit is the number of samples.
int wake_word_length; // the length of wake word. The unit is the number of samples.
int ret_value; // the return state of fetch function
void* reserved; // reserved for future use
} afe_fetch_result_t;
@ -112,7 +114,7 @@ typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *af
* @brief reset ringbuf of AFE.
*
* @param afe The AFE_SR object to query
* @return -1: fail, 0: success
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
@ -122,7 +124,7 @@ typedef int (*esp_afe_sr_iface_op_reset_buffer_t)(esp_afe_sr_data_t *afe);
*
* @param afe The AFE_SR object to query
* @param model_name The wakenet word (model name), should be DEFAULT_WAKE_WORD or EXTRA_WAKE_WORD
* @return 0: fail, 1: success
* @return -1: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* model_name);
@ -130,7 +132,7 @@ typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* m
* @brief Disable wakenet model.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
@ -138,7 +140,7 @@ typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
* @brief Enable wakenet model.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
@ -146,7 +148,7 @@ typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
* @brief Disable AEC algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
@ -154,7 +156,7 @@ typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
* @brief Enable AEC algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
@ -162,7 +164,7 @@ typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
* @brief Disable SE algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
@ -170,7 +172,7 @@ typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
* @brief Enable SE algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
* @return -1: fail, 0: disabled, 1: enabled
*/
typedef int (*esp_afe_sr_iface_op_enable_se_t)(esp_afe_sr_data_t *afe);

View File

@ -4,7 +4,6 @@
extern "C" {
#endif
#if defined CONFIG_USE_AFE
#include "esp_afe_sr_iface.h"
@ -19,17 +18,6 @@ extern const esp_afe_sr_iface_t esp_afe_vc_v1;
#endif
#else
#include "esp_afe_sr_iface.h"
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
#endif
#ifdef __cplusplus
}
#endif

View File

@ -2,8 +2,16 @@
#include "esp_nsn_iface.h"
// The prefix of nsnet model name is used to filter all nsnet models from available models.
/*
The prefix of nsnet model names.
Currently there are nsnet1 and nsnet2.
*/
#define ESP_NSNET_PREFIX "nsnet"
extern const esp_nsn_iface_t esp_nsnet1_quantized;
#define ESP_NSN_HANDLE esp_nsnet1_quantized
/**
* @brief Get the nsnet handle from model name
*
* @param model_name The name of model
* @returns The handle of multinet
*/
esp_nsn_iface_t *esp_nsnet_handle_from_name(char *model_name);

View File

@ -25,22 +25,65 @@ extern "C" {
/**
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
* restrictive in reporting speech.
* restrictive in reporting speech. So if you want to trigger on more speech, please select a lower mode.
*/
typedef enum {
VAD_MODE_0 = 0,
VAD_MODE_1,
VAD_MODE_2,
VAD_MODE_3,
VAD_MODE_4
VAD_MODE_0 = 0, // Normal
VAD_MODE_1, // Aggressive
VAD_MODE_2, // Very Aggressive
VAD_MODE_3, // Very Very Aggressive
VAD_MODE_4 // Very Very Very Aggressive
} vad_mode_t;
typedef enum {
VAD_SILENCE = 0,
VAD_SPEECH
VAD_SPEECH = 1,
} vad_state_t;
typedef void* vad_handle_t;
/**
 * @brief State kept by a VAD trigger.
 *
 * Instances are created by vad_trigger_alloc(), which takes the two minimum
 * lengths below as frame counts, and are passed to vad_trigger_detect().
 */
typedef struct vad_trigger_tag {
vad_state_t state;           // VAD state held by the trigger (VAD_SILENCE or VAD_SPEECH)
unsigned int min_speech_len; // minimum frame number of speech duration
unsigned int noise_len;      // NOTE(review): presumably a running count of noise/silence frames - confirm against the implementation
unsigned int min_noise_len;  // minimum frame number of noise duration
unsigned int speech_len;     // NOTE(review): presumably a running count of speech frames - confirm against the implementation
} vad_trigger_t;
// Upper bound constant, one below INT32_MAX.
// NOTE(review): presumably caps the trigger length counters - confirm against the implementation.
// The expansion is parenthesized so it stays a single value inside larger expressions
// (without the parentheses, `vad_MAX_LEN / 2` would expand to `INT32_MAX - 1 / 2`).
#define vad_MAX_LEN (INT32_MAX - 1)
/**
* @brief Allocate VAD trigger
*
* @param min_speech_len Minimum frame number of speech duration
* @param min_noise_len Minimum frame number of noise duration
*
* @return Trigger pointer
**/
vad_trigger_t *vad_trigger_alloc(int min_speech_len, int min_noise_len);
/**
* @brief Free VAD trigger
**/
void vad_trigger_free(vad_trigger_t *trigger);
/**
* @brief Reset VAD trigger
**/
void vad_trigger_reset(vad_trigger_t *trigger);
/**
* @brief Detect voice activity by trigger
**/
vad_state_t vad_trigger_detect(vad_trigger_t *trigger, vad_state_t state);
typedef struct {
vad_trigger_t *trigger;
void *vad_inst;
}vad_handle_with_trigger_t;
typedef vad_handle_with_trigger_t* vad_handle_t;
// typedef vad_handle_tag * vad_handle_t;
/**
* @brief Creates an instance to the VAD structure.
@ -53,6 +96,18 @@ typedef void* vad_handle_t;
*/
vad_handle_t vad_create(vad_mode_t vad_mode);
/**
* @brief Creates an instance of the VAD structure with custom minimum speech/noise durations.
*
* @param vad_mode Sets the VAD operating mode.
* @param min_speech_len Minimum frame number of speech duration
* @param min_noise_len Minimum frame number of noise duration
* @return
* - NULL: Create failed
* - Others: The instance of VAD
*/
vad_handle_t vad_create_with_param(vad_mode_t vad_mode, int min_speech_len, int min_noise_len);
/**
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
*

View File

@ -9,7 +9,7 @@ typedef struct {
void flite_g2p_result_free(flite_g2p_result *result);
flite_g2p_result *flite_g2p_get_result(char *grapheme);
flite_g2p_result *flite_g2p_get_result(const char *grapheme);
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -16,7 +16,6 @@ from pytest_embedded import Dut
)
def test_multinet_s3(dut: Dut)-> None:
dut.run_all_single_board_cases(group="mn")
dut.expect_unity_test_output(timeout = 1000)
@pytest.mark.target('esp32p4')
@pytest.mark.env('esp32p4')
@ -29,7 +28,6 @@ def test_multinet_s3(dut: Dut)-> None:
)
def test_multinet_p4(dut: Dut)-> None:
dut.run_all_single_board_cases(group="mn")
dut.expect_unity_test_output(timeout = 1000)
@pytest.mark.target('esp32s3')
@ -43,7 +41,6 @@ def test_multinet_p4(dut: Dut)-> None:
)
def test_wakenet(dut: Dut)-> None:
dut.run_all_single_board_cases(group="wn")
dut.expect_unity_test_output(timeout = 1000)
@pytest.mark.target('esp32p4')
@pytest.mark.env('esp32p4')
@ -56,7 +53,6 @@ def test_wakenet(dut: Dut)-> None:
)
def test_wakenet_p4(dut: Dut)-> None:
dut.run_all_single_board_cases(group="wn")
dut.expect_unity_test_output(timeout = 1000)
@pytest.mark.target('esp32s3')
@pytest.mark.env('esp32s3')
@ -64,12 +60,11 @@ def test_wakenet_p4(dut: Dut)-> None:
'config',
[
'wn9_hilexin',
'vadnet'
'vadnet',
],
)
def test_sr_afe(dut: Dut)-> None:
dut.run_all_single_board_cases(group="afe_sr")
dut.expect_unity_test_output(timeout = 1000)
dut.run_all_single_board_cases(group="afe_sr", timeout=100000)
@pytest.mark.target('esp32p4')
@pytest.mark.env('esp32p4')
@ -80,8 +75,7 @@ def test_sr_afe(dut: Dut)-> None:
],
)
def test_sr_afe_p4(dut: Dut)-> None:
dut.run_all_single_board_cases(group="afe_sr")
dut.expect_unity_test_output(timeout = 1000)
dut.run_all_single_board_cases(group="afe_sr", timeout=100000)
@pytest.mark.target('esp32s3')
@ -93,8 +87,7 @@ def test_sr_afe_p4(dut: Dut)-> None:
],
)
def test_vc_afe(dut: Dut)-> None:
dut.run_all_single_board_cases(group="afe_vc")
dut.expect_unity_test_output(timeout = 100000)
dut.run_all_single_board_cases(group="afe_vc", timeout=100000)
@pytest.mark.target('esp32p4')
@ -106,5 +99,4 @@ def test_vc_afe(dut: Dut)-> None:
],
)
def test_vc_afe_p4(dut: Dut)-> None:
dut.run_all_single_board_cases(group="afe_vc")
dut.expect_unity_test_output(timeout = 100000)
dut.run_all_single_board_cases(group="afe_vc", timeout=100000)

View File

@ -1,13 +1,10 @@
# This file was generated using idf.py save-defconfig. It can be edited manually.
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
# Espressif IoT Development Framework (ESP-IDF) 5.3.1 Project Minimal Configuration
#
CONFIG_IDF_TARGET="esp32p4"
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
CONFIG_PARTITION_TABLE_CUSTOM=y
CONFIG_USE_AFE=n
CONFIG_USE_WAKENET=n
CONFIG_USE_MULTINET=n
CONFIG_COMPILER_OPTIMIZATION_PERF=y
CONFIG_ESP32P4_REV_MIN_0=y
CONFIG_SPIRAM=y

View File

@ -1,5 +1,5 @@
# This file was generated using idf.py save-defconfig. It can be edited manually.
# Espressif IoT Development Framework (ESP-IDF) 5.3.0 Project Minimal Configuration
# Espressif IoT Development Framework (ESP-IDF) 5.3.1 Project Minimal Configuration
#
CONFIG_IDF_TARGET="esp32s3"
CONFIG_APP_RETRIEVE_LEN_ELF_SHA=16
@ -13,6 +13,9 @@ CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
CONFIG_ESP32S3_DATA_CACHE_64KB=y
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
CONFIG_ESP_SYSTEM_ALLOW_RTC_FAST_MEM_AS_HEAP=n
CONFIG_ESP_INT_WDT=n
CONFIG_ESP_TASK_WDT_EN=n
CONFIG_ESP_WIFI_GMAC_SUPPORT=n
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=5744
CONFIG_LWIP_TCP_WND_DEFAULT=5744