feat: Add first noise suppression model (nsnet1) for ESP32-S3

This commit is contained in:
xysun 2023-11-17 15:31:52 +08:00
parent bdd0373263
commit 7627897716
17 changed files with 1977 additions and 3 deletions

View File

@ -4,6 +4,10 @@
- Available storage is less than the remaining flash space on IDF v5.0.
If the model partition cannot be mapped successfully, check the remaining free pages with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)`, or update IDF to v5.1 or later.
## unreleased
- Add Chinese MultiNet7 models
- Add first Noise Suppression model: nsnet1
## 1.5.1
- Reduce Internal RAM of multinet7
- Update benchmark

View File

@ -93,6 +93,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
esp_tts_chinese
voice_set_xiaole
wakenet
nsnet
"-Wl,--end-group")
set(MVMODEL_EXE ${COMPONENT_PATH}/model/movemodel.py)

View File

@ -29,6 +29,25 @@ choice AFE_INTERFACE_SEL
endchoice
# Master switch for the deep noise suppression (nsnet) support; off by default.
# The model choice that follows is only offered when this option is enabled.
config USE_NSNET
bool "use nsnet"
default "n"
# Selects which deep noise suppression model is loaded. Only visible when
# USE_NSNET is enabled; defaults to nsnet1, which is restricted to the
# ESP32-S3 target. "None" selects no model.
choice SR_NSN_MODEL_LOAD
prompt "Select deep noise suppression"
default SR_NSN_NSNET1
depends on USE_NSNET
help
Select the deep noise suppression to be loaded.
config SR_NSN_NONE
bool "None"
config SR_NSN_NSNET1
bool "Deep noise suppression v1 (nsnet1)"
depends on IDF_TARGET_ESP32S3
endchoice
config USE_WAKENET
bool "use wakenet"

View File

@ -39,6 +39,12 @@ typedef struct {
int sample_rate; // sample rate of audio
} afe_pcm_config_t;
// Noise suppression method used by the AFE (stored in afe_config_t.afe_ns_mode).
typedef enum {
NS_MODE_SSP = 0, // speech signal processing method
NS_MODE_NET = 1, // deep noise suppression network method
} afe_ns_mode_t;
/**
* @brief Function to get the debug audio data
*
@ -82,6 +88,8 @@ typedef struct {
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
bool debug_init;
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
afe_ns_mode_t afe_ns_mode;
char *afe_ns_model_name;
} afe_config_t;
@ -111,6 +119,8 @@ typedef struct {
.pcm_config.sample_rate = 16000, \
.debug_init = false, \
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
}
#elif CONFIG_IDF_TARGET_ESP32S3
#define AFE_CONFIG_DEFAULT() { \
@ -138,6 +148,8 @@ typedef struct {
.pcm_config.sample_rate = 16000, \
.debug_init = false, \
.debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
.afe_ns_mode = NS_MODE_SSP, \
.afe_ns_model_name = NULL, \
}
#endif

View File

@ -0,0 +1,64 @@
#pragma once
#include "stdint.h"
// Opaque model data container: only forward-declared here, so callers can
// hold and pass pointers to it but cannot access its contents.
typedef struct esp_nsn_data_t esp_nsn_data_t;
/**
* @brief Function type to initialize a model instance
*
* @param model_name The name of the model instance
* @returns Handle to the model data
*/
typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
/**
* @brief Get the number of samples that need to be passed to the process function
*
* Every noise suppression model processes a fixed number of samples per call. This function
* can be used to query that number. Note that the returned value is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The number of samples to feed the process function
*/
typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
/**
* @brief Feed samples of an audio stream to the noise suppression model and get the processed data.
*
*
* @param model The model object used to process the samples
* @param in_data An array of 16-bit signed audio samples. The array size used can be queried by the
* get_samp_chunksize function.
* @param out_data An array of 16-bit signed audio samples after processing.
* NOTE(review): presumably the same number of samples as in_data - confirm.
* @return Status code of the call.
* NOTE(review): the meaning of specific values is not defined in this header - confirm
* against the implementation.
*/
typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
/**
* @brief Get the sample rate of the samples to feed to the process function
*
* @param model The model object to query
* @return The sample rate, in Hz
*/
typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
/**
* @brief Destroy a noise suppression model
*
* @param model Model object to destroy, as returned by the create function
*/
typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
/**
* This structure contains the functions used to do operations on a noise suppression model.
*/
typedef struct {
esp_nsn_iface_op_create_t create;
esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_nsn_iface_op_process_t process;
esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
esp_nsn_iface_op_destroy_t destroy;
} esp_nsn_iface_t;

View File

@ -0,0 +1,9 @@
#pragma once
#include "esp_nsn_iface.h"
// The prefix of the nsnet model name is used to filter all nsnet models from the available models.
#define ESP_NSNET_PREFIX "nsnet"
// Interface instance for nsnet1; defined outside this header.
extern const esp_nsn_iface_t esp_nsnet1_quantized;
// Default noise suppression interface handle; currently an alias for esp_nsnet1_quantized.
#define ESP_NSN_HANDLE esp_nsnet1_quantized

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
lib/esp32s3/libnsnet.a Normal file

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
# (neural network type)_(model data version)_ns_0_0.0_0.0
nsnet1_v1_ns_0_0.0_0.0

Binary file not shown.

Binary file not shown.

View File

@ -229,7 +229,8 @@ void srmodel_config_deinit(srmodel_list_t *models)
}
free(models);
}
models = NULL;
// models is static_srmodels
static_srmodels = NULL;
}
model_coeff_getter_t *srmodel_get_model_coeff(char *model_name)

View File

@ -44,8 +44,23 @@ def test_wakenet(dut: Dut)-> None:
'wn9_hilexin',
],
)
def test_afe(dut: Dut)-> None:
def test_sr_afe(dut: Dut)-> None:
# dut.run_all_single_board_cases(group="afe")
dut.expect_exact('Press ENTER to see the list of tests.')
dut.write('[afe]')
dut.write('[afe_sr]')
dut.expect_unity_test_output(timeout = 1000)
@pytest.mark.target('esp32s3')
@pytest.mark.env('esp32s3')
@pytest.mark.parametrize(
    'config',
    [
        'nsnet1',
    ],
)
def test_vc_afe(dut: Dut)-> None:
    """Run the Unity test cases tagged ``[afe_vc]`` with the ``nsnet1`` config.

    Waits for the Unity menu prompt, sends the ``[afe_vc]`` tag and then waits
    for the Unity test output.

    The function is deliberately not named ``test_sr_afe``: this module already
    defines a test with that name, and a second ``def`` with the same name
    rebinds the module attribute and shadows the earlier test.
    """
    # dut.run_all_single_board_cases(group="afe")
    dut.expect_exact('Press ENTER to see the list of tests.')
    dut.write('[afe_vc]')
    dut.expect_unity_test_output(timeout = 1000)

File diff suppressed because it is too large Load Diff