feat: Add first noise suppression model (nsnet1) for ESP32-S3

2025-09-15 15:28:44 +08:00 · 2023-11-17 15:31:52 +08:00 · 2023-11-17 15:31:52 +08:00 · 7627897716
commit 7627897716
parent bdd0373263
17 changed files with 1977 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,6 +4,10 @@
 - Available storage is less than the remaining flash space on IDF v5.0.   
 If you can not map model partition successfully, please check the left free storage by `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later.

+## unreleased
+- Add Chinese MultiNet7 models
+- Add first Noise Suppression model: nsnet1
+
 ## 1.5.1
 - Reduce Internal RAM of multinet7
 - Update benchmark
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -93,6 +93,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
        esp_tts_chinese
        voice_set_xiaole
        wakenet
+	nsnet
        "-Wl,--end-group")

    set(MVMODEL_EXE ${COMPONENT_PATH}/model/movemodel.py)
--- a/Kconfig.projbuild
+++ b/Kconfig.projbuild
@ -29,6 +29,25 @@ choice AFE_INTERFACE_SEL

 endchoice

+config USE_NSNET
+    bool "use nsnet"
+    default "n"
+
+choice SR_NSN_MODEL_LOAD
+    prompt "Select deep noise suppression"
+    default SR_NSN_NSNET1
+    depends on USE_NSNET
+    help
+        Select the deep noise suppression to be loaded.
+
+    config SR_NSN_NONE
+        bool "None"
+
+    config SR_NSN_NSNET1
+        bool "Deep noise suppression v1 (nsnet1)"
+        depends on IDF_TARGET_ESP32S3
+
+endchoice

 config USE_WAKENET
    bool "use wakenet"
--- a/include/esp32s3/esp_afe_config.h
+++ b/include/esp32s3/esp_afe_config.h
@ -39,6 +39,12 @@ typedef struct {
    int sample_rate;                        // sample rate of audio
 } afe_pcm_config_t;

+typedef enum {
+    NS_MODE_SSP = 0,                        // speech signal processing method
+    NS_MODE_NET = 1,                        // deep noise suppression network method
+} afe_ns_mode_t;
+
+
 /**
 * @brief Function to get the debug audio data
 *
@ -82,6 +88,8 @@ typedef struct {
    afe_pcm_config_t pcm_config;            // Config the channel num of original data which is fed to the afe feed function.
    bool debug_init;
    afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
+    afe_ns_mode_t afe_ns_mode;
+    char *afe_ns_model_name;
 } afe_config_t;


@ -111,6 +119,8 @@ typedef struct {
    .pcm_config.sample_rate = 16000, \
    .debug_init = false, \
    .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
+    .afe_ns_mode = NS_MODE_SSP, \
+    .afe_ns_model_name = NULL, \
 }
 #elif CONFIG_IDF_TARGET_ESP32S3
 #define AFE_CONFIG_DEFAULT() { \
@ -138,6 +148,8 @@ typedef struct {
    .pcm_config.sample_rate = 16000, \
    .debug_init = false, \
    .debug_hook = {{AFE_DEBUG_HOOK_MASE_TASK_IN, NULL}, {AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL}}, \
+    .afe_ns_mode = NS_MODE_SSP, \
+    .afe_ns_model_name = NULL, \
 }
 #endif

--- a/include/esp32s3/esp_nsn_iface.h
+++ b/include/esp32s3/esp_nsn_iface.h
@ -0,0 +1,64 @@
+#pragma once
+#include "stdint.h"
+
+//Opaque model data container
+typedef struct esp_nsn_data_t esp_nsn_data_t;
+
+
+/**
+ * @brief Easy function type to initialize a model instance
+ *
+ * @param model_name The name of the model instance
+ * @returns Handle to the model data
+ */
+typedef esp_nsn_data_t* (*esp_nsn_iface_op_create_t)(char *model_name);
+
+/**
+ * @brief Get the amount of samples that need to be passed to the process function
+ *
+ * Every noise suppression model processes a certain number of samples at the same time. This function
+ * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
+ *
+ * @param model The model object to query
+ * @return The amount of samples to feed the process function
+ */
+typedef int (*esp_nsn_iface_op_get_samp_chunksize_t)(esp_nsn_data_t *model);
+
+/**
+ * @brief Feed samples of an audio stream to the noise suppression model and get the processed data.
+ *
+ *
+ * @param model The model object to query
+ * @param in_data An array of 16-bit signed audio samples. The array size used can be queried by the 
+ *        get_samp_chunksize function.
+ * @param out_data An array of 16-bit signed audio samples after processing.
+ * @return The state of return.
+ */
+typedef int (*esp_nsn_iface_op_process_t)(esp_nsn_data_t *model, int16_t *in_data, int16_t *out_data);
+
+/**
+ * @brief Get the sample rate of the samples to feed to the process function
+ *
+ * @param model The model object to query
+ * @return The sample rate, in hz
+ */
+typedef int (*esp_nsn_iface_op_get_samp_rate_t)(esp_nsn_data_t *model);
+
+/**
+ * @brief Destroy a noise suppression model
+ *
+ * @param model Model object to destroy
+ */
+typedef void (*esp_nsn_iface_op_destroy_t)(esp_nsn_data_t *model);
+
+
+/**
+ * This structure contains the functions used to do operations on a noise suppression model.
+ */
+typedef struct {
+    esp_nsn_iface_op_create_t create;
+    esp_nsn_iface_op_get_samp_chunksize_t get_samp_chunksize;
+    esp_nsn_iface_op_process_t process;
+    esp_nsn_iface_op_get_samp_rate_t get_samp_rate;
+    esp_nsn_iface_op_destroy_t destroy;
+} esp_nsn_iface_t;
--- a/include/esp32s3/esp_nsn_models.h
+++ b/include/esp32s3/esp_nsn_models.h
@ -0,0 +1,9 @@
+#pragma once
+
+#include "esp_nsn_iface.h"
+
+// The prefix of nsnet model name is used to filter all nsnet models from the available models.
+#define ESP_NSNET_PREFIX "nsnet"
+
+extern const esp_nsn_iface_t esp_nsnet1_quantized;
+#define ESP_NSN_HANDLE esp_nsnet1_quantized
--- a/lib/esp32s3/libdl_lib.a
+++ b/lib/esp32s3/libdl_lib.a
--- a/lib/esp32s3/libesp_audio_front_end.a
+++ b/lib/esp32s3/libesp_audio_front_end.a
--- a/lib/esp32s3/libmultinet.a
+++ b/lib/esp32s3/libmultinet.a
--- a/lib/esp32s3/libnsnet.a
+++ b/lib/esp32s3/libnsnet.a
--- a/lib/esp32s3/libwakenet.a
+++ b/lib/esp32s3/libwakenet.a
--- a/model/nsnet_model/nsnet1/_MODEL_INFO_
+++ b/model/nsnet_model/nsnet1/_MODEL_INFO_
@ -0,0 +1,2 @@
+# (neural network type)_(model data version)_ns_0_0.0_0.0
+nsnet1_v1_ns_0_0.0_0.0
--- a/model/nsnet_model/nsnet1/nsnet1_data
+++ b/model/nsnet_model/nsnet1/nsnet1_data
--- a/model/nsnet_model/nsnet1/nsnet1_index
+++ b/model/nsnet_model/nsnet1/nsnet1_index
--- a/src/model_path.c
+++ b/src/model_path.c
@ -229,7 +229,8 @@ void srmodel_config_deinit(srmodel_list_t *models)
        }
        free(models);
    }
-    models = NULL;
+    // models is static_srmodels; clear the global too so it does not dangle after free()
+    static_srmodels = NULL;
 }

 model_coeff_getter_t *srmodel_get_model_coeff(char *model_name)
--- a/test_apps/esp-sr/pytest_esp_sr.py
+++ b/test_apps/esp-sr/pytest_esp_sr.py
@ -44,8 +44,23 @@ def test_wakenet(dut: Dut)-> None:
        'wn9_hilexin',
    ],
 )
-def test_afe(dut: Dut)-> None:
+def test_sr_afe(dut: Dut)-> None:
    # dut.run_all_single_board_cases(group="afe")
    dut.expect_exact('Press ENTER to see the list of tests.')
-    dut.write('[afe]')
+    dut.write('[afe_sr]')
    dut.expect_unity_test_output(timeout = 1000)
+
+
+@pytest.mark.target('esp32s3')
+@pytest.mark.env('esp32s3')
+@pytest.mark.parametrize(
+    'config',
+    [
+        'nsnet1',
+    ],
+)
+def test_vc_afe(dut: Dut)-> None:  # distinct name: a second test_sr_afe would shadow the [afe_sr] test defined earlier
+    # dut.run_all_single_board_cases(group="afe")
+    dut.expect_exact('Press ENTER to see the list of tests.')
+    dut.write('[afe_vc]')  # select the [afe_vc] cases for the nsnet1 config
+    dut.expect_unity_test_output(timeout = 1000)
--- a/test_apps/esp-sr/sdkconfig.ci.nsnet1
+++ b/test_apps/esp-sr/sdkconfig.ci.nsnet1