feat(afe): Return the volume of wake word audio

2025-09-15 15:28:44 +08:00 · 2023-09-05 19:22:17 +08:00 · 2023-09-05 19:22:17 +08:00 · 4f3c441d69
commit 4f3c441d69
parent c3a57bb4b2
5 changed files with 9 additions and 3 deletions
--- a/include/esp32s3/esp_afe_config.h
+++ b/include/esp32s3/esp_afe_config.h
@ -22,8 +22,8 @@ typedef enum {
 } afe_memory_alloc_mode_t;

 typedef enum {
-    AFE_MN_PEAK_AGC_MODE_1 = -5,            // The peak amplitude of audio fed to multinet is -5dB
-    AFE_MN_PEAK_AGC_MODE_2 = -4,            // The peak amplitude of audio fed to multinet is -4dB
+    AFE_MN_PEAK_AGC_MODE_1 = -9,            // The peak amplitude of audio fed to multinet is -9dB
+    AFE_MN_PEAK_AGC_MODE_2 = -6,            // The peak amplitude of audio fed to multinet is -6dB
    AFE_MN_PEAK_AGC_MODE_3 = -3,            // The peak amplitude of audio fed to multinet is -3dB
    AFE_MN_PEAK_NO_AGC = 0,                 // There is no agc gain
 } afe_mn_peak_agc_mode_t;
@ -72,7 +72,9 @@ typedef struct {
    int afe_perferred_priority;
    int afe_ringbuf_size;
    afe_memory_alloc_mode_t memory_alloc_mode;
-    afe_mn_peak_agc_mode_t agc_mode;        // The agc mode for ASR
+    float afe_linear_gain;                  // The linear gain for SR output (note: invalid for VC); the value should be in [0.1, 10.0].
+                                            // This value acts directly on the output amplitude: afe_linear_gain * amplitude.
+    afe_mn_peak_agc_mode_t agc_mode;        // The AGC mode for ASR. The gain generated by AGC is applied to the audio after the linear gain (afe_linear_gain).
    afe_pcm_config_t pcm_config;            // Config the channel num of original data which is fed to the afe feed function.
    bool debug_init;
    afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
@ -97,6 +99,7 @@ typedef struct {
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
+    .afe_linear_gain = 1.0, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config.total_ch_num = 2, \
    .pcm_config.mic_num = 1, \
@ -123,6 +126,7 @@ typedef struct {
    .afe_perferred_priority = 5, \
    .afe_ringbuf_size = 50, \
    .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
+    .afe_linear_gain = 1.0, \
    .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
    .pcm_config.total_ch_num = 3, \
    .pcm_config.mic_num = 2, \
--- a/include/esp32s3/esp_afe_sr_iface.h
+++ b/include/esp32s3/esp_afe_sr_iface.h
@ -25,6 +25,8 @@ typedef struct afe_fetch_result_t
 {
    int16_t *data;                          // the data of audio.
    int data_size;                          // the size of data. The unit is byte.
+    float data_volume;                      // The volume of the input audio, in decibels (dB). This value is calculated before AGC (note: invalid in VC).
+                                            // If WakeNet is enabled, the window length is the receptive field of WakeNet (about 1.5 s); otherwise it is the frame length.
    wakenet_state_t wakeup_state;           // the value is wakenet_state_t
    int wake_word_index;                    // if the wake word is detected. It will store the wake word index which start from 1.
    int wakenet_model_index;                // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1.
--- a/lib/esp32s3/libesp_audio_front_end.a
+++ b/lib/esp32s3/libesp_audio_front_end.a
--- a/lib/esp32s3/libmultinet.a
+++ b/lib/esp32s3/libmultinet.a
--- a/lib/esp32s3/libwakenet.a
+++ b/lib/esp32s3/libwakenet.a