diff --git a/include/esp32s3/esp_afe_config.h b/include/esp32s3/esp_afe_config.h index 9af4eb2..1291b5c 100644 --- a/include/esp32s3/esp_afe_config.h +++ b/include/esp32s3/esp_afe_config.h @@ -22,8 +22,8 @@ typedef enum { } afe_memory_alloc_mode_t; typedef enum { - AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB - AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB + AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of audio fed to multinet is -9dB + AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of audio fed to multinet is -6dB AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain } afe_mn_peak_agc_mode_t; @@ -72,7 +72,9 @@ typedef struct { int afe_perferred_priority; int afe_ringbuf_size; afe_memory_alloc_mode_t memory_alloc_mode; - afe_mn_peak_agc_mode_t agc_mode; // The agc mode for ASR + float afe_linear_gain; // The linear gain for sr output (note: invalid for vc); the value should be in [0.1, 10.0]. + // This value acts directly on the output amplitude: afe_linear_gain * amplitude. + afe_mn_peak_agc_mode_t agc_mode; // The AGC mode for ASR. The gain generated by AGC acts on the audio after the afe linear gain has been applied. afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function. 
bool debug_init; afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX]; @@ -97,6 +99,7 @@ typedef struct { .afe_perferred_priority = 5, \ .afe_ringbuf_size = 50, \ .memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \ + .afe_linear_gain = 1.0, \ .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \ .pcm_config.total_ch_num = 2, \ .pcm_config.mic_num = 1, \ @@ -123,6 +126,7 @@ typedef struct { .afe_perferred_priority = 5, \ .afe_ringbuf_size = 50, \ .memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \ + .afe_linear_gain = 1.0, \ .agc_mode = AFE_MN_PEAK_AGC_MODE_2, \ .pcm_config.total_ch_num = 3, \ .pcm_config.mic_num = 2, \ diff --git a/include/esp32s3/esp_afe_sr_iface.h b/include/esp32s3/esp_afe_sr_iface.h index 276e493..d45c118 100644 --- a/include/esp32s3/esp_afe_sr_iface.h +++ b/include/esp32s3/esp_afe_sr_iface.h @@ -25,6 +25,8 @@ typedef struct afe_fetch_result_t { int16_t *data; // the data of audio. int data_size; // the size of data. The unit is byte. + float data_volume; // the volume of input audio, in decibels (dB). This value is calculated before agc (note: invalid in vc). + // if wakenet is enabled, the window length is the receptive field of wakenet (about 1.5s); otherwise it is the frame length. wakenet_state_t wakeup_state; // the value is wakenet_state_t int wake_word_index; // if the wake word is detected. It will store the wake word index which start from 1. int wakenet_model_index; // if there are multiple wakenets, this value identifies which model be wakes up. Index start from 1. 
diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index 16d2503..200d7dd 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index 7f441f7..8b3bef5 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index 3b3e961..6be6a7c 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ