Merge branch 'bugfix/fft' into 'master'

Bugfix/fft See merge request speech-recognition-framework/esp-sr!157
2025-09-15 15:28:44 +08:00 · 2025-04-17 16:32:40 +08:00 · 2025-04-17 16:32:40 +08:00 · 763dd04a87
commit 763dd04a87
parent f900648c87 33f47da975
35 changed files with 150 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,8 @@
 # Change log for esp-sr

+## 2.0.5
+- Fix fftr bug
+
 ## 2.0.4
 - Add DOA(Direction of Arrival) algorithm

--- a/README.md
+++ b/README.md
@ -23,6 +23,7 @@ The new algorithms will no longer support ESP32 chips.

 News
 ----
+[17/4/2025]: We add a new DOA (Direction of Arrival) algorithm.  
 [14/2/2025]: We release **ESP-SR V2.0**. [Migration from ESP-SR V1.* to ESP-SR V2.*](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/audio_front_end/migration_guide.html)   
 [13/2/2025]: We release **VADNet**, a voice activaty detection model. You can use it to replace the WebRTC VAD and improve the performance.

--- a/idf_component.yml
+++ b/idf_component.yml
@ -1,9 +1,9 @@
-version: "2.0.4"
+version: "2.0.5"
 description: esp_sr provides basic algorithms for Speech Recognition applications
 url: https://github.com/espressif/esp-sr
 dependencies:
  idf: ">=5.0"
-  espressif/esp-dsp: ">1.5.1"
+  espressif/esp-dsp: "1.6.0"
 files:
  exclude:
    - ".github"
--- a/include/esp32/esp_afe_doa.h
+++ b/include/esp32/esp_afe_doa.h
@ -0,0 +1,48 @@
+#ifndef _ESP_AFE_DOA_H_
+#define _ESP_AFE_DOA_H_
+
+#include "esp_doa.h"
+#include "esp_afe_config.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    doa_handle_t *doa_handle;
+    afe_pcm_config_t pcm_config;
+    int16_t *leftdata;
+    int16_t *rightdata;
+    int frame_size;
+} afe_doa_handle_t;
+
+/**
+ * @brief Initialize SRP-PHAT processor
+ * @param input_format     The input format
+ * @param fs Sampling rate (Hz), e.g., 16000
+ * @param resolution Angular search resolution (degrees), e.g., 20
+ * @param d_mics Microphone spacing (meters), e.g., 0.06
+ * @param input_timedate_samples Number of input time-domain samples, e.g., 1024
+ * @return Initialized afe_doa_handle_t object pointer. Using the example configuration values above is recommended for better performance.
+ */
+afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
+/**
+ * @brief Process audio frame for direction estimation
+ * @param handle afe_doa_handle_t instance pointer
+ * @param indata Input audio data; the format is defined by input_format.
+ * @return Estimated sound direction in degrees, e.g., 0-180
+ */
+float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
+/**
+ * @brief Release all allocated resources
+ * @param handle afe_doa_handle_t instance pointer to be freed
+ */
+void afe_doa_destroy(afe_doa_handle_t *handle);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ESP_AFE_DOA_H_ */
--- a/include/esp32p4/esp_afe_doa.h
+++ b/include/esp32p4/esp_afe_doa.h
@ -0,0 +1,48 @@
+#ifndef _ESP_AFE_DOA_H_
+#define _ESP_AFE_DOA_H_
+
+#include "esp_doa.h"
+#include "esp_afe_config.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    doa_handle_t *doa_handle;
+    afe_pcm_config_t pcm_config;
+    int16_t *leftdata;
+    int16_t *rightdata;
+    int frame_size;
+} afe_doa_handle_t;
+
+/**
+ * @brief Initialize SRP-PHAT processor
+ * @param input_format     The input format
+ * @param fs Sampling rate (Hz), e.g., 16000
+ * @param resolution Angular search resolution (degrees), e.g., 20
+ * @param d_mics Microphone spacing (meters), e.g., 0.06
+ * @param input_timedate_samples Number of input time-domain samples, e.g., 1024
+ * @return Initialized afe_doa_handle_t object pointer. Using the example configuration values above is recommended for better performance.
+ */
+afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
+/**
+ * @brief Process audio frame for direction estimation
+ * @param handle afe_doa_handle_t instance pointer
+ * @param indata Input audio data; the format is defined by input_format.
+ * @return Estimated sound direction in degrees, e.g., 0-180
+ */
+float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
+/**
+ * @brief Release all allocated resources
+ * @param handle afe_doa_handle_t instance pointer to be freed
+ */
+void afe_doa_destroy(afe_doa_handle_t *handle);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ESP_AFE_DOA_H_ */
--- a/include/esp32s3/esp_afe_doa.h
+++ b/include/esp32s3/esp_afe_doa.h
@ -0,0 +1,48 @@
+#ifndef _ESP_AFE_DOA_H_
+#define _ESP_AFE_DOA_H_
+
+#include "esp_doa.h"
+#include "esp_afe_config.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    doa_handle_t *doa_handle;
+    afe_pcm_config_t pcm_config;
+    int16_t *leftdata;
+    int16_t *rightdata;
+    int frame_size;
+} afe_doa_handle_t;
+
+/**
+ * @brief Initialize SRP-PHAT processor
+ * @param input_format     The input format
+ * @param fs Sampling rate (Hz), e.g., 16000
+ * @param resolution Angular search resolution (degrees), e.g., 20
+ * @param d_mics Microphone spacing (meters), e.g., 0.06
+ * @param input_timedate_samples Number of input time-domain samples, e.g., 1024
+ * @return Initialized afe_doa_handle_t object pointer. Using the example configuration values above is recommended for better performance.
+ */
+afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
+/**
+ * @brief Process audio frame for direction estimation
+ * @param handle afe_doa_handle_t instance pointer
+ * @param indata Input audio data; the format is defined by input_format.
+ * @return Estimated sound direction in degrees, e.g., 0-180
+ */
+float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
+/**
+ * @brief Release all allocated resources
+ * @param handle afe_doa_handle_t instance pointer to be freed
+ */
+void afe_doa_destroy(afe_doa_handle_t *handle);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ESP_AFE_DOA_H_ */
--- a/lib/esp32/libc_speech_features.a
+++ b/lib/esp32/libc_speech_features.a
--- a/lib/esp32/libdl_lib.a
+++ b/lib/esp32/libdl_lib.a
--- a/lib/esp32/libesp_audio_front_end.a
+++ b/lib/esp32/libesp_audio_front_end.a
--- a/lib/esp32/libesp_audio_processor.a
+++ b/lib/esp32/libesp_audio_processor.a
--- a/lib/esp32/libflite_g2p.a
+++ b/lib/esp32/libflite_g2p.a
--- a/lib/esp32/libfst.a
+++ b/lib/esp32/libfst.a
--- a/lib/esp32/libhufzip.a
+++ b/lib/esp32/libhufzip.a
--- a/lib/esp32/libmultinet.a
+++ b/lib/esp32/libmultinet.a
--- a/lib/esp32/libnsnet.a
+++ b/lib/esp32/libnsnet.a
--- a/lib/esp32/libvadnet.a
+++ b/lib/esp32/libvadnet.a
--- a/lib/esp32/libwakenet.a
+++ b/lib/esp32/libwakenet.a
--- a/lib/esp32p4/libc_speech_features.a
+++ b/lib/esp32p4/libc_speech_features.a
--- a/lib/esp32p4/libdl_lib.a
+++ b/lib/esp32p4/libdl_lib.a
--- a/lib/esp32p4/libesp_audio_front_end.a
+++ b/lib/esp32p4/libesp_audio_front_end.a
--- a/lib/esp32p4/libesp_audio_processor.a
+++ b/lib/esp32p4/libesp_audio_processor.a
--- a/lib/esp32p4/libflite_g2p.a
+++ b/lib/esp32p4/libflite_g2p.a
--- a/lib/esp32p4/libfst.a
+++ b/lib/esp32p4/libfst.a
--- a/lib/esp32p4/libhufzip.a
+++ b/lib/esp32p4/libhufzip.a
--- a/lib/esp32p4/libmultinet.a
+++ b/lib/esp32p4/libmultinet.a
--- a/lib/esp32p4/libnsnet.a
+++ b/lib/esp32p4/libnsnet.a
--- a/lib/esp32p4/libvadnet.a
+++ b/lib/esp32p4/libvadnet.a
--- a/lib/esp32p4/libwakenet.a
+++ b/lib/esp32p4/libwakenet.a
--- a/lib/esp32s3/libc_speech_features.a
+++ b/lib/esp32s3/libc_speech_features.a
--- a/lib/esp32s3/libesp_audio_front_end.a
+++ b/lib/esp32s3/libesp_audio_front_end.a
--- a/lib/esp32s3/libesp_audio_processor.a
+++ b/lib/esp32s3/libesp_audio_processor.a
--- a/lib/esp32s3/libmultinet.a
+++ b/lib/esp32s3/libmultinet.a
--- a/lib/esp32s3/libnsnet.a
+++ b/lib/esp32s3/libnsnet.a
--- a/lib/esp32s3/libvadnet.a
+++ b/lib/esp32s3/libvadnet.a
--- a/lib/esp32s3/libwakenet.a
+++ b/lib/esp32s3/libwakenet.a