Merge branch 'bugfix/fft' into 'master'

Bugfix/fft

See merge request speech-recognition-framework/esp-sr!157
This commit is contained in:
Sun Xiang Yu 2025-04-17 16:32:40 +08:00
commit 763dd04a87
35 changed files with 150 additions and 2 deletions

View File

@ -1,5 +1,8 @@
# Change log for esp-sr
## 2.0.5
- Fix fftr bug
## 2.0.4
- Add DOA(Direction of Arrival) algorithm

View File

@ -23,6 +23,7 @@ The new algorithms will no longer support ESP32 chips.
News
----
[17/4/2025]: We add a new DOA(Direction of Arrival) algorithm.
[14/2/2025]: We release **ESP-SR V2.0**. [Migration from ESP-SR V1.* to ESP-SR V2.*](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/audio_front_end/migration_guide.html)
[13/2/2025]: We release **VADNet**, a voice activity detection model. You can use it to replace the WebRTC VAD and improve the performance.

View File

@ -1,9 +1,9 @@
version: "2.0.4"
version: "2.0.5"
description: esp_sr provides basic algorithms for Speech Recognition applications
url: https://github.com/espressif/esp-sr
dependencies:
idf: ">=5.0"
espressif/esp-dsp: ">1.5.1"
espressif/esp-dsp: "1.6.0"
files:
exclude:
- ".github"

View File

@ -0,0 +1,48 @@
#ifndef _ESP_AFE_DOA_H_
#define _ESP_AFE_DOA_H_
#include "esp_doa.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Handle type returned by afe_doa_create() and consumed by
 *        afe_doa_process() and afe_doa_destroy().
 *
 * NOTE(review): the implementation is not visible from this header, so the
 * field notes below are inferred from names and types only — confirm them
 * against the source before relying on them.
 */
typedef struct {
/* Pointer to a doa_handle_t (presumably declared in esp_doa.h). */
doa_handle_t *doa_handle;
/* PCM configuration (type presumably declared in esp_afe_config.h). */
afe_pcm_config_t pcm_config;
/* Buffer of 16-bit samples; presumably the left channel — TODO confirm. */
int16_t *leftdata;
/* Buffer of 16-bit samples; presumably the right channel — TODO confirm. */
int16_t *rightdata;
/* Frame size; unit (samples vs. bytes) is not stated here — TODO confirm. */
int frame_size;
} afe_doa_handle_t;
/**
 * @brief Initialize SRP-PHAT processor
 *
 * @param input_format The input format (NOTE(review): accepted values are not
 *                     documented in this header — confirm with the implementation)
 * @param fs Sampling rate (Hz), e.g., 16000
 * @param resolution Angular search resolution (degrees), e.g., 20
 * @param d_mics Microphone spacing (meters), e.g., 0.06
 * @param input_timedate_samples Input time-data samples, e.g., 1024 (the name
 *                               is presumably a misspelling of "timedata")
 * @return Pointer to the initialized afe_doa_handle_t object. The example
 *         values above are the recommended configuration for better
 *         performance. NOTE(review): the value returned on failure is not
 *         documented here — confirm before relying on a NULL check.
 */
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
/**
 * @brief Process audio frame for direction estimation
 *
 * @param handle afe_doa_handle_t instance pointer
 * @param indata Input audio data; its format is defined by the input_format
 *               given to afe_doa_create().
 * @return Estimated sound direction in degrees, e.g., 0-180
 */
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
/**
 * @brief Release all allocated resources
 *
 * @param handle afe_doa_handle_t instance pointer to be freed
 */
void afe_doa_destroy(afe_doa_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_AFE_DOA_H_ */

View File

@ -0,0 +1,48 @@
#ifndef _ESP_AFE_DOA_H_
#define _ESP_AFE_DOA_H_
#include "esp_doa.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Handle type returned by afe_doa_create() and consumed by
 *        afe_doa_process() and afe_doa_destroy().
 *
 * NOTE(review): the implementation is not visible from this header, so the
 * field notes below are inferred from names and types only — confirm them
 * against the source before relying on them.
 */
typedef struct {
/* Pointer to a doa_handle_t (presumably declared in esp_doa.h). */
doa_handle_t *doa_handle;
/* PCM configuration (type presumably declared in esp_afe_config.h). */
afe_pcm_config_t pcm_config;
/* Buffer of 16-bit samples; presumably the left channel — TODO confirm. */
int16_t *leftdata;
/* Buffer of 16-bit samples; presumably the right channel — TODO confirm. */
int16_t *rightdata;
/* Frame size; unit (samples vs. bytes) is not stated here — TODO confirm. */
int frame_size;
} afe_doa_handle_t;
/**
 * @brief Initialize SRP-PHAT processor
 *
 * @param input_format The input format (NOTE(review): accepted values are not
 *                     documented in this header — confirm with the implementation)
 * @param fs Sampling rate (Hz), e.g., 16000
 * @param resolution Angular search resolution (degrees), e.g., 20
 * @param d_mics Microphone spacing (meters), e.g., 0.06
 * @param input_timedate_samples Input time-data samples, e.g., 1024 (the name
 *                               is presumably a misspelling of "timedata")
 * @return Pointer to the initialized afe_doa_handle_t object. The example
 *         values above are the recommended configuration for better
 *         performance. NOTE(review): the value returned on failure is not
 *         documented here — confirm before relying on a NULL check.
 */
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
/**
 * @brief Process audio frame for direction estimation
 *
 * @param handle afe_doa_handle_t instance pointer
 * @param indata Input audio data; its format is defined by the input_format
 *               given to afe_doa_create().
 * @return Estimated sound direction in degrees, e.g., 0-180
 */
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
/**
 * @brief Release all allocated resources
 *
 * @param handle afe_doa_handle_t instance pointer to be freed
 */
void afe_doa_destroy(afe_doa_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_AFE_DOA_H_ */

View File

@ -0,0 +1,48 @@
#ifndef _ESP_AFE_DOA_H_
#define _ESP_AFE_DOA_H_
#include "esp_doa.h"
#include "esp_afe_config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Handle type returned by afe_doa_create() and consumed by
 *        afe_doa_process() and afe_doa_destroy().
 *
 * NOTE(review): the implementation is not visible from this header, so the
 * field notes below are inferred from names and types only — confirm them
 * against the source before relying on them.
 */
typedef struct {
/* Pointer to a doa_handle_t (presumably declared in esp_doa.h). */
doa_handle_t *doa_handle;
/* PCM configuration (type presumably declared in esp_afe_config.h). */
afe_pcm_config_t pcm_config;
/* Buffer of 16-bit samples; presumably the left channel — TODO confirm. */
int16_t *leftdata;
/* Buffer of 16-bit samples; presumably the right channel — TODO confirm. */
int16_t *rightdata;
/* Frame size; unit (samples vs. bytes) is not stated here — TODO confirm. */
int frame_size;
} afe_doa_handle_t;
/**
 * @brief Initialize SRP-PHAT processor
 *
 * @param input_format The input format (NOTE(review): accepted values are not
 *                     documented in this header — confirm with the implementation)
 * @param fs Sampling rate (Hz), e.g., 16000
 * @param resolution Angular search resolution (degrees), e.g., 20
 * @param d_mics Microphone spacing (meters), e.g., 0.06
 * @param input_timedate_samples Input time-data samples, e.g., 1024 (the name
 *                               is presumably a misspelling of "timedata")
 * @return Pointer to the initialized afe_doa_handle_t object. The example
 *         values above are the recommended configuration for better
 *         performance. NOTE(review): the value returned on failure is not
 *         documented here — confirm before relying on a NULL check.
 */
afe_doa_handle_t *afe_doa_create(const char *input_format, int fs, float resolution, float d_mics, int input_timedate_samples);
/**
 * @brief Process audio frame for direction estimation
 *
 * @param handle afe_doa_handle_t instance pointer
 * @param indata Input audio data; its format is defined by the input_format
 *               given to afe_doa_create().
 * @return Estimated sound direction in degrees, e.g., 0-180
 */
float afe_doa_process(afe_doa_handle_t *handle, const int16_t *indata);
/**
 * @brief Release all allocated resources
 *
 * @param handle afe_doa_handle_t instance pointer to be freed
 */
void afe_doa_destroy(afe_doa_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif /* _ESP_AFE_DOA_H_ */

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.