mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
feature/Add NS/AEC/AGC/VAD algorithm
This commit is contained in:
parent
c62aeb3826
commit
0f6442f84a
1
Makefile
1
Makefile
@ -5,6 +5,7 @@ MODULE_PATH := $(abspath $(shell pwd))
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/lib
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/wake_word_engine
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/speech_command_recognition
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/acoustic_algorithm
|
||||
|
||||
include $(IDF_PATH)/make/project.mk
|
||||
|
||||
|
||||
11
acoustic_algorithm/component.mk
Normal file
11
acoustic_algorithm/component.mk
Normal file
@ -0,0 +1,11 @@
|
||||
COMPONENT_ADD_INCLUDEDIRS := include
|
||||
|
||||
COMPONENT_SRCDIRS := .
|
||||
|
||||
LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a)
|
||||
|
||||
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))
|
||||
|
||||
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS)
|
||||
|
||||
ALL_LIB_FILES += $(LIB_FILES)
|
||||
76
acoustic_algorithm/include/esp_aec.h
Normal file
76
acoustic_algorithm/include/esp_aec.h
Normal file
@ -0,0 +1,76 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AEC_H_
|
||||
#define _ESP_AEC_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define USE_AEC_FFT // Not kiss_fft
|
||||
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
|
||||
#define AEC_FRAME_LENGTH_MS 16 // Only support 16ms
|
||||
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
|
||||
|
||||
typedef void* aec_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the AEC structure.
|
||||
*
|
||||
* @param sample_rate The sampling frequency (Hz); only 16000 is supported (see AEC_SAMPLE_RATE).
|
||||
*
|
||||
* @param frame_length The length of one audio frame in ms; only 16 is supported (see AEC_FRAME_LENGTH_MS).
|
||||
*
|
||||
* @param filter_length Number of samples of echo to cancel.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of AEC
|
||||
*/
|
||||
aec_handle_t aec_create(int sample_rate, int frame_length, int filter_length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Performs echo cancellation on one frame, based on the audio sent to the speaker and the frame captured from the mic.
|
||||
*
|
||||
* @param inst The instance of AEC.
|
||||
*
|
||||
* @param indata An array of 16-bit signed audio samples from mic.
|
||||
*
|
||||
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
|
||||
*
|
||||
* @param outdata Returns near-end signal with echo removed.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_process(aec_handle_t inst, int16_t *indata, int16_t *refdata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Free the AEC instance
|
||||
*
|
||||
* @param inst The instance of AEC.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void aec_destroy(aec_handle_t inst);
|
||||
|
||||
/* Close the extern "C" block opened near the top of this header. */
#ifdef __cplusplus
}
#endif
|
||||
|
||||
#endif //_ESP_AEC_H_
|
||||
31
acoustic_algorithm/include/esp_agc.h
Normal file
31
acoustic_algorithm/include/esp_agc.h
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_AGC_H_
|
||||
#define _ESP_AGC_H_
|
||||
|
||||
/**
 * @brief Status codes of the AGC module.
 *
 * Zero means success; every error code is negative.
 */
typedef enum {
    ESP_AGC_SUCCESS = 0,            /*!< Success */
    ESP_AGC_FAIL = -1,              /*!< AGC failed */
    ESP_AGC_SAMPLE_RATE_ERROR = -2, /*!< Sample rate can only be 8 kHz, 16 kHz or 32 kHz */
    ESP_AGC_FRAME_SIZE_ERROR = -3,  /*!< Input frame must be 10 ms long; the frame size in samples follows from the sample rate */
} ESP_AGE_ERR;

/* Correctly spelled alias; ESP_AGE_ERR (sic) is kept so existing callers still compile. */
typedef ESP_AGE_ERR ESP_AGC_ERR;
|
||||
|
||||
|
||||
#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Open an AGC instance.
 *
 * @param agc_mode     AGC operating mode.
 * @param sample_rate  Sampling frequency in Hz.
 *
 * @return Opaque AGC handle to pass to the other AGC functions.
 *         NOTE(review): the value returned on failure is not documented here - confirm with the library.
 */
void *esp_agc_open(int agc_mode, int sample_rate);

/**
 * @brief Configure an AGC instance.
 *
 * @param agc_handle         Handle returned by esp_agc_open().
 * @param gain_dB            Gain setting, presumably in dB - confirm with the library.
 * @param limiter_enable     Limiter switch, presumably non-zero to enable - confirm with the library.
 * @param target_level_dbfs  Target level setting, presumably in dBFS - confirm with the library.
 */
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);

/**
 * @brief Run AGC on one frame of PCM samples.
 *
 * @param agc_handle   Handle returned by esp_agc_open().
 * @param in_pcm       Input samples.
 * @param out_pcm      Output samples.
 * @param frame_size   Frame size; the error codes above describe a 10 ms frame whose
 *                     size depends on the sample rate.
 * @param sample_rate  Sampling frequency in Hz.
 *
 * @return Presumably one of the AGC status codes declared above (0 on success,
 *         negative on error) - confirm with the library.
 */
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);

/**
 * @brief Close an AGC instance.
 *
 * NOTE(review): the name is spelled "clse"; it presumably matches the symbol exported
 * by the prebuilt library, so it is left unchanged.
 *
 * @param agc_handle  Handle returned by esp_agc_open().
 */
void esp_agc_clse(void *agc_handle);

#ifdef __cplusplus
}
#endif
|
||||
|
||||
#endif // _ESP_AGC_H_
|
||||
70
acoustic_algorithm/include/esp_ns.h
Normal file
70
acoustic_algorithm/include/esp_ns.h
Normal file
@ -0,0 +1,70 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_NS_H_
|
||||
#define _ESP_NS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NS_FRAME_LENGTH_MS 30 //Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
|
||||
* The Sampling frequency (Hz) must be 16000Hz
|
||||
*/
|
||||
|
||||
typedef void* ns_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the NS structure.
|
||||
*
|
||||
* @param frame_length_ms The length of the audio processing can be 10ms, 20ms, 30ms.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of NS
|
||||
*/
|
||||
ns_handle_t ns_create(int frame_length_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the NS and get the audio stream after Noise suppression.
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @param indata An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @param outdata An array of 16-bit signed audio samples after noise suppression.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
|
||||
|
||||
/**
|
||||
* @brief Free the NS instance
|
||||
*
|
||||
* @param inst The instance of NS.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void ns_destroy(ns_handle_t inst);
|
||||
|
||||
/* Close the extern "C" block opened near the top of this header. */
#ifdef __cplusplus
}
#endif
|
||||
|
||||
#endif //_ESP_NS_H_
|
||||
104
acoustic_algorithm/include/esp_vad.h
Normal file
104
acoustic_algorithm/include/esp_vad.h
Normal file
@ -0,0 +1,104 @@
|
||||
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License
|
||||
#ifndef _ESP_VAD_H_
|
||||
#define _ESP_VAD_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SAMPLE_RATE_HZ 16000 //Supports 32000, 16000, 8000
|
||||
#define VAD_FRAME_LENGTH_MS 30 //Supports 10ms, 20ms, 30ms
|
||||
|
||||
/**
 * @brief VAD operating modes.
 *
 * A higher mode is more aggressive, i.e. the VAD is more restrictive in
 * reporting speech.
 */
typedef enum {
    VAD_MODE_0 = 0,
    VAD_MODE_1 = 1,
    VAD_MODE_2 = 2,
    VAD_MODE_3 = 3,
    VAD_MODE_4 = 4
} vad_mode_t;
|
||||
|
||||
/**
 * @brief Result of processing one frame with the VAD.
 */
typedef enum {
    VAD_SILENCE = 0,
    VAD_SPEECH = 1
} vad_state_t;
|
||||
|
||||
typedef void* vad_handle_t;
|
||||
|
||||
/**
|
||||
* @brief Creates an instance to the VAD structure.
|
||||
*
|
||||
* @param vad_mode Sets the VAD operating mode.
|
||||
*
|
||||
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
|
||||
*
|
||||
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
|
||||
*
|
||||
* @return
|
||||
* - NULL: Create failed
|
||||
* - Others: The instance of VAD
|
||||
*/
|
||||
vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_ms);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
|
||||
*
|
||||
* @param inst The instance of VAD.
|
||||
*
|
||||
* @param data An array of 16-bit signed audio samples.
|
||||
*
|
||||
* @return
|
||||
* - VAD_SILENCE if no voice
|
||||
* - VAD_SPEECH if voice is detected
|
||||
*
|
||||
*/
|
||||
vad_state_t vad_process(vad_handle_t inst, int16_t *data);
|
||||
|
||||
/**
|
||||
* @brief Free the VAD instance
|
||||
*
|
||||
* @param inst The instance of VAD.
|
||||
*
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
void vad_destroy(vad_handle_t inst);
|
||||
|
||||
/*
|
||||
* Programming Guide:
|
||||
*
|
||||
* @code{c}
|
||||
* vad_handle_t vad_inst = vad_create(VAD_MODE_3, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Creates an instance to the VAD structure.
|
||||
*
|
||||
* while (1) {
|
||||
* //Use buffer to receive the audio data from MIC.
|
||||
* vad_state_t vad_state = vad_process(vad_inst, buffer); // Feed samples to the VAD process and get the result.
|
||||
* }
|
||||
*
|
||||
* vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
|
||||
*
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
/* Close the extern "C" block opened near the top of this header. */
#ifdef __cplusplus
}
#endif
|
||||
|
||||
#endif //_ESP_VAD_H_
|
||||
BIN
acoustic_algorithm/libesp_audio_processor.a
Normal file
BIN
acoustic_algorithm/libesp_audio_processor.a
Normal file
Binary file not shown.
120
main/audio_process.c
Normal file
120
main/audio_process.c
Normal file
@ -0,0 +1,120 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
|
||||
#include "audio_process.h"
|
||||
#include "esp_ns.h"
|
||||
#include "esp_aec.h"
|
||||
#include "esp_agc.h"
|
||||
#include "esp_vad.h"
|
||||
#include "audio_test_file.h"
|
||||
|
||||
void NSTask(void *arg)
|
||||
{
|
||||
ns_handle_t ns_inst = ns_create(NS_FRAME_LENGTH_MS);
|
||||
int chunks = 0;
|
||||
int audio_chunksize = NS_FRAME_LENGTH_MS * 16;
|
||||
int16_t *ns_in = malloc(audio_chunksize * sizeof(int16_t));
|
||||
int16_t *ns_out = malloc(audio_chunksize * sizeof(int16_t));
|
||||
while (1) {
|
||||
if ((chunks + 1) * audio_chunksize * sizeof(int16_t) <= sizeof(audio_test_file)) {
|
||||
memcpy(ns_in, audio_test_file + chunks * audio_chunksize * sizeof(int16_t), audio_chunksize * sizeof(int16_t));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
ns_process(ns_inst, ns_in, ns_out);
|
||||
chunks++;
|
||||
}
|
||||
ns_destroy(ns_inst);
|
||||
free(ns_in);
|
||||
free(ns_out);
|
||||
printf("NS test successfully\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
#define AGC_FRAME_BYTES 320
|
||||
void AGCTask(void *arg)
|
||||
{
|
||||
void *agc_handle = esp_agc_open(3, 16000);
|
||||
set_agc_config(agc_handle, 15, 1, -3);
|
||||
int chunks = 0;
|
||||
int16_t *agc_in = malloc(AGC_FRAME_BYTES);
|
||||
int16_t *agc_out = malloc(AGC_FRAME_BYTES);
|
||||
while (1) {
|
||||
if ((chunks + 1) * AGC_FRAME_BYTES <= sizeof(audio_test_file)) {
|
||||
memcpy(agc_in, audio_test_file + chunks * AGC_FRAME_BYTES, AGC_FRAME_BYTES);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
esp_agc_process(agc_handle, agc_in, agc_out, AGC_FRAME_BYTES / 2, 16000);
|
||||
chunks++;
|
||||
}
|
||||
esp_agc_clse(agc_handle);
|
||||
free(agc_in);
|
||||
free(agc_out);
|
||||
printf("AGC test successfully\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
void AECTask(void *arg)
|
||||
{
|
||||
aec_handle_t aec_inst = aec_create(AEC_SAMPLE_RATE, AEC_FRAME_LENGTH_MS, AEC_FILTER_LENGTH);
|
||||
int chunks = 0;
|
||||
int audio_chunksize = AEC_FRAME_LENGTH_MS * 16;
|
||||
int16_t *aec_in = malloc(audio_chunksize * sizeof(int16_t));
|
||||
int16_t *aec_ref = malloc(audio_chunksize * sizeof(int16_t));
|
||||
int16_t *aec_out = malloc(audio_chunksize * sizeof(int16_t));
|
||||
while (1) {
|
||||
if ((chunks + 1) * audio_chunksize * sizeof(int16_t) <= sizeof(audio_test_file)) {
|
||||
memcpy(aec_in, audio_test_file + chunks * audio_chunksize * sizeof(int16_t), audio_chunksize * sizeof(int16_t));
|
||||
memset(aec_ref, 0, audio_chunksize * sizeof(int16_t));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
aec_process(aec_inst, aec_in, aec_ref, aec_out);
|
||||
chunks++;
|
||||
}
|
||||
aec_destroy(aec_inst);
|
||||
free(aec_in);
|
||||
free(aec_ref);
|
||||
free(aec_out);
|
||||
printf("AEC test successfully\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
void VADTask(void *arg)
|
||||
{
|
||||
vad_handle_t vad_inst = vad_create(VAD_MODE_4, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
|
||||
int chunks = 0;
|
||||
int audio_chunksize = VAD_FRAME_LENGTH_MS * 16;
|
||||
int16_t *vad_in = malloc(audio_chunksize * sizeof(int16_t));
|
||||
while (1) {
|
||||
if ((chunks + 1) * audio_chunksize * sizeof(int16_t) <= sizeof(audio_test_file)) {
|
||||
memcpy(vad_in, audio_test_file + chunks * audio_chunksize * sizeof(int16_t), audio_chunksize * sizeof(int16_t));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
vad_state_t vad_state = vad_process(vad_inst, vad_in);
|
||||
chunks++;
|
||||
}
|
||||
vad_destroy(vad_inst);
|
||||
free(vad_in);
|
||||
printf("VAD test successfully\n\n");
|
||||
printf("TEST3 FINISHED\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void audio_process_test()
|
||||
{
|
||||
xTaskCreatePinnedToCore(&NSTask, "noise_suppression", 3 * 1024, NULL, 5, NULL, 1);
|
||||
vTaskDelay(1000 / portTICK_PERIOD_MS);
|
||||
xTaskCreatePinnedToCore(&AGCTask, "automatic_gain_control", 3 * 1024, NULL, 5, NULL, 1);
|
||||
vTaskDelay(1000 / portTICK_PERIOD_MS);
|
||||
xTaskCreatePinnedToCore(&AECTask, "acoustic_echo_cancellation", 3 * 1024, NULL, 5, NULL, 0);
|
||||
vTaskDelay(1000 / portTICK_PERIOD_MS);
|
||||
xTaskCreatePinnedToCore(&VADTask, "voice_activity_detection", 3 * 1024, NULL, 5, NULL, 0);
|
||||
}
|
||||
3
main/include/audio_process.h
Normal file
3
main/include/audio_process.h
Normal file
@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
void audio_process_test();
|
||||
7643
main/include/audio_test_file.h
Normal file
7643
main/include/audio_test_file.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -8,6 +8,7 @@
|
||||
|
||||
#include "wakenet_test.h"
|
||||
#include "multinet_test.h"
|
||||
#include "audio_process.h"
|
||||
|
||||
void app_main()
|
||||
{
|
||||
@ -15,6 +16,10 @@ void app_main()
|
||||
wakenet_test();
|
||||
vTaskDelay(3000 / portTICK_PERIOD_MS);
|
||||
|
||||
//test multinet
|
||||
// test multinet
|
||||
multinet_test();
|
||||
vTaskDelay(3000 / portTICK_PERIOD_MS);
|
||||
|
||||
// test acoustic algorithm
|
||||
audio_process_test();
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ void wakenetTask(void *arg)
|
||||
int tv_ms=(tv_end.tv_sec-tv_start.tv_sec)*1000+(tv_end.tv_usec-tv_start.tv_usec)/1000;
|
||||
printf("Done! Took %d ms to parse %d ms worth of samples in %d iterations. CPU loading(single core):%.1f%%\n",
|
||||
tv_ms, chunks*30, chunks, tv_ms*1.0/chunks/3*10);
|
||||
printf("TEST FINISHED\n\n");
|
||||
printf("TEST1 FINISHED\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user