feature/Add NS/AEC/AGC/VAD algorithm

This commit is contained in:
Wang Wang Wang 2019-08-19 17:09:21 +08:00 committed by Sun Xiang Yu
parent c62aeb3826
commit 0f6442f84a
16 changed files with 8066 additions and 2 deletions

View File

@ -5,6 +5,7 @@ MODULE_PATH := $(abspath $(shell pwd))
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/lib
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/wake_word_engine
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/speech_command_recognition
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/acoustic_algorithm
include $(IDF_PATH)/make/project.mk

View File

@ -0,0 +1,11 @@
# Component makefile for the acoustic algorithm component: exposes the public
# headers and links every prebuilt static library found in the component directory.

# Public header directory of this component.
COMPONENT_ADD_INCLUDEDIRS := include
# Source directory of this component (the component root).
COMPONENT_SRCDIRS := .
# List every lib*.a archive located directly in the component directory.
LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a)
# Strip the directory part and turn each libNAME.a into a -lNAME linker flag.
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))
# Search the component directory for libraries and link the ones found above.
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS)
# NOTE(review): presumably registers the archives with the build system so that a
# changed .a triggers a relink -- confirm against the ESP-IDF build documentation.
ALL_LIB_FILES += $(LIB_FILES)

View File

@ -0,0 +1,76 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AEC_H_
#define _ESP_AEC_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define USE_AEC_FFT                 // Not kiss_fft
#define AEC_SAMPLE_RATE     16000   // Only 16000 Hz is supported
#define AEC_FRAME_LENGTH_MS 16      // Only 16 ms is supported
#define AEC_FILTER_LENGTH   1200    // Number of samples of echo to cancel

/** Opaque handle to an AEC (acoustic echo cancellation) instance. */
typedef void* aec_handle_t;

/**
 * @brief Creates an instance of the AEC structure.
 *
 * @param sample_rate    The sampling frequency (Hz); can be 8000, 16000.
 *
 * @param frame_length   The length of the audio processing; can be 10ms, 20ms, 30ms, default: 30.
 *
 * @param filter_length  Number of samples of echo to cancel.
 *
 * @note NOTE(review): the macros above state that only 16000 Hz and 16 ms are
 *       supported, which conflicts with the values listed for sample_rate and
 *       frame_length. Prefer AEC_SAMPLE_RATE / AEC_FRAME_LENGTH_MS until the
 *       supported set is confirmed.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of AEC
 */
aec_handle_t aec_create(int sample_rate, int frame_length, int filter_length);

/**
 * @brief Performs echo cancellation on one frame, based on the audio sent to the
 *        speaker and the frame captured from the mic.
 *
 * @param inst     The instance of AEC.
 *
 * @param indata   An array of 16-bit signed audio samples from mic.
 *
 * @param refdata  An array of 16-bit signed audio samples sent to the speaker.
 *
 * @param outdata  Returns near-end signal with echo removed.
 *
 * @note Presumably each array holds exactly one frame of samples as configured
 *       in aec_create() -- TODO confirm.
 *
 * @return None
 */
void aec_process(aec_handle_t inst, int16_t *indata, int16_t *refdata, int16_t *outdata);

/**
 * @brief Free the AEC instance.
 *
 * @param inst The instance of AEC.
 *
 * @return None
 */
void aec_destroy(aec_handle_t inst);

#ifdef __cplusplus
}   // closes the extern "C" block opened at the top of this header
#endif

#endif //_ESP_AEC_H_

View File

@ -0,0 +1,31 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AGC_H_
#define _ESP_AGC_H_

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief AGC status codes. Non-negative values are valid, negative values are errors.
 *
 * NOTE(review): the typedef name reads "AGE" where "AGC" was probably intended;
 * it is kept unchanged so existing users keep compiling.
 */
typedef enum {
    ESP_AGC_SUCCESS = 0,            ///< Success
    ESP_AGC_FAIL = -1,              ///< AGC failed
    ESP_AGC_SAMPLE_RATE_ERROR = -2, ///< Sample rate can only be 8 kHz, 16 kHz or 32 kHz
    ESP_AGC_FRAME_SIZE_ERROR = -3,  ///< Input frame must be 10 ms long, so the frame size follows from the sample rate
} ESP_AGE_ERR;

/**
 * @brief Opens an AGC (automatic gain control) instance.
 *
 * @param agc_mode     AGC operating mode (meaning of the values is not documented here -- TODO confirm).
 * @param sample_rate  Sampling frequency in Hz; per the error codes above: 8000, 16000 or 32000.
 *
 * @return Opaque AGC handle (presumably NULL on failure -- TODO confirm).
 */
void *esp_agc_open(int agc_mode, int sample_rate);

/**
 * @brief Configures an AGC instance.
 *
 * @param agc_handle         Handle returned by esp_agc_open().
 * @param gain_dB            Gain setting in dB.
 * @param limiter_enable     Limiter switch (presumably non-zero enables it -- TODO confirm).
 * @param target_level_dbfs  Target level in dBFS.
 */
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);

/**
 * @brief Processes one frame of PCM audio.
 *
 * @param agc_handle   Handle returned by esp_agc_open().
 * @param in_pcm       Input samples.
 * @param out_pcm      Output samples.
 * @param frame_size   Number of samples in the frame; must correspond to 10 ms at sample_rate.
 * @param sample_rate  Sampling frequency in Hz.
 *
 * @return Presumably one of the ESP_AGE_ERR values (negative on error) -- TODO confirm.
 */
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);

/**
 * @brief Closes an AGC instance.
 *
 * NOTE(review): the name is spelled "clse"; it presumably matches the symbol
 * exported by the prebuilt library, so it must not be renamed here.
 *
 * @param agc_handle Handle returned by esp_agc_open().
 */
void esp_agc_clse(void *agc_handle);

#ifdef __cplusplus
}
#endif

#endif // _ESP_AGC_H_

View File

@ -0,0 +1,70 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_NS_H_
#define _ESP_NS_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define NS_FRAME_LENGTH_MS 30 //Supports 10ms, 20ms, 30ms

/**
 * The sampling frequency (Hz) must be 16000Hz
 */

/** Opaque handle to an NS (noise suppression) instance. */
typedef void* ns_handle_t;

/**
 * @brief Creates an instance of the NS structure.
 *
 * @param frame_length_ms  The length of the audio processing; can be 10ms, 20ms, 30ms.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of NS
 */
ns_handle_t ns_create(int frame_length_ms);

/**
 * @brief Feed samples of an audio stream to the NS and get the audio stream after noise suppression.
 *
 * @param inst     The instance of NS.
 *
 * @param indata   An array of 16-bit signed audio samples.
 *
 * @param outdata  An array of 16-bit signed audio samples after noise suppression.
 *
 * @note Presumably both arrays hold one frame of frame_length_ms at 16000 Hz
 *       -- TODO confirm.
 *
 * @return None
 */
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);

/**
 * @brief Free the NS instance.
 *
 * @param inst The instance of NS.
 *
 * @return None
 */
void ns_destroy(ns_handle_t inst);

#ifdef __cplusplus
}   // closes the extern "C" block opened at the top of this header
#endif

#endif //_ESP_NS_H_

View File

@ -0,0 +1,104 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_VAD_H_
#define _ESP_VAD_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define SAMPLE_RATE_HZ 16000      //Supports 32000, 16000, 8000
#define VAD_FRAME_LENGTH_MS 30    //Supports 10ms, 20ms, 30ms

/**
 * @brief VAD operating mode. A more aggressive (higher mode) VAD is more
 *        restrictive in reporting speech.
 */
typedef enum {
    VAD_MODE_0 = 0,
    VAD_MODE_1,
    VAD_MODE_2,
    VAD_MODE_3,
    VAD_MODE_4
} vad_mode_t;

/**
 * @brief Result of processing one frame.
 */
typedef enum {
    VAD_SILENCE = 0,
    VAD_SPEECH
} vad_state_t;

/** Opaque handle to a VAD (voice activity detection) instance. */
typedef void* vad_handle_t;

/**
 * @brief Creates an instance of the VAD structure.
 *
 * @param vad_mode        Sets the VAD operating mode.
 *
 * @param sample_rate_hz  The sampling frequency (Hz); can be 32000, 16000, 8000, default: 16000.
 *
 * @param one_frame_ms    The length of the audio processing; can be 10ms, 20ms, 30ms, default: 30.
 *
 * @return
 *         - NULL: Create failed
 *         - Others: The instance of VAD
 */
vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_ms);

/**
 * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
 *
 * @param inst  The instance of VAD.
 *
 * @param data  An array of 16-bit signed audio samples.
 *
 * @note Presumably the array holds one frame as configured in vad_create()
 *       -- TODO confirm.
 *
 * @return
 *         - VAD_SILENCE if no voice
 *         - VAD_SPEECH if voice is detected
 */
vad_state_t vad_process(vad_handle_t inst, int16_t *data);

/**
 * @brief Free the VAD instance.
 *
 * @param inst The instance of VAD.
 *
 * @return None
 */
void vad_destroy(vad_handle_t inst);

/*
 * Programming Guide:
 *
 * @code{c}
 * vad_handle_t vad_inst = vad_create(VAD_MODE_3, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Creates an instance to the VAD structure.
 *
 * while (1) {
 *     //Use buffer to receive the audio data from MIC.
 *     vad_state_t vad_state = vad_process(vad_inst, buffer); // Feed samples to the VAD process and get the result.
 * }
 *
 * vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
 *
 * @endcode
 */

#ifdef __cplusplus
}   // closes the extern "C" block opened at the top of this header
#endif

#endif //_ESP_VAD_H_

Binary file not shown.

120
main/audio_process.c Normal file
View File

@ -0,0 +1,120 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "audio_process.h"
#include "esp_ns.h"
#include "esp_aec.h"
#include "esp_agc.h"
#include "esp_vad.h"
#include "audio_test_file.h"
/**
 * @brief FreeRTOS task that runs the noise suppression (NS) test.
 *
 * Copies audio_test_file frame by frame into a work buffer and passes each
 * frame through ns_process(). Trailing bytes that do not fill a whole frame are
 * ignored. The task releases everything it acquired and deletes itself.
 *
 * @param arg Unused.
 */
void NSTask(void *arg)
{
    (void)arg;

    // 16 samples per millisecond, i.e. a 16 kHz stream
    const size_t frame_bytes = (size_t)NS_FRAME_LENGTH_MS * 16 * sizeof(int16_t);
    // Byte-wise view, so the byte offsets below match the sizeof()-based bound
    // regardless of the element type audio_test_file is declared with.
    const unsigned char *audio = (const unsigned char *)audio_test_file;
    int passed = 0;

    ns_handle_t ns_inst = ns_create(NS_FRAME_LENGTH_MS);
    int16_t *ns_in = malloc(frame_bytes);
    int16_t *ns_out = malloc(frame_bytes);

    // ns_create() returns NULL on failure and malloc() may fail on a small heap;
    // never hand NULL pointers to memcpy()/ns_process().
    if (ns_inst != NULL && ns_in != NULL && ns_out != NULL) {
        for (size_t offset = 0; offset + frame_bytes <= sizeof(audio_test_file); offset += frame_bytes) {
            memcpy(ns_in, audio + offset, frame_bytes);
            ns_process(ns_inst, ns_in, ns_out);
        }
        passed = 1;
    }

    if (ns_inst != NULL) {
        ns_destroy(ns_inst);
    }
    free(ns_in);
    free(ns_out);

    if (passed) {
        printf("NS test successfully\n\n");
    } else {
        printf("NS test failed: could not allocate NS instance or buffers\n\n");
    }
    vTaskDelete(NULL);
}
#define AGC_FRAME_BYTES 320 // bytes per frame: 160 16-bit samples

/**
 * @brief FreeRTOS task that runs the automatic gain control (AGC) test.
 *
 * Opens an AGC instance for 16000 Hz, configures it, then passes audio_test_file
 * through esp_agc_process() in AGC_FRAME_BYTES sized frames. Trailing bytes that
 * do not fill a whole frame are ignored. The task stops at the first negative
 * (error) return code, releases everything it acquired and deletes itself.
 *
 * @param arg Unused.
 */
void AGCTask(void *arg)
{
    (void)arg;

    // Byte-wise view, so the byte offsets below match the sizeof()-based bound
    // regardless of the element type audio_test_file is declared with.
    const unsigned char *audio = (const unsigned char *)audio_test_file;
    int passed = 0;

    void *agc_handle = esp_agc_open(3, 16000);
    int16_t *agc_in = malloc(AGC_FRAME_BYTES);
    int16_t *agc_out = malloc(AGC_FRAME_BYTES);

    if (agc_handle == NULL || agc_in == NULL || agc_out == NULL) {
        printf("AGC test failed: could not allocate AGC instance or buffers\n\n");
    } else {
        set_agc_config(agc_handle, 15, 1, -3);
        passed = 1;
        for (size_t offset = 0; offset + AGC_FRAME_BYTES <= sizeof(audio_test_file); offset += AGC_FRAME_BYTES) {
            memcpy(agc_in, audio + offset, AGC_FRAME_BYTES);
            // frame size is given in samples, hence bytes / 2
            int ret = esp_agc_process(agc_handle, agc_in, agc_out, AGC_FRAME_BYTES / 2, 16000);
            if (ret < 0) {
                // negative return values are errors (see esp_agc.h)
                printf("AGC test failed: esp_agc_process returned %d\n\n", ret);
                passed = 0;
                break;
            }
        }
    }

    if (agc_handle != NULL) {
        esp_agc_clse(agc_handle);
    }
    free(agc_in);
    free(agc_out);

    if (passed) {
        printf("AGC test successfully\n\n");
    }
    vTaskDelete(NULL);
}
/**
 * @brief FreeRTOS task that runs the acoustic echo cancellation (AEC) test.
 *
 * Passes audio_test_file frame by frame through aec_process() as the mic
 * signal, using an all-zero reference (speaker) signal. Trailing bytes that do
 * not fill a whole frame are ignored. The task releases everything it acquired
 * and deletes itself.
 *
 * @param arg Unused.
 */
void AECTask(void *arg)
{
    (void)arg;

    // 16 samples per millisecond, i.e. a 16 kHz stream
    const size_t frame_bytes = (size_t)AEC_FRAME_LENGTH_MS * 16 * sizeof(int16_t);
    // Byte-wise view, so the byte offsets below match the sizeof()-based bound
    // regardless of the element type audio_test_file is declared with.
    const unsigned char *audio = (const unsigned char *)audio_test_file;
    int passed = 0;

    aec_handle_t aec_inst = aec_create(AEC_SAMPLE_RATE, AEC_FRAME_LENGTH_MS, AEC_FILTER_LENGTH);
    int16_t *aec_in = malloc(frame_bytes);
    int16_t *aec_ref = malloc(frame_bytes);
    int16_t *aec_out = malloc(frame_bytes);

    // aec_create() returns NULL on failure and malloc() may fail on a small heap;
    // never hand NULL pointers to memcpy()/memset()/aec_process().
    if (aec_inst != NULL && aec_in != NULL && aec_ref != NULL && aec_out != NULL) {
        for (size_t offset = 0; offset + frame_bytes <= sizeof(audio_test_file); offset += frame_bytes) {
            memcpy(aec_in, audio + offset, frame_bytes);
            // The reference is re-zeroed every frame because aec_process() takes
            // it as a non-const pointer and might modify it.
            memset(aec_ref, 0, frame_bytes);
            aec_process(aec_inst, aec_in, aec_ref, aec_out);
        }
        passed = 1;
    }

    if (aec_inst != NULL) {
        aec_destroy(aec_inst);
    }
    free(aec_in);
    free(aec_ref);
    free(aec_out);

    if (passed) {
        printf("AEC test successfully\n\n");
    } else {
        printf("AEC test failed: could not allocate AEC instance or buffers\n\n");
    }
    vTaskDelete(NULL);
}
/**
 * @brief FreeRTOS task that runs the voice activity detection (VAD) test.
 *
 * Passes audio_test_file frame by frame through vad_process(); the per-frame
 * result is not evaluated. Trailing bytes that do not fill a whole frame are
 * ignored. The task releases everything it acquired, prints the end-of-test
 * marker and deletes itself.
 *
 * @param arg Unused.
 */
void VADTask(void *arg)
{
    (void)arg;

    // 16 samples per millisecond, i.e. a 16 kHz stream
    const size_t frame_bytes = (size_t)VAD_FRAME_LENGTH_MS * 16 * sizeof(int16_t);
    // Byte-wise view, so the byte offsets below match the sizeof()-based bound
    // regardless of the element type audio_test_file is declared with.
    const unsigned char *audio = (const unsigned char *)audio_test_file;
    int passed = 0;

    vad_handle_t vad_inst = vad_create(VAD_MODE_4, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
    int16_t *vad_in = malloc(frame_bytes);

    // vad_create() returns NULL on failure and malloc() may fail on a small heap;
    // never hand NULL pointers to memcpy()/vad_process().
    if (vad_inst != NULL && vad_in != NULL) {
        for (size_t offset = 0; offset + frame_bytes <= sizeof(audio_test_file); offset += frame_bytes) {
            memcpy(vad_in, audio + offset, frame_bytes);
            (void)vad_process(vad_inst, vad_in); // result intentionally unused
        }
        passed = 1;
    }

    if (vad_inst != NULL) {
        vad_destroy(vad_inst);
    }
    free(vad_in);

    if (passed) {
        printf("VAD test successfully\n\n");
    } else {
        printf("VAD test failed: could not allocate VAD instance or buffer\n\n");
    }
    // Always emitted so anything waiting for the end-of-test marker does not hang.
    printf("TEST3 FINISHED\n\n");
    vTaskDelete(NULL);
}
void audio_process_test()
{
xTaskCreatePinnedToCore(&NSTask, "noise_suppression", 3 * 1024, NULL, 5, NULL, 1);
vTaskDelay(1000 / portTICK_PERIOD_MS);
xTaskCreatePinnedToCore(&AGCTask, "automatic_gain_control", 3 * 1024, NULL, 5, NULL, 1);
vTaskDelay(1000 / portTICK_PERIOD_MS);
xTaskCreatePinnedToCore(&AECTask, "acoustic_echo_cancellation", 3 * 1024, NULL, 5, NULL, 0);
vTaskDelay(1000 / portTICK_PERIOD_MS);
xTaskCreatePinnedToCore(&VADTask, "voice_activity_detection", 3 * 1024, NULL, 5, NULL, 0);
}

View File

@ -0,0 +1,3 @@
#pragma once

/**
 * @brief Starts the acoustic algorithm (NS, AGC, AEC, VAD) test tasks.
 */
void audio_process_test(void);

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@
#include "wakenet_test.h"
#include "multinet_test.h"
#include "audio_process.h"
void app_main()
{
@ -15,6 +16,10 @@ void app_main()
wakenet_test();
vTaskDelay(3000 / portTICK_PERIOD_MS);
//test multinet
// test multinet
multinet_test();
vTaskDelay(3000 / portTICK_PERIOD_MS);
// test acoustic algorithm
audio_process_test();
}

View File

@ -41,7 +41,7 @@ void wakenetTask(void *arg)
int tv_ms=(tv_end.tv_sec-tv_start.tv_sec)*1000+(tv_end.tv_usec-tv_start.tv_usec)/1000;
printf("Done! Took %d ms to parse %d ms worth of samples in %d iterations. CPU loading(single core):%.1f%%\n",
tv_ms, chunks*30, chunks, tv_ms*1.0/chunks/3*10);
printf("TEST FINISHED\n\n");
printf("TEST1 FINISHED\n\n");
vTaskDelete(NULL);
}