mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
add README
This commit is contained in:
parent
36d617aff0
commit
74e120a663
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
build/
|
||||
sdkconfig.old
|
||||
|
||||
20
LICENSE
Normal file
20
LICENSE
Normal file
@ -0,0 +1,20 @@
|
||||
ESPRESSIF MIT License
|
||||
|
||||
Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
|
||||
|
||||
Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
|
||||
it is free of charge, to any person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the Software is furnished
|
||||
to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or
|
||||
substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
10
Makefile
Normal file
10
Makefile
Normal file
@ -0,0 +1,10 @@
|
||||
PROJECT_NAME := esp_sr_public
|
||||
|
||||
MODULE_PATH := $(abspath $(shell pwd))
|
||||
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/lib
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/wake_words_engine
|
||||
EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/speech_commands_recognition
|
||||
|
||||
include $(IDF_PATH)/make/project.mk
|
||||
|
||||
22
README.md
Normal file
22
README.md
Normal file
@ -0,0 +1,22 @@
|
||||
# esp_sr
|
||||
|
||||
Espressif esp_sr provides basic algorithms for **Speech Interaction** applications. Now, this framework has two models:
|
||||
|
||||
* The wake word detection model [WakeNet](wake_words_engine/README.md)
|
||||
* The speech commands recognition model [MultiNet](speech_commands_recognition/README.md)
|
||||
|
||||
These algorithms are provided in the form of a component, so they can be integrated into your projects with minimal effort.
|
||||
|
||||
## Wake Word Engine
|
||||
|
||||
Espressif wake word engine [WakeNet](wake_words_engine/README.md) is specially designed to provide a high-performance, low-memory-footprint wake word detection algorithm for users, which enables devices to always listen for wake words, such as “Alexa”, “天猫精灵” (Tian Mao Jing Ling) and “小爱同学” (Xiao Ai Tong Xue).
|
||||
|
||||
Currently, Espressif not only provides an official wake word "Hi, Lexin" to the public for free, but also allows customized wake words. For details on how to customize your own wake words, please see [Espressif Speech Wake Words Customization Process](wake_words_engine/ESP_Wake_Words_Customization.md).
|
||||
|
||||
## Speech Commands Recognition
|
||||
|
||||
Espressif's speech commands recognition model [MultiNet](speech_commands_recognition/README.md) is specially designed to provide a flexible off-line speech commands recognition model. With this model, you can easily add your own speech commands, eliminating the need to train the model again.
|
||||
|
||||
Currently, Espressif **MultiNet** supports up to 100 Chinese speech commands, such as “打开空调” (Turn on the air conditioner) and “打开卧室灯” (Turn on the bedroom light).
|
||||
|
||||
We will add support for English commands in the next release.
|
||||
BIN
img/model_sel.png
Normal file
BIN
img/model_sel.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 21 KiB |
BIN
img/multinet_workflow.png
Normal file
BIN
img/multinet_workflow.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
BIN
img/wakenet_workflow.png
Normal file
BIN
img/wakenet_workflow.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 103 KiB |
BIN
img/word_sel.png
Normal file
BIN
img/word_sel.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 20 KiB |
11
lib/component.mk
Normal file
11
lib/component.mk
Normal file
@ -0,0 +1,11 @@
|
||||
COMPONENT_ADD_INCLUDEDIRS := include
|
||||
|
||||
COMPONENT_SRCDIRS := .
|
||||
|
||||
LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a)
|
||||
|
||||
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))
|
||||
|
||||
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS)
|
||||
|
||||
ALL_LIB_FILES += $(LIB_FILES)
|
||||
315
lib/include/dl_lib.h
Normal file
315
lib/include/dl_lib.h
Normal file
@ -0,0 +1,315 @@
|
||||
#ifndef DL_LIB_H
|
||||
#define DL_LIB_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
|
||||
typedef int padding_state;
|
||||
/**
|
||||
* @brief Does a fast version of the exp() operation on a floating point number.
|
||||
*
|
||||
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
|
||||
* Should be good til an input of 5 or so with a steps factor of 8.
|
||||
*
|
||||
* @param x Floating point input
|
||||
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
|
||||
* @return Exp()'ed output
|
||||
*/
|
||||
fptp_t fast_exp(double x, int steps);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on a quantized matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
|
||||
fptp_t dl_sigmoid_op(fptp_t in);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a floating point number
|
||||
*
|
||||
* @param v Floating point input number
|
||||
* @return Tanh value
|
||||
*/
|
||||
fptp_t dl_tanh_op(fptp_t v);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
|
||||
*
|
||||
* @param in Floating point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix.
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fully connected layer operation
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
|
||||
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
|
||||
*/
|
||||
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
|
||||
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
|
||||
* this matrix only needs to be calculated once. This function does that.
|
||||
*
|
||||
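* @param variance Variance matrix
* @param epsilon Epsilon value the sqrtvari matrix depends on
* @param out Output matrix that receives the pre-calculated sqrtvari values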
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
|
||||
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, partial quantized version.
|
||||
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons, need to be quantised
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
|
||||
|
||||
/**
|
||||
* @brief Do a fully-connected layer pass, fully-quantized version.
|
||||
*
|
||||
* @param in Input vector
|
||||
* @param weight Weights of the neurons
|
||||
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @param out Output matrix. The output values of the neurons are placed here.
|
||||
*/
|
||||
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a basic LSTM layer pass, fully-quantized version
|
||||
*
|
||||
* @warning Returns state_h pointer, so do not free result.
|
||||
|
||||
* @param in Input vector
|
||||
* @param state_c Internal state of the LSTM network
|
||||
* @param state_h Internal state (previous output values) of the LSTM network
|
||||
* @param weight Weights for the neurons
|
||||
* @param bias Bias for the neurons. Can be NULL if no bias is required
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return Output values of the neurons
|
||||
*/
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Batch-normalize a matrix, fully-quantized version
|
||||
*
|
||||
* @param m The matrix to normalize
|
||||
* @param offset Offset matrix
|
||||
* @param scale Scale matrix
|
||||
* @param mean Mean matrix
|
||||
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
|
||||
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
|
||||
* @return
|
||||
*/
|
||||
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
|
||||
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
|
||||
|
||||
/**
|
||||
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
|
||||
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @param clip If value is higher than this, it will be clipped to this value
|
||||
* @return Relu output
|
||||
*/
|
||||
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
|
||||
|
||||
/**
|
||||
* @brief Does a ReLu operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param in Fixed-point input
|
||||
* @return Sigmoid output
|
||||
*/
|
||||
int dl_sigmoid_op_q(const int in);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on a fixed-point number.
|
||||
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
|
||||
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
|
||||
*
|
||||
* @param v Fixed-point input
|
||||
* @return tanh output
|
||||
*/
|
||||
int dl_tanh_op_q(int v);
|
||||
|
||||
/**
|
||||
* @brief Clip the values greater than clip in the matrix, float version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Clip the values greater than clip in the matrix, quantized version
|
||||
*
|
||||
* @param in Input matrix
|
||||
* @param clip If values are higher than this, they will be clipped to this value
|
||||
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
|
||||
*/
|
||||
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass.
|
||||
*
|
||||
* @warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
|
||||
* @param bias Bias for the CNN layer.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1.
|
||||
* @return The result of CNN layer.
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a basic CNN layer pass, quantised version.
|
||||
*
|
||||
* @warning This just supports the single channel input image, and the output is single row matrix.
|
||||
That is to say, the height of output is 1, and the width of output is out_channels*out_image_width*out_image_height
|
||||
*
|
||||
* @param in Input single channel image
|
||||
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
|
||||
* @param bias Bias of the neurons.
|
||||
* @param filter_height The height of convolution kernel
|
||||
* @param filter_width The width of convolution kernel
|
||||
* @param out_channels The number of output channels of convolution kernel
|
||||
* @param stride_x The step length of the convolution window in x(width) direction
|
||||
* @param stride_y The step length of the convolution window in y(height) direction
|
||||
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
|
||||
* @param out The result of CNN layer, out->h=1
|
||||
* @return The result of CNN layer
|
||||
*/
|
||||
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
|
||||
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
|
||||
|
||||
#endif
|
||||
|
||||
54
lib/include/dl_lib_coefgetter_if.h
Normal file
54
lib/include/dl_lib_coefgetter_if.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef DL_LIB_COEFGETTER_IF_H
|
||||
#define DL_LIB_COEFGETTER_IF_H
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
|
||||
//dl_batch_normalize_get_sqrtvar first.
|
||||
#define COEF_GETTER_HINT_BNVAR (1<<0)
|
||||
|
||||
/*
|
||||
This struct describes the basic information of model data:
|
||||
word_num: the number of wake words or speech commands
|
||||
word_list: the name list of wake words or speech commands
|
||||
thresh_list: the threshold list of wake words or speech commands
|
||||
info_str: the string used to reflect the version and information of model data
|
||||
which consist of the architecture of network, the version of model data, wake words and their threshold
|
||||
*/
|
||||
typedef struct {
|
||||
int word_num; /* number of wake words or speech commands */
|
||||
char **word_list; /* name list of the wake words or speech commands */
|
||||
int *win_list; /* NOTE(review): not described in this header; presumably one entry per word -- confirm meaning */
|
||||
float *thresh_list; /* threshold list of the wake words or speech commands */
|
||||
char *info_str; /* string reflecting the version and information of the model data */
|
||||
} model_info_t;
|
||||
|
||||
/*
|
||||
Alphabet struct describes the basic grapheme or phoneme.
|
||||
item_num: the number of basic items (graphemes or phonemes)
|
||||
items: the list of basic item
|
||||
*/
|
||||
typedef struct {
|
||||
int item_num; /* number of basic items (graphemes or phonemes) */
|
||||
char **items; /* list of the basic items */
|
||||
}alphabet_t;
|
||||
|
||||
/*
|
||||
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
|
||||
For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
|
||||
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
|
||||
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
|
||||
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the
|
||||
memory for the returned matrices, when applicable.
|
||||
*/
|
||||
typedef struct {
|
||||
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); /* get a float coefficient matrix by name; hint is a bitwise OR of COEF_GETTER_HINT_* flags or 0 */
|
||||
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); /* get a quantized coefficient matrix by name; same name/arg/hint meaning as getter_f */
|
||||
void (*free_f)(const dl_matrix2d_t *m); /* release a matrix returned by getter_f, when applicable */
|
||||
void (*free_q)(const dl_matrix2dq_t *m); /* release a matrix returned by getter_q, when applicable */
|
||||
const model_info_t* (*getter_info)(void *arg); /* returns a pointer to a model_info_t; arg is the user-defined argument */
|
||||
const alphabet_t* (*getter_alphabet)(void *arg); /* returns a pointer to an alphabet_t; arg is the user-defined argument */
|
||||
} model_coeff_getter_t;
|
||||
|
||||
#endif
|
||||
151
lib/include/dl_lib_conv_queue.h
Normal file
151
lib/include/dl_lib_conv_queue.h
Normal file
@ -0,0 +1,151 @@
|
||||
#ifndef DL_LIB_CONV_QUEUE_H
|
||||
#define DL_LIB_CONV_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrix.h"
|
||||
typedef float fptp_t;
|
||||
|
||||
|
||||
//Flags for matrices
|
||||
#define DL_MF_FOREIGNDATA (1<<0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
//Float convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /**< the length of queue */
|
||||
int c; /**< the channel number of queue element */
|
||||
int front; /**< the front(top) position of queue */
|
||||
int flag; /**< not used */
|
||||
fptp_t *item; /**< Pointer to item array */
|
||||
} dl_conv_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
|
||||
|
||||
/**
|
||||
* @brief Free a convolution queue
|
||||
*
|
||||
* @param cq The convolution queue to free
|
||||
*/
|
||||
void dl_conv_queue_free(dl_conv_queue_t *cq);
|
||||
|
||||
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param item The new element
|
||||
*/
|
||||
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a softmax operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
|
||||
* by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
|
||||
|
||||
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
|
||||
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
|
||||
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
|
||||
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
|
||||
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
|
||||
/**
|
||||
* @brief Fast implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
|
||||
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
|
||||
|
||||
/**
|
||||
* @brief Fast implementation of the dilation layer, as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input convolution queue
|
||||
* @param out Output convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
|
||||
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
|
||||
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
|
||||
|
||||
|
||||
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#endif
|
||||
157
lib/include/dl_lib_convq_queue.h
Normal file
157
lib/include/dl_lib_convq_queue.h
Normal file
@ -0,0 +1,157 @@
|
||||
#ifndef DL_LIB_CONVQ_QUEUE_H
|
||||
#define DL_LIB_CONVQ_QUEUE_H
|
||||
|
||||
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
//fixed-point convolution FIFO queue.
|
||||
typedef struct {
|
||||
int n; /**< the length of queue */
|
||||
int c; /**< the channel number of queue element */
|
||||
int front; /**< the front(top) position of queue */
|
||||
int flag; /**< not used */
|
||||
int exponent; /**< The values in itemq should be multiplied by pow(2,exponent)
|
||||
to get the real values */
|
||||
qtp_t *itemq; /**< Pointer to item array */
|
||||
} dl_convq_queue_t;
|
||||
|
||||
/**
|
||||
* @brief Allocate a fixed-point convolution queue
|
||||
*
|
||||
* @param n The length of queue
|
||||
* @param c The channel number of elements in the queue
|
||||
* @return The convolution queue, or NULL if out of memory
|
||||
*/
|
||||
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
|
||||
|
||||
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out);
|
||||
|
||||
/**
|
||||
* @brief Free a fixed-point convolution queue
|
||||
*
|
||||
* @param cq The fixed-point convolution queue to free
|
||||
*/
|
||||
void dl_convq_queue_free(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Move the front pointer of queue forward,
|
||||
the first (oldest) element becomes the last (newest) element.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @return Pointer of oldest element
|
||||
*/
|
||||
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
|
||||
|
||||
/**
|
||||
* @brief Remove the oldest element, then insert the input element at the end of queue
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param a The new element
|
||||
*/
|
||||
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
|
||||
|
||||
/**
|
||||
* @brief Insert the float-point element at the end of queue.
|
||||
* The precision of fixed-point numbers is described by the Qm.f notation,
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param item The float-point element
|
||||
* @param m_bit The number of integer bits including the sign bits
|
||||
* @param f_bit The number of fractional bits
|
||||
*/
|
||||
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer of element in the queue by offset
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a sigmoid operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* sigmoid operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_sigmoid_step_q(dl_convq_queue_t *cq, int offset);
|
||||
|
||||
/**
|
||||
* @brief Does a tanh operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, and does a
|
||||
* tanh operation by this pointer, then return the pointer
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @return Pointer of the element
|
||||
*/
|
||||
qtp_t *dl_tanh_step_q(dl_convq_queue_t *cq, int offset);
|
||||
qtp_t *dl_relu_step_q(dl_convq_queue_t *cq, fptp_t clip, int offset);
|
||||
/**
|
||||
* @brief Does a softmax operation on one of the elements in the convolution queue.
|
||||
* Gets the pointer of element in the convolution queue by offset, input data
|
||||
stay as it is. Results are saved into the *out* array.
|
||||
*
|
||||
* @param cq Input fixed-point convolution queue
|
||||
* @param offset Offset from the front of the queue
|
||||
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return softmax results
|
||||
*/
|
||||
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
|
||||
|
||||
/**
|
||||
* @brief Fast, quantised implementation of 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
|
||||
* based on convolution queue.
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param kernel The kernel matrix of filter
|
||||
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
|
||||
* @return The result of atrous convolution
|
||||
*/
|
||||
qtp_t *dl_atrous_conv1dq_step(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift);
|
||||
|
||||
/**
|
||||
* @brief Fast implementation of a dilation layer, as follows
|
||||
*
|
||||
* |-> [gate(sigmoid)] -|
|
||||
* input - | |-> (*) - output
|
||||
* |-> [filter(tanh)] -|
|
||||
*
|
||||
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
|
||||
* is first element of output queue and should not be freed separately.
|
||||
*
|
||||
* @param in Input fixed-point convolution queue
|
||||
* @param out Output fixed-point convolution queue
|
||||
* @param rate A positive int, the stride with which we sample input value
|
||||
* @param size A positive int, the size of 1D-filter
|
||||
* @param filter_kernel The kernel matrix of filter
|
||||
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
|
||||
* @param gate_kernel The kernel matrix of gate
|
||||
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
|
||||
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
|
||||
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
|
||||
* @return The result of dilation layer
|
||||
*/
|
||||
qtp_t *dl_dilation_layer_q(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
|
||||
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
|
||||
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
|
||||
int filter_shift, int gate_shift);
|
||||
|
||||
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
|
||||
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
|
||||
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
|
||||
|
||||
#endif
|
||||
223
lib/include/dl_lib_matrix.h
Normal file
223
lib/include/dl_lib_matrix.h
Normal file
@ -0,0 +1,223 @@
|
||||
#ifndef DL_LIB_MATRIX_H
|
||||
#define DL_LIB_MATRIX_H
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#endif
|
||||
|
||||
typedef float fptp_t;
|
||||
|
||||
#if CONFIG_BT_SHARE_MEM_REUSE
|
||||
extern multi_heap_handle_t gst_heap;
|
||||
#endif
|
||||
|
||||
//Flags for matrices
|
||||
#define DL_MF_FOREIGNDATA (1<<0) /*< Matrix *item data actually points to another matrix and should not be freed */
|
||||
|
||||
//'Normal' float matrix
|
||||
/* Floating-point 2D matrix. Element (x, y) lives at item[x + y*stride] (see DL_ITM), i.e. rows are stored one after another. */
typedef struct {
|
||||
int w; /*< Width (number of columns) */
|
||||
int h; /*< Height (number of rows) */
|
||||
int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */
|
||||
int flags; /*< Flags. OR of DL_MF_* values (e.g. DL_MF_FOREIGNDATA: item belongs to another matrix and must not be freed) */
|
||||
fptp_t *item; /*< Pointer to item array, indexed as item[x + y*stride] */
|
||||
} dl_matrix2d_t;
|
||||
|
||||
//Macro to quickly access the raw items in a matrix
|
||||
/* Every argument and the whole expansion are parenthesized so that any pointer
 * expression (e.g. &mat or p + 1) can be passed as m. The result is still an
 * lvalue, so DL_ITM(m, x, y) = v keeps working. Note that m is evaluated twice. */
#define DL_ITM(m, x, y) ((m)->item[(x)+(y)*(m)->stride])
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->item space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrix_free(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrix_zero(dl_matrix2d_t *m);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @Warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix from existing floating-point data
|
||||
*
|
||||
* @param w Width of resulting matrix
|
||||
* @param h Height of resulting matrix
|
||||
* @param data Data to populate matrix with
|
||||
* @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two matrices : res=a.b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Add a pair of matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Added data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Subtract a matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Subtracted data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated matrix containing the values a|b
|
||||
*/
|
||||
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrix(const dl_matrix2d_t *a);
|
||||
|
||||
/**
|
||||
* @brief Return the average square error given a correct and a test matrix.
|
||||
*
|
||||
* ...Well, more or less. If anything, it gives an indication of the error between
|
||||
* the two. Check the code for the exact implementation.
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return value indicating the relative difference between matrices
|
||||
*/
|
||||
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get a specific item from the matrix
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
|
||||
return DL_ITM(m, x, y);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the matrix to the given value
|
||||
*
|
||||
* Please use these for external matrix access instead of DL_ITM
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
/* Write accessor: stores val at column x, row y of m (no bounds checking). */
static inline void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val)
{
    fptp_t *slot = &m->item[x + y * m->stride];
    *slot = val;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
359
lib/include/dl_lib_matrixq.h
Normal file
359
lib/include/dl_lib_matrixq.h
Normal file
@ -0,0 +1,359 @@
|
||||
#ifndef DL_LIB_MATRIXQ_H
|
||||
#define DL_LIB_MATRIXQ_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dl_lib_matrix.h"
|
||||
|
||||
typedef int16_t qtp_t;
|
||||
|
||||
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
|
||||
//for easy use as a multiplicand without stressing out the flash cache too much.
|
||||
/* Quantized 2D matrix: itemq holds 16-bit (qtp_t) mantissas that all share the single exponent below.
 * Element (x, y) lives at itemq[y + x*stride] (see DL_ITMQ), i.e. the layout is inverted relative to dl_matrix2d_t. */
typedef struct {
|
||||
int w; /*< Width */
|
||||
int h; /*< Height */
|
||||
int stride; //Items between consecutive x positions in itemq. Normally equals h, not w!
|
||||
int flags; /*< Flags (DL_MF_* values, e.g. DL_MF_FOREIGNDATA) */
|
||||
int exponent; //The values in itemq should be multiplied by pow(2,exponent) to get the real values.
|
||||
qtp_t *itemq; /*< Pointer to the array of quantized items (mantissas) */
|
||||
} dl_matrix2dq_t;
|
||||
|
||||
#define DL_QTP_SHIFT 15
|
||||
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
|
||||
/* Every argument and the whole expansion are parenthesized so that any pointer
 * expression (e.g. &mat or p + 1) can be passed as m. The result is still an
 * lvalue. Note the inverted layout: index is y + x*stride. m is evaluated twice. */
#define DL_ITMQ(m, x, y) ((m)->itemq[(y)+(x)*(m)->stride])
|
||||
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
|
||||
|
||||
#define DL_SHIFT_AUTO 32
|
||||
|
||||
/**
|
||||
* @info About quantized matrices and shift values
|
||||
*
|
||||
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
|
||||
* time. Quantized matrices can speed up your operations, but come with some quirks, and
|
||||
* it's good to understand how they work before using them.
|
||||
*
|
||||
* The data in the quantized matrix type is stored similarly to floating-point types:
|
||||
* when storing a real value, the value is stored as a mantissa (base number) and an
|
||||
* exponent. The 'real' value that can be re-derived from those two numbers is something
|
||||
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
|
||||
* the standard floating point implementations like e.g. IEEE-754.
|
||||
*
|
||||
* The difference with respect to quantized matrices is that for a quantized matrix, it is
|
||||
* assumed all values stored have more-or-less the same order of magnitude. This allows the
|
||||
* matrix to only store all the mantissas, while the exponents are shared; there is only one
|
||||
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
|
||||
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
|
||||
* integer arithmetic. It also nets us some memory savings - while normally a floating point
|
||||
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
|
||||
* memory requirements.
|
||||
*
|
||||
* While most of the details of handling the intricacies of the quantized matrixes are done
|
||||
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
|
||||
* specifically in places where addition/subtraction/division happens.
|
||||
*
|
||||
* The problem is that the routines do not know what the size of the resulting operation is. For
|
||||
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
|
||||
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
|
||||
* assume the mantissas needs to be scaled back, we may lose precision.
|
||||
*
|
||||
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
|
||||
* the argument is zero, the routine will be conservative, that is, increase the exponent of
|
||||
* the result to such an extent it's mathematically impossible a value in the result will exceed
|
||||
* the maximum value that can be stored. However, when this argument is larger than zero, the
|
||||
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
|
||||
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
|
||||
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
|
||||
* value possible. (Neural networks usually are okay with this happening for a limited amount
|
||||
* of matrix indices).
|
||||
*
|
||||
* For deciding on these shift values, it is recommended to start with a shift value of one, then
|
||||
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
|
||||
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
|
||||
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
|
||||
* else.
|
||||
*
|
||||
* For neural networks and other noise-tolerant applications, note that even when
|
||||
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
|
||||
* to slightly improved precision. Feel free to experiment.
|
||||
**/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Allocate a matrix
|
||||
*
|
||||
* @param w Width of the matrix
|
||||
* @param h Height of the matrix
|
||||
* @return The matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
|
||||
|
||||
/**
|
||||
* @brief Convert a floating-point matrix to a quantized matrix
|
||||
*
|
||||
* @param m Floating-point matrix to convert
|
||||
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
|
||||
* @Return The quantized version of the floating-point matrix
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
|
||||
|
||||
|
||||
/**
|
||||
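* @brief Convert a floating-point matrix to a quantized matrix using the Qm.f fixed-point format given by m_bit and f_bit (TODO: inferred from the signature only, confirm against the implementation and document the parameters)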
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Convert a quantized matrix to a floating-point one.
|
||||
*
|
||||
* @param m Quantized matrix to convert
|
||||
* @param out Floating-point matrix to re-use. If NULL, allocate a new one.
|
||||
* @return The floating-point version of the quantized matrix
|
||||
**/
|
||||
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Free a quantized matrix
|
||||
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->itemq space as well.
|
||||
*
|
||||
* @param m Matrix to free
|
||||
*/
|
||||
void dl_matrixq_free(dl_matrix2dq_t *m);
|
||||
|
||||
/**
|
||||
* @brief Zero out the matrix
|
||||
* Sets all entries in the matrix to 0.
|
||||
*
|
||||
* @param m Matrix to zero
|
||||
*/
|
||||
void dl_matrixq_zero(dl_matrix2dq_t *m);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a fixed-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
|
||||
* much slower than dl_matrixq_dot .
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
* @param shift Shift ratio
|
||||
*/
|
||||
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
|
||||
*
|
||||
* Result is a floating-point matrix.
|
||||
*
|
||||
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
|
||||
* much slower than dl_matrixq_dot_matrix_out.
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
/**
|
||||
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
|
||||
*
|
||||
* @param a First multiplicand; float matrix
|
||||
* @param b Second multiplicand; quantized matrix
|
||||
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
|
||||
*/
|
||||
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
|
||||
*
|
||||
* @param a The matrix to print.
|
||||
*/
|
||||
void dl_printmatrixq(const dl_matrix2dq_t *a);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Add a pair of quantized matrices item-by-item: res=a+b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Added data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Generate a new matrix using a range of items from an existing matrix.
|
||||
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
|
||||
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
|
||||
* the data in the existing matrix that has been sliced.
|
||||
*
|
||||
* @Warning In contrast to the floating point equivalent of this function, the fixed-point version
|
||||
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
|
||||
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
|
||||
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
|
||||
* garbage after modifying the data in one of the slices.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting slice matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief select a range of items from an existing matrix and flatten them into one dimension.
|
||||
*
|
||||
* @Warning The results are flattened in row-major order.
|
||||
*
|
||||
* @param x X-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
|
||||
* @param w Width of the resulting matrix
|
||||
* @param h Height of the resulting matrix
|
||||
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
|
||||
* @return The resulting flatten matrix, or NULL if out of memory
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
|
||||
|
||||
/**
|
||||
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param res Subtracted data. Can be equal to a or b to overwrite that.
|
||||
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
|
||||
*/
|
||||
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
|
||||
|
||||
/**
|
||||
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
|
||||
*
|
||||
* @param a First multiplicand
|
||||
* @param b Second multiplicand
|
||||
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
|
||||
*/
|
||||
void dl_matrixq_mul(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res);
|
||||
|
||||
/**
|
||||
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @param out Divided data. Can be equal to a or b to overwrite that.
|
||||
*/
|
||||
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
|
||||
* rows and columns
|
||||
*
|
||||
* @param a First of the two matrices to compare
|
||||
* @param b Second of the two matrices to compare
|
||||
* @return true if the two matrices are shaped the same, false otherwise.
|
||||
*/
|
||||
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Concatenate the rows of two quantized matrices into a new matrix
|
||||
*
|
||||
* @param a First matrix
|
||||
* @param b Second matrix
|
||||
* @return A newly allocated quantized matrix with as values a|b
|
||||
*/
|
||||
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
|
||||
|
||||
/**
|
||||
* @brief Add a constant to every item of the quantized matrix
|
||||
*
|
||||
* @param subj Matrix to add the constant to
|
||||
* @param add The constant
|
||||
*/
|
||||
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
|
||||
|
||||
/**
|
||||
* @brief Check the sanity of a quantized matrix
|
||||
*
|
||||
* Due to the nature of quantized matrices, depending on the calculations a quantized
|
||||
* matrix is the result of and the shift values chosen in those calculations, a quantized
|
||||
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
|
||||
* most significant mantissa bits are unused, or because a fair amount of mantissas are
|
||||
* clipped. This function checks if this is the case and will report a message to stdout
|
||||
* if significant loss of precision is detected.
|
||||
*
|
||||
* @param m The quantized matrix to check
|
||||
* @param name A string to be displayed in the message if the sanity check fails
|
||||
* @return True if matrix is sane, false otherwise
|
||||
**/
|
||||
|
||||
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
|
||||
|
||||
/**
|
||||
* @brief re-adjust the exponent of the matrix to fit the mantissa better
|
||||
*
|
||||
* This function will shift up all the data in the mantissas so there are no
|
||||
* most-significant bits that are unused in all mantissas. It will also adjust
|
||||
* the exponent to keep the actual values in the matrix the same.
|
||||
*
|
||||
* Some operations done on a matrix, especially operations that re-use the
|
||||
* result of earlier operations done in the same way, can lead to the loss of
|
||||
* data because the exponent of the quantized matrix is never re-adjusted. You
|
||||
* can do that explicitly by calling this function.
|
||||
*
|
||||
* @param m The matrix to re-adjust
|
||||
**/
|
||||
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the floating-point value of a specific item from the quantized matrix
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @return Value in that position
|
||||
*/
|
||||
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
|
||||
|
||||
/**
|
||||
* @brief Set a specific item in the quantized matrix to the given
|
||||
* floating-point value
|
||||
*
|
||||
* @warning If the given value is more than the exponent in the quantized matrix
|
||||
* allows for, all mantissas in the matrix will be shifted down to make the value
|
||||
* 'fit'. If, however, the exponent is such that the value would result in a
|
||||
* quantized mantissa of 0, nothing is done.
|
||||
*
|
||||
* @param m Matrix to access
|
||||
* @param x Column address
|
||||
* @param y Row address
|
||||
* @param val Value to write to that position
|
||||
*/
|
||||
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
|
||||
|
||||
#endif
|
||||
BIN
lib/libc_speech_features.a
Normal file
BIN
lib/libc_speech_features.a
Normal file
Binary file not shown.
BIN
lib/libdl_lib.a
Normal file
BIN
lib/libdl_lib.a
Normal file
Binary file not shown.
BIN
lib/libmultinet.a
Normal file
BIN
lib/libmultinet.a
Normal file
Binary file not shown.
BIN
lib/libwakenet.a
Normal file
BIN
lib/libwakenet.a
Normal file
Binary file not shown.
79
main/Kconfig
Normal file
79
main/Kconfig
Normal file
@ -0,0 +1,79 @@
|
||||
# Configuration menu for the speech-recognition example: selects the WakeNet
# model and wake word, and the MultiNet model and language.
menu "ESP Speech Recognition"

choice SR_MODEL_SEL
    prompt "Wake word engine"
    default SR_MODEL_WN5_QUANT
    help
        Select the keyword spotting model to be used.

config SR_MODEL_WN3_QUANT
    bool "WakeNet 3 (quantized)"

config SR_MODEL_WN4_QUANT
    bool "WakeNet 4 (quantized)"

config SR_MODEL_WN5_QUANT
    bool "WakeNet 5 (quantized)"

config SR_MODEL_WN6_QUANT
    bool "WakeNet 6 (quantized)"

endchoice

choice SR_WAKE_WORD_SEL
    prompt "Wake word name"
    default SR_WN5_HILEXIN
    help
        Select the wake word to be used.

config SR_WN3_HILEXIN
    bool "hilexin (WakeNet3)"
    depends on SR_MODEL_WN3_QUANT

config SR_WN4_HILEXIN
    bool "hilexin (WakeNet4)"
    depends on SR_MODEL_WN4_QUANT

# NOTE(review): SR_MODEL_WN5_FLOAT / SR_MODEL_WN6_FLOAT are not defined in this
# menu; confirm they are provided elsewhere or drop them from the expressions.
config SR_WN5_HILEXIN
    bool "hilexin (WakeNet5)"
    depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT

config SR_WN5_CUSTOMIZED_WORD
    bool "customized word (WakeNet5)"
    depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT

config SR_WN6_HILEXIN
    bool "hilexin (WakeNet6)"
    depends on SR_MODEL_WN6_QUANT

config SR_WN6_CUSTOMIZED_WORD
    bool "customized word (WakeNet6)"
    depends on SR_MODEL_WN6_QUANT || SR_MODEL_WN6_FLOAT

endchoice

choice SR_MN_MODEL_SEL
    prompt "speech commands recognition model to use"
    # The default must name a symbol defined in this choice (no CONFIG_ prefix).
    default SR_MN1_MODEL_QUANT
    help
        Select the model to be used.

config SR_MN1_MODEL_QUANT
    bool "MultiNet 1 (quantized)"

endchoice

choice SR_LANGUAGE_SEL
    prompt "language"
    # The only entry in this choice is SR_MN1_CHINESE_QUANT.
    default SR_MN1_CHINESE_QUANT
    help
        Select the language to be used.

config SR_MN1_CHINESE_QUANT
    bool "chinese (MultiNet1)"
    depends on SR_MN1_MODEL_QUANT

endchoice

endmenu
|
||||
6
main/component.mk
Normal file
6
main/component.mk
Normal file
@ -0,0 +1,6 @@
|
||||
#
|
||||
# "main" pseudo-component makefile.
|
||||
#
|
||||
# (Uses default behaviour of compiling all source files in directory, adding 'include' to include path.)
|
||||
|
||||
COMPONENT_DEPENDS := wake_words_engine
|
||||
4985
main/dakaidiandeng.h
Normal file
4985
main/dakaidiandeng.h
Normal file
File diff suppressed because it is too large
Load Diff
7643
main/hilexin.h
Normal file
7643
main/hilexin.h
Normal file
File diff suppressed because it is too large
Load Diff
19
main/main.c
Normal file
19
main/main.c
Normal file
@ -0,0 +1,19 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
#include "esp_system.h"
|
||||
#include "xtensa/core-macros.h"
|
||||
|
||||
#include "wakenet_test.h"
|
||||
#include "multinet_test.h"
|
||||
void app_main()
|
||||
{
|
||||
// test wakenet
|
||||
wakenet_test();
|
||||
vTaskDelay(3000 / portTICK_PERIOD_MS);
|
||||
|
||||
// //test multinet
|
||||
multinet_test();
|
||||
}
|
||||
90
main/multinet_test.c
Normal file
90
main/multinet_test.c
Normal file
@ -0,0 +1,90 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
|
||||
#include "esp_mn_iface.h"
|
||||
#include "esp_mn_models.h"
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "multinet_test.h"
|
||||
#include "dakaidiandeng.h"
|
||||
|
||||
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
|
||||
|
||||
void multinetTask(void *arg)
|
||||
{
|
||||
model_iface_data_t *model_data = arg;
|
||||
int frequency = multinet->get_samp_rate(model_data);
|
||||
int audio_chunksize = multinet->get_samp_chunksize(model_data);
|
||||
int16_t *buffer = malloc(audio_chunksize * sizeof(int16_t));
|
||||
assert(buffer);
|
||||
int chunks = 0;
|
||||
while (1) {
|
||||
if ((chunks+1)*audio_chunksize*sizeof(int16_t) <= sizeof(dakaidiandeng)) {
|
||||
memcpy(buffer, dakaidiandeng+chunks*audio_chunksize*sizeof(int16_t), audio_chunksize * sizeof(int16_t));
|
||||
} else {
|
||||
memset(buffer, 0, audio_chunksize*sizeof(int16_t));
|
||||
}
|
||||
int commend_id = multinet->detect(model_data, buffer);
|
||||
chunks++;
|
||||
if (chunks == 200 || commend_id > -1) {
|
||||
if (commend_id > -1) {
|
||||
printf("MN test successfully, Commands ID: %d.\n", commend_id);
|
||||
} else {
|
||||
printf("can not recognize any speech commands\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
printf("TEST2 FINISHED\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
/**
 * Registers the example command phrases with a MultiNet model instance.
 *
 * Each phrase is a comma-separated spelling string. Phrase i is printed and
 * then passed to multinet->add_speech_commands() with command ID i.
 *
 * @param multinet    Interface used to register the phrases. Taken as a
 *                    pointer-to-const so the file's const interface pointer
 *                    can be passed without discarding its qualifier.
 * @param model_data  Model instance the phrases are added to.
 */
void add_speech_commands(const esp_mn_iface_t *multinet, model_iface_data_t *model_data)
{
    // Array position is the command ID handed to the model.
    char *phrase_spelling[] = {
        "d,a,k,ai,k,ong,ti,ao",             // 0
        "gu,an,b,i,k,ong,ti,ao",            // 1
        "z,eng,d,a,f,eng,s,u",              // 2
        "ji,an,xi,ao,f,eng,s,u",            // 3
        "sh,eng,g,ao,y,i,d,u",              // 4
        "ji,ang,d,i,y,i,d,u",               // 5
        "zh,i,r,e,m,o,sh,i",                // 6
        "zh,i,l,eng,m,o,sh,i",              // 7
        "s,ong,f,eng,m,o,sh,i",             // 8
        "j,ie,n,eng,m,o,sh,i",              // 9
        "gu,an,b,i,j,ie,n,eng,m,o,sh,i",    // 10
        "ch,u,sh,i,m,o,sh,i",               // 11
        "gu,an,b,i,ch,u,sh,i",              // 12
        "d,a,k,ai,l,an,y,a",                // 13
        "gu,an,b,i,l,an,y,a",               // 14
        "b,o,f,ang,g,e,q,u",                // 15
        "z,an,t,ing,b,o,f,ang",             // 16
        "d,ing,sh,i,y,i,xi,ao,sh,i",        // 17
        "d,a,k,ai,di,an,d,eng",             // 18
        "gu,an,b,i,di,an,d,eng",            // 19
    };
    // Derived from the table so adding a phrase cannot desynchronise the loop.
    const int phrase_count = (int)(sizeof(phrase_spelling) / sizeof(phrase_spelling[0]));

    printf("start to add commands:\n");
    for (int i = 0; i < phrase_count; i++) {
        printf("commend %d: %s\n", i + 1, phrase_spelling[i]);
        multinet->add_speech_commands(model_data, i, phrase_spelling[i], NULL);
    }
}
|
||||
|
||||
void multinet_test()
|
||||
{
|
||||
int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
|
||||
printf("Start free RAM size: %d\n", start_size);
|
||||
|
||||
//Initialize multinet model
|
||||
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
|
||||
add_speech_commands(multinet, model_data);
|
||||
|
||||
//define_speech_commands(multinet, model_data);
|
||||
int audio_chunksize = multinet->get_samp_chunksize(model_data);
|
||||
printf("multinet RAM size: %d\n, current RAM size after multinet init: %d\n",
|
||||
start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT), heap_caps_get_free_size(MALLOC_CAP_8BIT));
|
||||
|
||||
xTaskCreatePinnedToCore(&multinetTask, "multinet", 2 * 1024, (void*)model_data, 5, NULL, 0);
|
||||
}
|
||||
3
main/multinet_test.h
Normal file
3
main/multinet_test.h
Normal file
@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
void multinet_test();
|
||||
53
main/wakenet_test.c
Normal file
53
main/wakenet_test.c
Normal file
@ -0,0 +1,53 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/task.h"
|
||||
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "wakenet_test.h"
|
||||
#include "hilexin.h"
|
||||
|
||||
static const esp_wn_iface_t *wakenet = &WAKENET_MODEL;
|
||||
static const model_coeff_getter_t *model_coeff_getter = &WAKENET_COEFF;
|
||||
|
||||
void wakenetTask(void *arg)
|
||||
{
|
||||
model_iface_data_t *model_data = arg;
|
||||
int frequency = wakenet->get_samp_rate(model_data);
|
||||
int audio_chunksize = wakenet->get_samp_chunksize(model_data);
|
||||
int16_t *buffer = malloc(audio_chunksize * sizeof(int16_t));
|
||||
assert(buffer);
|
||||
|
||||
int chunks = 0;
|
||||
while (1) {
|
||||
if ((chunks + 1)*audio_chunksize * sizeof(int16_t) <= sizeof(hilexin)) {
|
||||
memcpy(buffer, hilexin + chunks * audio_chunksize * sizeof(int16_t), audio_chunksize * sizeof(int16_t));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
int r = wakenet->detect(model_data, buffer);
|
||||
if (r) {
|
||||
int ms = (chunks * audio_chunksize * 1000) / frequency;
|
||||
printf("WN test successfully, %.2f: Neural network detection triggered output %d.\n", (float)ms / 1000.0, r);
|
||||
}
|
||||
chunks++;
|
||||
}
|
||||
printf("TEST FINISHED\n\n");
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
void wakenet_test()
|
||||
{
|
||||
int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
|
||||
printf("Start free RAM size: %d\n", start_size);
|
||||
|
||||
//Initialize wakenet model
|
||||
model_iface_data_t *model_data = wakenet->create(model_coeff_getter, DET_MODE_90);
|
||||
int audio_chunksize = wakenet->get_samp_chunksize(model_data);
|
||||
printf("WakeNet RAM size: %d\nRAM size after WakeNet init: %d\n",
|
||||
start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT), heap_caps_get_free_size(MALLOC_CAP_8BIT));
|
||||
|
||||
xTaskCreatePinnedToCore(&wakenetTask, "wakenet", 2 * 1024, (void*)model_data, 5, NULL, 1);
|
||||
}
|
||||
3
main/wakenet_test.h
Normal file
3
main/wakenet_test.h
Normal file
@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
void wakenet_test();
|
||||
4
partitions.csv
Normal file
4
partitions.csv
Normal file
@ -0,0 +1,4 @@
|
||||
# Espressif ESP32 Partition Table
|
||||
# Name, Type, SubType, Offset, Size
|
||||
factory, app, factory, 0x010000, 3840k
|
||||
nvs, data, nvs, 0x3D0000, 16K
|
||||
|
683
sdkconfig
Normal file
683
sdkconfig
Normal file
@ -0,0 +1,683 @@
|
||||
#
|
||||
# Automatically generated file; DO NOT EDIT.
|
||||
# Espressif IoT Development Framework Configuration
|
||||
#
|
||||
|
||||
#
|
||||
# SDK tool configuration
|
||||
#
|
||||
CONFIG_TOOLPREFIX="xtensa-esp32-elf-"
|
||||
CONFIG_PYTHON="python"
|
||||
CONFIG_MAKE_WARN_UNDEFINED_VARIABLES=y
|
||||
|
||||
#
|
||||
# Bootloader config
|
||||
#
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL_NONE=
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL_ERROR=
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL_WARN=
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL_INFO=y
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL_DEBUG=
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL_VERBOSE=
|
||||
CONFIG_LOG_BOOTLOADER_LEVEL=3
|
||||
CONFIG_BOOTLOADER_VDDSDIO_BOOST_1_9V=y
|
||||
CONFIG_BOOTLOADER_FACTORY_RESET=
|
||||
CONFIG_BOOTLOADER_APP_TEST=
|
||||
CONFIG_BOOTLOADER_WDT_ENABLE=y
|
||||
CONFIG_BOOTLOADER_WDT_DISABLE_IN_USER_CODE=
|
||||
CONFIG_BOOTLOADER_WDT_TIME_MS=9000
|
||||
|
||||
#
|
||||
# Security features
|
||||
#
|
||||
CONFIG_SECURE_SIGNED_APPS_NO_SECURE_BOOT=
|
||||
CONFIG_SECURE_BOOT_ENABLED=
|
||||
CONFIG_FLASH_ENCRYPTION_ENABLED=
|
||||
|
||||
#
|
||||
# Serial flasher config
|
||||
#
|
||||
CONFIG_ESPTOOLPY_PORT="/dev/ttyUSB0"
|
||||
CONFIG_ESPTOOLPY_BAUD_115200B=y
|
||||
CONFIG_ESPTOOLPY_BAUD_230400B=
|
||||
CONFIG_ESPTOOLPY_BAUD_921600B=
|
||||
CONFIG_ESPTOOLPY_BAUD_2MB=
|
||||
CONFIG_ESPTOOLPY_BAUD_OTHER=
|
||||
CONFIG_ESPTOOLPY_BAUD_OTHER_VAL=115200
|
||||
CONFIG_ESPTOOLPY_BAUD=115200
|
||||
CONFIG_ESPTOOLPY_COMPRESSED=y
|
||||
CONFIG_FLASHMODE_QIO=
|
||||
CONFIG_FLASHMODE_QOUT=
|
||||
CONFIG_FLASHMODE_DIO=y
|
||||
CONFIG_FLASHMODE_DOUT=
|
||||
CONFIG_ESPTOOLPY_FLASHMODE="dio"
|
||||
CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
|
||||
CONFIG_ESPTOOLPY_FLASHFREQ_40M=
|
||||
CONFIG_ESPTOOLPY_FLASHFREQ_26M=
|
||||
CONFIG_ESPTOOLPY_FLASHFREQ_20M=
|
||||
CONFIG_ESPTOOLPY_FLASHFREQ="80m"
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_1MB=
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_2MB=
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_8MB=
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE="4MB"
|
||||
CONFIG_ESPTOOLPY_FLASHSIZE_DETECT=y
|
||||
CONFIG_ESPTOOLPY_BEFORE_RESET=y
|
||||
CONFIG_ESPTOOLPY_BEFORE_NORESET=
|
||||
CONFIG_ESPTOOLPY_BEFORE="default_reset"
|
||||
CONFIG_ESPTOOLPY_AFTER_RESET=y
|
||||
CONFIG_ESPTOOLPY_AFTER_NORESET=
|
||||
CONFIG_ESPTOOLPY_AFTER="hard_reset"
|
||||
CONFIG_MONITOR_BAUD_9600B=
|
||||
CONFIG_MONITOR_BAUD_57600B=
|
||||
CONFIG_MONITOR_BAUD_115200B=y
|
||||
CONFIG_MONITOR_BAUD_230400B=
|
||||
CONFIG_MONITOR_BAUD_921600B=
|
||||
CONFIG_MONITOR_BAUD_2MB=
|
||||
CONFIG_MONITOR_BAUD_OTHER=
|
||||
CONFIG_MONITOR_BAUD_OTHER_VAL=115200
|
||||
CONFIG_MONITOR_BAUD=115200
|
||||
|
||||
#
|
||||
# Partition Table
|
||||
#
|
||||
CONFIG_PARTITION_TABLE_SINGLE_APP=
|
||||
CONFIG_PARTITION_TABLE_TWO_OTA=
|
||||
CONFIG_PARTITION_TABLE_CUSTOM=y
|
||||
CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"
|
||||
CONFIG_PARTITION_TABLE_FILENAME="partitions.csv"
|
||||
CONFIG_PARTITION_TABLE_OFFSET=0x8000
|
||||
CONFIG_PARTITION_TABLE_MD5=y
|
||||
|
||||
#
|
||||
# Compiler options
|
||||
#
|
||||
CONFIG_OPTIMIZATION_LEVEL_DEBUG=y
|
||||
CONFIG_OPTIMIZATION_LEVEL_RELEASE=
|
||||
CONFIG_OPTIMIZATION_ASSERTIONS_ENABLED=y
|
||||
CONFIG_OPTIMIZATION_ASSERTIONS_SILENT=
|
||||
CONFIG_OPTIMIZATION_ASSERTIONS_DISABLED=
|
||||
CONFIG_CXX_EXCEPTIONS=
|
||||
CONFIG_STACK_CHECK_NONE=y
|
||||
CONFIG_STACK_CHECK_NORM=
|
||||
CONFIG_STACK_CHECK_STRONG=
|
||||
CONFIG_STACK_CHECK_ALL=
|
||||
CONFIG_STACK_CHECK=
|
||||
CONFIG_WARN_WRITE_STRINGS=
|
||||
CONFIG_DISABLE_GCC8_WARNINGS=
|
||||
|
||||
#
|
||||
# Component config
|
||||
#
|
||||
|
||||
#
|
||||
# Application Level Tracing
|
||||
#
|
||||
CONFIG_ESP32_APPTRACE_DEST_TRAX=
|
||||
CONFIG_ESP32_APPTRACE_DEST_NONE=y
|
||||
CONFIG_ESP32_APPTRACE_ENABLE=
|
||||
CONFIG_ESP32_APPTRACE_LOCK_ENABLE=y
|
||||
CONFIG_AWS_IOT_SDK=
|
||||
|
||||
#
|
||||
# Bluetooth
|
||||
#
|
||||
CONFIG_BT_ENABLED=
|
||||
CONFIG_BTDM_CONTROLLER_BLE_MAX_CONN_EFF=0
|
||||
CONFIG_BTDM_CONTROLLER_BR_EDR_MAX_ACL_CONN_EFF=0
|
||||
CONFIG_BTDM_CONTROLLER_BR_EDR_MAX_SYNC_CONN_EFF=0
|
||||
CONFIG_BTDM_CONTROLLER_PINNED_TO_CORE=0
|
||||
CONFIG_BT_RESERVE_DRAM=0
|
||||
|
||||
#
|
||||
# Driver configurations
|
||||
#
|
||||
|
||||
#
|
||||
# ADC configuration
|
||||
#
|
||||
CONFIG_ADC_FORCE_XPD_FSM=
|
||||
CONFIG_ADC2_DISABLE_DAC=y
|
||||
|
||||
#
|
||||
# SPI configuration
|
||||
#
|
||||
CONFIG_SPI_MASTER_IN_IRAM=
|
||||
CONFIG_SPI_MASTER_ISR_IN_IRAM=y
|
||||
CONFIG_SPI_SLAVE_IN_IRAM=
|
||||
CONFIG_SPI_SLAVE_ISR_IN_IRAM=y
|
||||
|
||||
#
|
||||
# ESP32-specific
|
||||
#
|
||||
CONFIG_ESP32_DEFAULT_CPU_FREQ_80=
|
||||
CONFIG_ESP32_DEFAULT_CPU_FREQ_160=
|
||||
CONFIG_ESP32_DEFAULT_CPU_FREQ_240=y
|
||||
CONFIG_ESP32_DEFAULT_CPU_FREQ_MHZ=240
|
||||
CONFIG_SPIRAM_SUPPORT=y
|
||||
|
||||
#
|
||||
# SPI RAM config
|
||||
#
|
||||
CONFIG_SPIRAM_BOOT_INIT=y
|
||||
CONFIG_SPIRAM_IGNORE_NOTFOUND=
|
||||
CONFIG_SPIRAM_USE_MEMMAP=
|
||||
CONFIG_SPIRAM_USE_CAPS_ALLOC=
|
||||
CONFIG_SPIRAM_USE_MALLOC=y
|
||||
CONFIG_SPIRAM_TYPE_AUTO=y
|
||||
CONFIG_SPIRAM_TYPE_ESPPSRAM32=
|
||||
CONFIG_SPIRAM_TYPE_ESPPSRAM64=
|
||||
CONFIG_SPIRAM_SIZE=-1
|
||||
CONFIG_SPIRAM_SPEED_40M=
|
||||
CONFIG_SPIRAM_SPEED_80M=y
|
||||
CONFIG_SPIRAM_MEMTEST=y
|
||||
CONFIG_SPIRAM_CACHE_WORKAROUND=y
|
||||
CONFIG_SPIRAM_BANKSWITCH_ENABLE=y
|
||||
CONFIG_SPIRAM_BANKSWITCH_RESERVE=8
|
||||
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=16384
|
||||
CONFIG_WIFI_LWIP_ALLOCATION_FROM_SPIRAM_FIRST=
|
||||
CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768
|
||||
CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY=
|
||||
CONFIG_SPIRAM_ALLOW_BSS_SEG_EXTERNAL_MEMORY=
|
||||
CONFIG_SPIRAM_OCCUPY_HSPI_HOST=
|
||||
CONFIG_SPIRAM_OCCUPY_VSPI_HOST=y
|
||||
CONFIG_PICO_PSRAM_CS_IO=10
|
||||
CONFIG_MEMMAP_TRACEMEM=
|
||||
CONFIG_MEMMAP_TRACEMEM_TWOBANKS=
|
||||
CONFIG_ESP32_TRAX=
|
||||
CONFIG_TRACEMEM_RESERVE_DRAM=0x0
|
||||
CONFIG_ESP32_ENABLE_COREDUMP_TO_FLASH=
|
||||
CONFIG_ESP32_ENABLE_COREDUMP_TO_UART=
|
||||
CONFIG_ESP32_ENABLE_COREDUMP_TO_NONE=y
|
||||
CONFIG_ESP32_ENABLE_COREDUMP=
|
||||
CONFIG_TWO_UNIVERSAL_MAC_ADDRESS=
|
||||
CONFIG_FOUR_UNIVERSAL_MAC_ADDRESS=y
|
||||
CONFIG_NUMBER_OF_UNIVERSAL_MAC_ADDRESS=4
|
||||
CONFIG_SYSTEM_EVENT_QUEUE_SIZE=32
|
||||
CONFIG_SYSTEM_EVENT_TASK_STACK_SIZE=2304
|
||||
CONFIG_MAIN_TASK_STACK_SIZE=3584
|
||||
CONFIG_IPC_TASK_STACK_SIZE=1024
|
||||
CONFIG_TIMER_TASK_STACK_SIZE=3584
|
||||
CONFIG_NEWLIB_STDOUT_LINE_ENDING_CRLF=y
|
||||
CONFIG_NEWLIB_STDOUT_LINE_ENDING_LF=
|
||||
CONFIG_NEWLIB_STDOUT_LINE_ENDING_CR=
|
||||
CONFIG_NEWLIB_STDIN_LINE_ENDING_CRLF=
|
||||
CONFIG_NEWLIB_STDIN_LINE_ENDING_LF=
|
||||
CONFIG_NEWLIB_STDIN_LINE_ENDING_CR=y
|
||||
CONFIG_NEWLIB_NANO_FORMAT=
|
||||
CONFIG_CONSOLE_UART_DEFAULT=y
|
||||
CONFIG_CONSOLE_UART_CUSTOM=
|
||||
CONFIG_CONSOLE_UART_NONE=
|
||||
CONFIG_CONSOLE_UART_NUM=0
|
||||
CONFIG_CONSOLE_UART_BAUDRATE=115200
|
||||
CONFIG_ULP_COPROC_ENABLED=
|
||||
CONFIG_ULP_COPROC_RESERVE_MEM=0
|
||||
CONFIG_ESP32_PANIC_PRINT_HALT=
|
||||
CONFIG_ESP32_PANIC_PRINT_REBOOT=y
|
||||
CONFIG_ESP32_PANIC_SILENT_REBOOT=
|
||||
CONFIG_ESP32_PANIC_GDBSTUB=
|
||||
CONFIG_ESP32_DEBUG_OCDAWARE=y
|
||||
CONFIG_ESP32_DEBUG_STUBS_ENABLE=y
|
||||
CONFIG_INT_WDT=
|
||||
CONFIG_TASK_WDT=
|
||||
CONFIG_BROWNOUT_DET=y
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_0=y
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_1=
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_2=
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_3=
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_4=
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_5=
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_6=
|
||||
CONFIG_BROWNOUT_DET_LVL_SEL_7=
|
||||
CONFIG_BROWNOUT_DET_LVL=0
|
||||
CONFIG_REDUCE_PHY_TX_POWER=y
|
||||
CONFIG_ESP32_TIME_SYSCALL_USE_RTC_FRC1=y
|
||||
CONFIG_ESP32_TIME_SYSCALL_USE_RTC=
|
||||
CONFIG_ESP32_TIME_SYSCALL_USE_FRC1=
|
||||
CONFIG_ESP32_TIME_SYSCALL_USE_NONE=
|
||||
CONFIG_ESP32_RTC_CLOCK_SOURCE_INTERNAL_RC=y
|
||||
CONFIG_ESP32_RTC_CLOCK_SOURCE_EXTERNAL_CRYSTAL=
|
||||
CONFIG_ESP32_RTC_CLOCK_SOURCE_EXTERNAL_OSC=
|
||||
CONFIG_ESP32_RTC_CLOCK_SOURCE_INTERNAL_8MD256=
|
||||
CONFIG_ESP32_RTC_CLK_CAL_CYCLES=1024
|
||||
CONFIG_ESP32_DEEP_SLEEP_WAKEUP_DELAY=2000
|
||||
CONFIG_ESP32_XTAL_FREQ_40=y
|
||||
CONFIG_ESP32_XTAL_FREQ_26=
|
||||
CONFIG_ESP32_XTAL_FREQ_AUTO=
|
||||
CONFIG_ESP32_XTAL_FREQ=40
|
||||
CONFIG_DISABLE_BASIC_ROM_CONSOLE=
|
||||
CONFIG_NO_BLOBS=
|
||||
CONFIG_ESP_TIMER_PROFILING=
|
||||
CONFIG_COMPATIBLE_PRE_V2_1_BOOTLOADERS=
|
||||
CONFIG_ESP_ERR_TO_NAME_LOOKUP=y
|
||||
|
||||
#
|
||||
# Wi-Fi
|
||||
#
|
||||
CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM=10
|
||||
CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM=32
|
||||
CONFIG_ESP32_WIFI_STATIC_TX_BUFFER=y
|
||||
CONFIG_ESP32_WIFI_TX_BUFFER_TYPE=0
|
||||
CONFIG_ESP32_WIFI_STATIC_TX_BUFFER_NUM=16
|
||||
CONFIG_ESP32_WIFI_CSI_ENABLED=
|
||||
CONFIG_ESP32_WIFI_AMPDU_TX_ENABLED=y
|
||||
CONFIG_ESP32_WIFI_TX_BA_WIN=6
|
||||
CONFIG_ESP32_WIFI_AMPDU_RX_ENABLED=y
|
||||
CONFIG_ESP32_WIFI_RX_BA_WIN=6
|
||||
CONFIG_ESP32_WIFI_NVS_ENABLED=y
|
||||
CONFIG_ESP32_WIFI_TASK_PINNED_TO_CORE_0=y
|
||||
CONFIG_ESP32_WIFI_TASK_PINNED_TO_CORE_1=
|
||||
CONFIG_ESP32_WIFI_SOFTAP_BEACON_MAX_LEN=752
|
||||
|
||||
#
|
||||
# PHY
|
||||
#
|
||||
CONFIG_ESP32_PHY_CALIBRATION_AND_DATA_STORAGE=y
|
||||
CONFIG_ESP32_PHY_INIT_DATA_IN_PARTITION=
|
||||
CONFIG_ESP32_PHY_MAX_WIFI_TX_POWER=20
|
||||
CONFIG_ESP32_PHY_MAX_TX_POWER=20
|
||||
|
||||
#
|
||||
# Power Management
|
||||
#
|
||||
CONFIG_PM_ENABLE=
|
||||
|
||||
#
|
||||
# ADC-Calibration
|
||||
#
|
||||
CONFIG_ADC_CAL_EFUSE_TP_ENABLE=y
|
||||
CONFIG_ADC_CAL_EFUSE_VREF_ENABLE=y
|
||||
CONFIG_ADC_CAL_LUT_ENABLE=y
|
||||
|
||||
#
|
||||
# Event Loop Library
|
||||
#
|
||||
CONFIG_EVENT_LOOP_PROFILING=
|
||||
|
||||
#
|
||||
# ESP HTTP client
|
||||
#
|
||||
CONFIG_ESP_HTTP_CLIENT_ENABLE_HTTPS=y
|
||||
|
||||
#
|
||||
# HTTP Server
|
||||
#
|
||||
CONFIG_HTTPD_MAX_REQ_HDR_LEN=512
|
||||
CONFIG_HTTPD_MAX_URI_LEN=512
|
||||
|
||||
#
|
||||
# Ethernet
|
||||
#
|
||||
CONFIG_DMA_RX_BUF_NUM=10
|
||||
CONFIG_DMA_TX_BUF_NUM=10
|
||||
CONFIG_EMAC_L2_TO_L3_RX_BUF_MODE=y
|
||||
CONFIG_EMAC_CHECK_LINK_PERIOD_MS=2000
|
||||
CONFIG_EMAC_TASK_PRIORITY=20
|
||||
CONFIG_EMAC_TASK_STACK_SIZE=3072
|
||||
|
||||
#
|
||||
# FAT Filesystem support
|
||||
#
|
||||
CONFIG_FATFS_CODEPAGE_DYNAMIC=
|
||||
CONFIG_FATFS_CODEPAGE_437=y
|
||||
CONFIG_FATFS_CODEPAGE_720=
|
||||
CONFIG_FATFS_CODEPAGE_737=
|
||||
CONFIG_FATFS_CODEPAGE_771=
|
||||
CONFIG_FATFS_CODEPAGE_775=
|
||||
CONFIG_FATFS_CODEPAGE_850=
|
||||
CONFIG_FATFS_CODEPAGE_852=
|
||||
CONFIG_FATFS_CODEPAGE_855=
|
||||
CONFIG_FATFS_CODEPAGE_857=
|
||||
CONFIG_FATFS_CODEPAGE_860=
|
||||
CONFIG_FATFS_CODEPAGE_861=
|
||||
CONFIG_FATFS_CODEPAGE_862=
|
||||
CONFIG_FATFS_CODEPAGE_863=
|
||||
CONFIG_FATFS_CODEPAGE_864=
|
||||
CONFIG_FATFS_CODEPAGE_865=
|
||||
CONFIG_FATFS_CODEPAGE_866=
|
||||
CONFIG_FATFS_CODEPAGE_869=
|
||||
CONFIG_FATFS_CODEPAGE_932=
|
||||
CONFIG_FATFS_CODEPAGE_936=
|
||||
CONFIG_FATFS_CODEPAGE_949=
|
||||
CONFIG_FATFS_CODEPAGE_950=
|
||||
CONFIG_FATFS_CODEPAGE=437
|
||||
CONFIG_FATFS_LFN_NONE=y
|
||||
CONFIG_FATFS_LFN_HEAP=
|
||||
CONFIG_FATFS_LFN_STACK=
|
||||
CONFIG_FATFS_FS_LOCK=0
|
||||
CONFIG_FATFS_TIMEOUT_MS=10000
|
||||
CONFIG_FATFS_PER_FILE_CACHE=y
|
||||
|
||||
#
|
||||
# Modbus configuration
|
||||
#
|
||||
CONFIG_MB_QUEUE_LENGTH=20
|
||||
CONFIG_MB_SERIAL_TASK_STACK_SIZE=2048
|
||||
CONFIG_MB_SERIAL_BUF_SIZE=256
|
||||
CONFIG_MB_SERIAL_TASK_PRIO=10
|
||||
CONFIG_MB_CONTROLLER_SLAVE_ID_SUPPORT=
|
||||
CONFIG_MB_CONTROLLER_NOTIFY_TIMEOUT=20
|
||||
CONFIG_MB_CONTROLLER_NOTIFY_QUEUE_SIZE=20
|
||||
CONFIG_MB_CONTROLLER_STACK_SIZE=4096
|
||||
CONFIG_MB_EVENT_QUEUE_TIMEOUT=20
|
||||
CONFIG_MB_TIMER_PORT_ENABLED=y
|
||||
CONFIG_MB_TIMER_GROUP=0
|
||||
CONFIG_MB_TIMER_INDEX=0
|
||||
|
||||
#
|
||||
# FreeRTOS
|
||||
#
|
||||
CONFIG_FREERTOS_UNICORE=
|
||||
CONFIG_FREERTOS_NO_AFFINITY=0x7FFFFFFF
|
||||
CONFIG_FREERTOS_CORETIMER_0=y
|
||||
CONFIG_FREERTOS_CORETIMER_1=
|
||||
CONFIG_FREERTOS_HZ=100
|
||||
CONFIG_FREERTOS_ASSERT_ON_UNTESTED_FUNCTION=y
|
||||
CONFIG_FREERTOS_CHECK_STACKOVERFLOW_NONE=
|
||||
CONFIG_FREERTOS_CHECK_STACKOVERFLOW_PTRVAL=
|
||||
CONFIG_FREERTOS_CHECK_STACKOVERFLOW_CANARY=y
|
||||
CONFIG_FREERTOS_WATCHPOINT_END_OF_STACK=
|
||||
CONFIG_FREERTOS_INTERRUPT_BACKTRACE=y
|
||||
CONFIG_FREERTOS_THREAD_LOCAL_STORAGE_POINTERS=1
|
||||
CONFIG_FREERTOS_ASSERT_FAIL_ABORT=y
|
||||
CONFIG_FREERTOS_ASSERT_FAIL_PRINT_CONTINUE=
|
||||
CONFIG_FREERTOS_ASSERT_DISABLE=
|
||||
CONFIG_FREERTOS_IDLE_TASK_STACKSIZE=1536
|
||||
CONFIG_FREERTOS_ISR_STACKSIZE=1536
|
||||
CONFIG_FREERTOS_LEGACY_HOOKS=
|
||||
CONFIG_FREERTOS_MAX_TASK_NAME_LEN=16
|
||||
CONFIG_SUPPORT_STATIC_ALLOCATION=y
|
||||
CONFIG_ENABLE_STATIC_TASK_CLEAN_UP_HOOK=
|
||||
CONFIG_TIMER_TASK_PRIORITY=1
|
||||
CONFIG_TIMER_TASK_STACK_DEPTH=2048
|
||||
CONFIG_TIMER_QUEUE_LENGTH=10
|
||||
CONFIG_FREERTOS_QUEUE_REGISTRY_SIZE=0
|
||||
CONFIG_FREERTOS_USE_TRACE_FACILITY=
|
||||
CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=
|
||||
CONFIG_FREERTOS_DEBUG_INTERNALS=
|
||||
CONFIG_FREERTOS_TASK_FUNCTION_WRAPPER=y
|
||||
|
||||
#
|
||||
# Heap memory debugging
|
||||
#
|
||||
CONFIG_HEAP_POISONING_DISABLED=y
|
||||
CONFIG_HEAP_POISONING_LIGHT=
|
||||
CONFIG_HEAP_POISONING_COMPREHENSIVE=
|
||||
CONFIG_HEAP_TRACING=
|
||||
|
||||
#
|
||||
# libsodium
|
||||
#
|
||||
CONFIG_LIBSODIUM_USE_MBEDTLS_SHA=y
|
||||
|
||||
#
|
||||
# Log output
|
||||
#
|
||||
CONFIG_LOG_DEFAULT_LEVEL_NONE=
|
||||
CONFIG_LOG_DEFAULT_LEVEL_ERROR=
|
||||
CONFIG_LOG_DEFAULT_LEVEL_WARN=
|
||||
CONFIG_LOG_DEFAULT_LEVEL_INFO=y
|
||||
CONFIG_LOG_DEFAULT_LEVEL_DEBUG=
|
||||
CONFIG_LOG_DEFAULT_LEVEL_VERBOSE=
|
||||
CONFIG_LOG_DEFAULT_LEVEL=3
|
||||
CONFIG_LOG_COLORS=y
|
||||
|
||||
#
|
||||
# LWIP
|
||||
#
|
||||
CONFIG_L2_TO_L3_COPY=
|
||||
CONFIG_LWIP_IRAM_OPTIMIZATION=
|
||||
CONFIG_LWIP_MAX_SOCKETS=10
|
||||
CONFIG_USE_ONLY_LWIP_SELECT=
|
||||
CONFIG_LWIP_SO_REUSE=y
|
||||
CONFIG_LWIP_SO_REUSE_RXTOALL=y
|
||||
CONFIG_LWIP_SO_RCVBUF=
|
||||
CONFIG_LWIP_DHCP_MAX_NTP_SERVERS=1
|
||||
CONFIG_LWIP_IP_FRAG=
|
||||
CONFIG_LWIP_IP_REASSEMBLY=
|
||||
CONFIG_LWIP_STATS=
|
||||
CONFIG_LWIP_ETHARP_TRUST_IP_MAC=
|
||||
CONFIG_ESP_GRATUITOUS_ARP=y
|
||||
CONFIG_GARP_TMR_INTERVAL=60
|
||||
CONFIG_TCPIP_RECVMBOX_SIZE=32
|
||||
CONFIG_LWIP_DHCP_DOES_ARP_CHECK=y
|
||||
CONFIG_LWIP_DHCP_RESTORE_LAST_IP=
|
||||
|
||||
#
|
||||
# DHCP server
|
||||
#
|
||||
CONFIG_LWIP_DHCPS_LEASE_UNIT=60
|
||||
CONFIG_LWIP_DHCPS_MAX_STATION_NUM=8
|
||||
CONFIG_LWIP_AUTOIP=
|
||||
CONFIG_LWIP_NETIF_LOOPBACK=y
|
||||
CONFIG_LWIP_LOOPBACK_MAX_PBUFS=8
|
||||
|
||||
#
|
||||
# TCP
|
||||
#
|
||||
CONFIG_LWIP_MAX_ACTIVE_TCP=16
|
||||
CONFIG_LWIP_MAX_LISTENING_TCP=16
|
||||
CONFIG_TCP_MAXRTX=12
|
||||
CONFIG_TCP_SYNMAXRTX=6
|
||||
CONFIG_TCP_MSS=1436
|
||||
CONFIG_TCP_MSL=60000
|
||||
CONFIG_TCP_SND_BUF_DEFAULT=5744
|
||||
CONFIG_TCP_WND_DEFAULT=5744
|
||||
CONFIG_TCP_RECVMBOX_SIZE=6
|
||||
CONFIG_TCP_QUEUE_OOSEQ=y
|
||||
CONFIG_ESP_TCP_KEEP_CONNECTION_WHEN_IP_CHANGES=
|
||||
CONFIG_TCP_OVERSIZE_MSS=y
|
||||
CONFIG_TCP_OVERSIZE_QUARTER_MSS=
|
||||
CONFIG_TCP_OVERSIZE_DISABLE=
|
||||
|
||||
#
|
||||
# UDP
|
||||
#
|
||||
CONFIG_LWIP_MAX_UDP_PCBS=16
|
||||
CONFIG_UDP_RECVMBOX_SIZE=6
|
||||
CONFIG_TCPIP_TASK_STACK_SIZE=3072
|
||||
CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
|
||||
CONFIG_TCPIP_TASK_AFFINITY_CPU0=
|
||||
CONFIG_TCPIP_TASK_AFFINITY_CPU1=
|
||||
CONFIG_TCPIP_TASK_AFFINITY=0x7FFFFFFF
|
||||
CONFIG_PPP_SUPPORT=
|
||||
|
||||
#
|
||||
# ICMP
|
||||
#
|
||||
CONFIG_LWIP_MULTICAST_PING=
|
||||
CONFIG_LWIP_BROADCAST_PING=
|
||||
|
||||
#
|
||||
# LWIP RAW API
|
||||
#
|
||||
CONFIG_LWIP_MAX_RAW_PCBS=16
|
||||
|
||||
#
|
||||
# ESP Speech Recognition
|
||||
#
|
||||
CONFIG_SR_MODEL_WN3_QUANT=
|
||||
CONFIG_SR_MODEL_WN4_QUANT=
|
||||
CONFIG_SR_MODEL_WN5_QUANT=
|
||||
CONFIG_SR_MODEL_WN6_QUANT=y
|
||||
CONFIG_SR_WN6_HILEXIN=y
|
||||
CONFIG_SR_WN6_CUSTOMIZED_WORD=
|
||||
CONFIG_SR_MN1_MODEL_QUANT=y
|
||||
CONFIG_SR_MN1_CHINESE_QUANT=y
|
||||
|
||||
#
|
||||
# mbedTLS
|
||||
#
|
||||
CONFIG_MBEDTLS_INTERNAL_MEM_ALLOC=y
|
||||
CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=
|
||||
CONFIG_MBEDTLS_DEFAULT_MEM_ALLOC=
|
||||
CONFIG_MBEDTLS_CUSTOM_MEM_ALLOC=
|
||||
CONFIG_MBEDTLS_SSL_MAX_CONTENT_LEN=16384
|
||||
CONFIG_MBEDTLS_ASYMMETRIC_CONTENT_LEN=
|
||||
CONFIG_MBEDTLS_DEBUG=
|
||||
CONFIG_MBEDTLS_HARDWARE_AES=y
|
||||
CONFIG_MBEDTLS_HARDWARE_MPI=
|
||||
CONFIG_MBEDTLS_HARDWARE_SHA=
|
||||
CONFIG_MBEDTLS_HAVE_TIME=y
|
||||
CONFIG_MBEDTLS_HAVE_TIME_DATE=
|
||||
CONFIG_MBEDTLS_TLS_SERVER_AND_CLIENT=y
|
||||
CONFIG_MBEDTLS_TLS_SERVER_ONLY=
|
||||
CONFIG_MBEDTLS_TLS_CLIENT_ONLY=
|
||||
CONFIG_MBEDTLS_TLS_DISABLED=
|
||||
CONFIG_MBEDTLS_TLS_SERVER=y
|
||||
CONFIG_MBEDTLS_TLS_CLIENT=y
|
||||
CONFIG_MBEDTLS_TLS_ENABLED=y
|
||||
|
||||
#
|
||||
# TLS Key Exchange Methods
|
||||
#
|
||||
CONFIG_MBEDTLS_PSK_MODES=
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_RSA=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_DHE_RSA=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_ELLIPTIC_CURVE=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_ECDHE_RSA=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_ECDHE_ECDSA=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_ECDH_ECDSA=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_ECDH_RSA=y
|
||||
CONFIG_MBEDTLS_SSL_RENEGOTIATION=y
|
||||
CONFIG_MBEDTLS_SSL_PROTO_SSL3=
|
||||
CONFIG_MBEDTLS_SSL_PROTO_TLS1=y
|
||||
CONFIG_MBEDTLS_SSL_PROTO_TLS1_1=y
|
||||
CONFIG_MBEDTLS_SSL_PROTO_TLS1_2=y
|
||||
CONFIG_MBEDTLS_SSL_PROTO_DTLS=
|
||||
CONFIG_MBEDTLS_SSL_ALPN=y
|
||||
CONFIG_MBEDTLS_SSL_SESSION_TICKETS=y
|
||||
|
||||
#
|
||||
# Symmetric Ciphers
|
||||
#
|
||||
CONFIG_MBEDTLS_AES_C=y
|
||||
CONFIG_MBEDTLS_CAMELLIA_C=
|
||||
CONFIG_MBEDTLS_DES_C=
|
||||
CONFIG_MBEDTLS_RC4_DISABLED=y
|
||||
CONFIG_MBEDTLS_RC4_ENABLED_NO_DEFAULT=
|
||||
CONFIG_MBEDTLS_RC4_ENABLED=
|
||||
CONFIG_MBEDTLS_BLOWFISH_C=
|
||||
CONFIG_MBEDTLS_XTEA_C=
|
||||
CONFIG_MBEDTLS_CCM_C=y
|
||||
CONFIG_MBEDTLS_GCM_C=y
|
||||
CONFIG_MBEDTLS_RIPEMD160_C=
|
||||
|
||||
#
|
||||
# Certificates
|
||||
#
|
||||
CONFIG_MBEDTLS_PEM_PARSE_C=y
|
||||
CONFIG_MBEDTLS_PEM_WRITE_C=y
|
||||
CONFIG_MBEDTLS_X509_CRL_PARSE_C=y
|
||||
CONFIG_MBEDTLS_X509_CSR_PARSE_C=y
|
||||
CONFIG_MBEDTLS_ECP_C=y
|
||||
CONFIG_MBEDTLS_ECDH_C=y
|
||||
CONFIG_MBEDTLS_ECDSA_C=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP192R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP224R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP256R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP384R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP521R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP192K1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP224K1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_SECP256K1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_BP256R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_BP384R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_BP512R1_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_DP_CURVE25519_ENABLED=y
|
||||
CONFIG_MBEDTLS_ECP_NIST_OPTIM=y
|
||||
|
||||
#
|
||||
# mDNS
|
||||
#
|
||||
CONFIG_MDNS_MAX_SERVICES=10
|
||||
|
||||
#
|
||||
# ESP-MQTT Configurations
|
||||
#
|
||||
CONFIG_MQTT_PROTOCOL_311=y
|
||||
CONFIG_MQTT_TRANSPORT_SSL=y
|
||||
CONFIG_MQTT_TRANSPORT_WEBSOCKET=y
|
||||
CONFIG_MQTT_TRANSPORT_WEBSOCKET_SECURE=y
|
||||
CONFIG_MQTT_USE_CUSTOM_CONFIG=
|
||||
CONFIG_MQTT_TASK_CORE_SELECTION_ENABLED=
|
||||
CONFIG_MQTT_CUSTOM_OUTBOX=
|
||||
|
||||
#
|
||||
# NVS
|
||||
#
|
||||
|
||||
#
|
||||
# OpenSSL
|
||||
#
|
||||
CONFIG_OPENSSL_DEBUG=
|
||||
CONFIG_OPENSSL_ASSERT_DO_NOTHING=y
|
||||
CONFIG_OPENSSL_ASSERT_EXIT=
|
||||
|
||||
#
|
||||
# PThreads
|
||||
#
|
||||
CONFIG_ESP32_PTHREAD_TASK_PRIO_DEFAULT=5
|
||||
CONFIG_ESP32_PTHREAD_TASK_STACK_SIZE_DEFAULT=3072
|
||||
CONFIG_PTHREAD_STACK_MIN=768
|
||||
|
||||
#
|
||||
# SPI Flash driver
|
||||
#
|
||||
CONFIG_SPI_FLASH_VERIFY_WRITE=
|
||||
CONFIG_SPI_FLASH_ENABLE_COUNTERS=
|
||||
CONFIG_SPI_FLASH_ROM_DRIVER_PATCH=y
|
||||
CONFIG_SPI_FLASH_WRITING_DANGEROUS_REGIONS_ABORTS=y
|
||||
CONFIG_SPI_FLASH_WRITING_DANGEROUS_REGIONS_FAILS=
|
||||
CONFIG_SPI_FLASH_WRITING_DANGEROUS_REGIONS_ALLOWED=
|
||||
|
||||
#
|
||||
# SPIFFS Configuration
|
||||
#
|
||||
CONFIG_SPIFFS_MAX_PARTITIONS=3
|
||||
|
||||
#
|
||||
# SPIFFS Cache Configuration
|
||||
#
|
||||
CONFIG_SPIFFS_CACHE=y
|
||||
CONFIG_SPIFFS_CACHE_WR=y
|
||||
CONFIG_SPIFFS_CACHE_STATS=
|
||||
CONFIG_SPIFFS_PAGE_CHECK=y
|
||||
CONFIG_SPIFFS_GC_MAX_RUNS=10
|
||||
CONFIG_SPIFFS_GC_STATS=
|
||||
CONFIG_SPIFFS_PAGE_SIZE=256
|
||||
CONFIG_SPIFFS_OBJ_NAME_LEN=32
|
||||
CONFIG_SPIFFS_USE_MAGIC=y
|
||||
CONFIG_SPIFFS_USE_MAGIC_LENGTH=y
|
||||
CONFIG_SPIFFS_META_LENGTH=4
|
||||
CONFIG_SPIFFS_USE_MTIME=y
|
||||
|
||||
#
|
||||
# Debug Configuration
|
||||
#
|
||||
CONFIG_SPIFFS_DBG=
|
||||
CONFIG_SPIFFS_API_DBG=
|
||||
CONFIG_SPIFFS_GC_DBG=
|
||||
CONFIG_SPIFFS_CACHE_DBG=
|
||||
CONFIG_SPIFFS_CHECK_DBG=
|
||||
CONFIG_SPIFFS_TEST_VISUALISATION=
|
||||
|
||||
#
|
||||
# TCP/IP Adapter
|
||||
#
|
||||
CONFIG_IP_LOST_TIMER_INTERVAL=120
|
||||
CONFIG_TCPIP_LWIP=y
|
||||
|
||||
#
|
||||
# Virtual file system
|
||||
#
|
||||
CONFIG_SUPPRESS_SELECT_DEBUG_OUTPUT=y
|
||||
CONFIG_SUPPORT_TERMIOS=y
|
||||
|
||||
#
|
||||
# Wear Levelling
|
||||
#
|
||||
CONFIG_WL_SECTOR_SIZE_512=
|
||||
CONFIG_WL_SECTOR_SIZE_4096=y
|
||||
CONFIG_WL_SECTOR_SIZE=4096
|
||||
@ -0,0 +1,175 @@
|
||||
# MultiNet Introduction
|
||||
|
||||
MultiNet is a lightweight model specially designed based on [CRNN](https://arxiv.org/pdf/1703.05390.pdf) and [CTC](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.75.6306&rep=rep1&type=pdf) for the implementation of multi-command recognition with ESP32. Now, up to 100 speech commands, including customized commands, are supported.
|
||||
|
||||
## Overview
|
||||
|
||||
MultiNet uses the **MFCC features** of an audio clip as input, and the **phonemes** (Chinese or English) as output. By comparing the output phonemes, the relevant Chinese or English word is identified.
|
||||
|
||||
## Commands Recognition Process
|
||||
|
||||
1. Add customized commands to the speech command queue.
|
||||
2. Prepare an audio clip of 30 ms (16 KHz, 16 bit, mono).
|
||||
3. Input this audio to the MFCC model and get its **MFCC features**.
|
||||
4. Input the obtained **MFCC features** to MultiNet and get the output **phoneme**.
|
||||
5. Input the obtained **phoneme** to the Language model and get the output.
|
||||
6. Compare the output against the existing command words one by one, and output the Command ID of the matching command (if any).
|
||||
|
||||
Please see the flow diagram below:
|
||||
|
||||

|
||||
|
||||
|
||||
## User Guide
|
||||
|
||||
### User-defined Command
|
||||
|
||||
Currently, users can define their own command words in the code. For the method of adding command words, refer to `multinet_test.c`, which already contains 20 pre-stored commands.
|
||||
|
||||
|Command ID|Command|Command ID|Command|Command ID|Command|Command ID|Command|
|
||||
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|
||||
|0|打开空调 (Turn on the air conditioner)|5|降低一度 (Decrease by one degree)|10| 关闭节能模式 (Disable power-saving mode)|15| 播放歌曲 (Play a song)
|
||||
|1|关闭空调 (Turn off the air conditioner)|6|制热模式 (Heating mode)|11| 除湿模式 (Dehumidifying mode)|16| 暂停播放 (Pause playing)
|
||||
|2|增大风速 (Give me more wind)|7|制冷模式 (Cooling mode)|12| 关闭除湿模式 (Disable dehumidifying mode)|17| 定时一小时 (Set timer to 1 hour)
|
||||
|3|减少风速 (Give me less wind)|8|送风模式 (Ventilating mode)|13| 打开蓝牙 (Enable the Bluetooth)|18| 打开电灯 (Turn on the light)
|
||||
|4| 升高一度 (Increase by one degree)|9|节能模式 (Power-saving mode)|14| 关闭蓝牙 (Disable the Bluetooth)|19| 关闭电灯 (Turn off the light)
|
||||
|
||||
|
||||
MultiNet supports user-defined commands. You can add your own commands to MultiNet. Note that the newly added command should obtain its command ID before it can be recognized by MultiNet.
|
||||
|
||||
### Basic Configuration
|
||||
|
||||
Define the following two variables before using the command recognition model:
|
||||
|
||||
1. Model version
|
||||
The model version has been configured in `menuconfig` to facilitate your development. Please configure in `menuconfig` and add the following line in your code:
|
||||
|
||||
`static const esp_mn_iface_t *multinet = &MULTINET_MODEL;`
|
||||
|
||||
2. Model parameter
|
||||
The language supported and the effectiveness of the model are determined by the model parameters. Now only commands in Chinese are supported. Please configure the `MULTINET_COEFF` option in `menuconfig` and add the following line in your code to generate the model handle.
|
||||
|
||||
`model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);`
|
||||
|
||||
### API Reference
|
||||
|
||||
#### Header
|
||||
- esp_mn_iface.h
|
||||
- esp_mn_models.h
|
||||
|
||||
#### Function
|
||||
|
||||
- `typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const model_coeff_getter_t *coeff, int sample_length);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Easy function type to initialize a model instance with a coefficient.
|
||||
|
||||
**Parameter**
|
||||
|
||||
* coeff: The coefficient for speech commands recognition.
|
||||
* sample_length: Audio length for speech recognition, in ms. The range of sample_length is 0~6000.
|
||||
|
||||
**Return**
|
||||
|
||||
Handle to the model data.
|
||||
|
||||
- `typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Callback function type to fetch the amount of samples that need to be passed to the detection function. Every speech recognition model processes a certain number of samples at the same time. This function can be used to query the amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
|
||||
**Parameter**
|
||||
|
||||
model: The model object to query
|
||||
|
||||
**Return**
|
||||
|
||||
The amount of samples to feed the detect function
|
||||
|
||||
|
||||
|
||||
- `typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Callback function type to fetch the number of frames recognized by the command word.
|
||||
|
||||
**Parameter**
|
||||
|
||||
model: The model object to query
|
||||
|
||||
**Return**
|
||||
|
||||
The number of the frames recognized by the command word
|
||||
|
||||
- `typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Get the sample rate of the samples to feed to the detection function
|
||||
|
||||
**Parameter**
|
||||
|
||||
model: The model object to query
|
||||
|
||||
**Return**
|
||||
|
||||
The sample rate, in Hz
|
||||
|
||||
- `typedef int (*esp_mn_iface_op_add_speech_commands_t)(model_iface_data_t *model, int command_id, char *phrase_spelling, char *phrase_str);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Add a command word and set its command ID.
|
||||
|
||||
**Parameters**
|
||||
|
||||
* model: The model object to query
|
||||
|
||||
* command_id: The command ID of this word
|
||||
|
||||
* phrase_spelling: The speech command in Chinese spelled using prescribed rules
|
||||
|
||||
* phrase_str: Auxiliary information of words
|
||||
|
||||
**Return**
|
||||
|
||||
1: Setting success. 0: Setting failure.
|
||||
|
||||
**Note**
|
||||
|
||||
The `phrase_spelling` is the mandarin syllables provided one by one in the form of **one Type A element** and **one Type B element**, which can be seen below:
|
||||
|
||||
* Type A element: `b bi c ch chu cu d di du f g gu h hu j ji ju k ku l li lu m mi n ni nu p pi q qi qu r ru s sh shu su t ti tu w
|
||||
x xi xu y yu z zhu zu`
|
||||
|
||||
* Type B element: `a ai an ang ao e ei en eng er i ie in ing iu o ong ou u ue ui un v ve`
|
||||
|
||||
For example, the Type A and Type B elements for "tiao" are "ti" and "ao", so the syllable "tiao" should be provided to the API as "ti ao". Similarly, the command "da kai kong tiao", which means "turn on the air conditioner", should be provided to the API as "d a k ai k ong ti ao".
|
||||
|
||||
- `typedef int (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Feed samples of an audio stream to the speech recognition model and detect whether a speech command is found.
|
||||
|
||||
**Parameter**
|
||||
|
||||
model: The model object to query; samples: An array of 16-bit signed audio samples, whose size can be queried with `get_samp_chunksize`.
|
||||
|
||||
**Return**
|
||||
|
||||
* The command id, if a matching command is found
|
||||
* -1, if no matching command is found
|
||||
|
||||
- `typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);`
|
||||
|
||||
**Definition**
|
||||
|
||||
Destroy a speech commands recognition model
|
||||
|
||||
**Parameters**
|
||||
|
||||
model: Model object to destroy
|
||||
11
speech_commands_recognition/component.mk
Normal file
11
speech_commands_recognition/component.mk
Normal file
@ -0,0 +1,11 @@
|
||||
# Header directory that this component adds to the include path.
COMPONENT_ADD_INCLUDEDIRS := include
|
||||
|
||||
# Source directory: the component root itself.
COMPONENT_SRCDIRS := .
|
||||
|
||||
# Collect every prebuilt static library (lib*.a) shipped in the component directory.
LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a)
|
||||
|
||||
# Turn each libNAME.a file name into a -lNAME linker flag.
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))
|
||||
|
||||
# Add the component directory to the library search path and link the libraries found above.
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS)
|
||||
|
||||
# Append the libraries to ALL_LIB_FILES (presumably so the build tracks them as
# link dependencies; confirm against the ESP-IDF make build system).
ALL_LIB_FILES += $(LIB_FILES)
|
||||
131
speech_commands_recognition/demo.md
Normal file
131
speech_commands_recognition/demo.md
Normal file
@ -0,0 +1,131 @@
|
||||
# Recognizing Speech Commands with ESP32-LyraT-Mini
|
||||
|
||||
Currently, Espressif's ESP32-based speech command recognition model [MultiNet](README.md) supports up to 100 Chinese speech commands (we will add support for English speech commands in the next release of [esp-sr](../README.md)).
|
||||
|
||||
This demo demonstrates the basic process of recognizing Chinese speech commands with ESP32-LyraT-Mini. Please also see a flow diagram below.
|
||||
|
||||

|
||||
|
||||
For more information about ESP32-LyraT-Mini, please see [ESP32-LyraT-Mini Getting Started Guide]().
|
||||
|
||||
# 1. Quick Start
|
||||
|
||||
### 1.1 Basic Configuration
|
||||
|
||||
Go to `make menuconfig`, and complete the following configuration:
|
||||
|
||||
- Basic hardware configuration
|
||||
|
||||
Navigate to `Audio Media HAL`, and configure the following parameters as instructed.
|
||||
- `Audio hardware board`: select `ESP32-Lyrat Mini V1.1`;
|
||||
- `Audio codec chip`: select `CODEC IS ES8311`;
|
||||
- `use external adc`: select `use es7243`;
|
||||
- `Audio DSP chip`: select `No DSP chip`.
|
||||
|
||||

|
||||
|
||||
- Basic software configuration
|
||||
|
||||
Navigate to `ESP32 Hotword Detection`, and configure the following parameters as instructed.
|
||||
- `Speech recognition audio source`: select `Live microphone on LyraT-board`;
|
||||
- `wake word model to use`: select `WakeNet 6 (quantized)`;
|
||||
- `wake word name`: select `hilexin (WakeNet6)`;
|
||||
- `LVCSR model to use`: select `MultiNet 1 (quantized)`;
|
||||
- `langugae`: select `chinese (MultiNet1)`
|
||||
|
||||

|
||||
|
||||
Then save the configuration and exit.
|
||||
|
||||
### 1.2 Compiling and Running
|
||||
|
||||
Run `make flash monitor` to compile, flash and run this example, and check the output log:
|
||||
|
||||
```
|
||||
...
|
||||
I (126) MSC_DSP: CONFIG_CODEC_CHIP_IS_ES8311
|
||||
wake word number = 1, word1 name = hilexin
|
||||
-----------awaits to be waken up-----------
|
||||
```
|
||||
|
||||
### 1.3 Waking up the Board
|
||||
|
||||
Find the pre-defined wake-up word of the board in the printed log. In this example, the wake-up word is "Hi Lexin" [Ləsɪ:n].
|
||||
|
||||
Then, say "Hi Lexin" ([Ləsɪ:n]) to wake up the board, which then prints the following log:
|
||||
|
||||
```
|
||||
hilexin DETECTED.
|
||||
-----------LISTENING-----------
|
||||
```
|
||||
|
||||
### 1.4 Recognizing Speech Commands
|
||||
|
||||
Then, the board enters the Listening status, waiting for new speech commands.
|
||||
|
||||
Currently, the MultiNet model has 20 pre-defined speech commands, which can be seen in [MultiNet](README.md).
|
||||
|
||||
Now, you can give one speech command, for example, "turn on the air conditioner",
|
||||
|
||||
* If this command exists in the supported speech command list, the board prints out the command id of this command in its log:
|
||||
|
||||
```
|
||||
-----------LISTENING-----------
|
||||
phrase:d a k ai k ong ti ao, prob:0.423639
|
||||
command_id:0
|
||||
--------------END--------------
|
||||
|
||||
```
|
||||
* If this command does not exist in the supported speech command list, the board prints an error message of "can not recognize any speech commands" in its log:
|
||||
|
||||
|
||||
```
|
||||
-----------LISTENING-----------
|
||||
can not recognize any speech commands
|
||||
--------------END--------------
|
||||
|
||||
```
|
||||
|
||||
Also, the board prints `--------------END--------------` when it ends the current recognition cycle and re-enters the Waiting-for-Wakeup status.
|
||||
|
||||
**Notices:**
|
||||
|
||||
The board can only stay in the Listening status for up to six seconds. After that, it ends the current recognition cycle and re-enters the Waiting-for-wakeup status. Therefore, you must give speech commands within six seconds after the board wakes up.
|
||||
|
||||
### 1.5 Adding Customized Speech Commands
|
||||
|
||||
Now, the MultiNet model supports 20 pre-defined speech commands, and also allows more customized speech commands by providing users an easy-to-use `add_speech_commands` API.
|
||||
|
||||
Note that you should use mandarin syllables when creating your speech commands, and each syllable should be provided to the API in the form of **one Type A element** and **one Type B element**, which can be seen below:
|
||||
|
||||
* Type A element: `b bi c ch chu cu d di du f g gu h hu j ji ju k ku l li lu m mi n ni nu p pi q qi qu r ru s sh shu su t ti tu w
|
||||
x xi xu y yu z zhu zu`
|
||||
|
||||
* Type B element: `a ai an ang ao e ei en eng er i ie in ing iu o ong ou u ue ui un v ve`
|
||||
|
||||
|
||||
For example, the Type A and Type B elements for "tiao" are "ti" and "ao", so the syllable "tiao" should be provided to the API as "ti ao". Similarly, the command "da kai kong tiao", which means "turn on the air conditioner", should be provided to the API as "d a k ai k ong ti ao".
|
||||
|
||||
For details on how to use API `add_speech_commands`, please click [Here](./README.md).
|
||||
|
||||
|
||||
# 2. Workflow Walkthrough
|
||||
### 2.1 Hardware Initialization
|
||||
|
||||
You don't need any special-purpose boards to run the **WakeNet** and **MultiNet** examples. Currently, Espressif has launched several audio boards and one of them is ESP32-LyraT-Mini, which is what we use in this example.
|
||||
|
||||
For details on the initialization of the ESP32-LyraT-Mini board, please see codes in `components/hardware_driver`.
|
||||
|
||||
If you want to use a development board other than ESP32-LyraT-Mini, please go to [esp-adf](https://github.com/espressif/esp-adf), which is Espressif's development framework for building audio applications based on ESP32 products, for more detailed information on hardware drivers.
|
||||
|
||||
### 2.2 Wake-up by Keyword
|
||||
|
||||
The board enters the Waiting-for-wakeup status after startup, during which it picks up audio data with the on-board microphone and feeds it to the **WakeNet** model frame by frame (30 ms, 16 KHz, 16 bit, mono).
|
||||
|
||||
Currently, you cannot customize wake-up word yourself. Therefore, please contact us for such requests.
|
||||
|
||||
### 2.3 Recognizing Speech Commands
|
||||
|
||||
During the recognition, the board feeds data frame by frame (30 ms, 16 KHz, 16 bit, mono) to the **MultiNet** model for six seconds. Then, the model compares the speech command received against the pre-defined commands in the list, and returns the command id or an error message depending on the recognition result.
|
||||
|
||||
Please see section 1.5 on how to customize your speech command.
|
||||
78
speech_commands_recognition/include/esp_mn_iface.h
Normal file
78
speech_commands_recognition/include/esp_mn_iface.h
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "esp_err.h"
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "esp_wn_iface.h"
|
||||
// //Opaque model data container
|
||||
// typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
/**
|
||||
* @brief Initialize a model instance with specified model coefficient.
|
||||
*
|
||||
* @param coeff The speech commands recognition (MultiNet) model coefficient.
|
||||
* @returns Handle to the model data.
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const model_coeff_getter_t *coeff);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in hz
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Add a command word and set its command ID.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param command_id The command id of this word.
|
||||
* @param phrase_spelling The chinese command word spelled using prescribed rules.
|
||||
* @param phrase_str Auxiliary information of phrase.
|
||||
* @return 1: setting success. 0: setting failure
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_add_speech_commands_t)(model_iface_data_t *model,
|
||||
int command_id,
|
||||
char *phrase_spelling,
|
||||
char *phrase_str);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
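* @return The command id if a command word is detected. NOTE(review): this comment originally said 0 means no detection, but the README documents -1 and 0 is a valid command id; confirm the no-detection value.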
|
||||
*/
|
||||
typedef int (*esp_mn_iface_op_detect_t)(model_iface_data_t *model,
|
||||
int16_t *samples);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech commands recognition model
|
||||
*
|
||||
* @param model The Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
typedef struct { // Table of operations provided by a speech commands recognition model
|
||||
esp_mn_iface_op_create_t create; // Initialize a model instance from a model coefficient
|
||||
esp_mn_iface_op_get_samp_rate_t get_samp_rate; // Sample rate (Hz) of the samples to feed to detect
|
||||
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize; // Number of 16-bit samples to pass to each detect call
|
||||
esp_mn_iface_op_add_speech_commands_t add_speech_commands; // Add a command word and set its command ID
|
||||
esp_mn_iface_op_detect_t detect; // Feed one chunk of samples and report the detected command ID
|
||||
esp_mn_iface_op_destroy_t destroy; // Destroy the model instance
|
||||
} esp_mn_iface_t;
|
||||
51
speech_commands_recognition/include/esp_mn_models.h
Normal file
51
speech_commands_recognition/include/esp_mn_models.h
Normal file
@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
#include "esp_mn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
//a specific phrase or word.
|
||||
extern const esp_mn_iface_t esp_sr_multinet1_quantized;
|
||||
|
||||
/*
|
||||
Configure network to use based on what's selected in menuconfig.
|
||||
*/
|
||||
#if CONFIG_SR_MN1_MODEL_QUANT
|
||||
#define MULTINET_MODEL esp_sr_multinet1_quantized
|
||||
#else
|
||||
#error No valid neural network model selected.
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
Configure the model coefficient (language) to use based on what's selected in menuconfig.
|
||||
*/
|
||||
#if CONFIG_SR_MN1_CHINESE_QUANT
|
||||
#include "multinet1_ch.h"
|
||||
#define MULTINET_COEFF get_coeff_multinet1_ch
|
||||
#else
|
||||
#error No valid wake word selected.
|
||||
#endif
|
||||
|
||||
|
||||
/* example
|
||||
|
||||
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
|
||||
|
||||
//Initialize MultiNet model data
|
||||
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
|
||||
add_speech_commands(multinet, model_data);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=multinet->get_samp_chunksize(model_data);
|
||||
int frequency = multinet->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=multinet->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
multinet->destroy(model_data);
|
||||
|
||||
*/
|
||||
8
speech_commands_recognition/include/multinet1_ch.h
Normal file
8
speech_commands_recognition/include/multinet1_ch.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_multinet1_ch;
|
||||
BIN
speech_commands_recognition/libmultinet1_ch.a
Normal file
BIN
speech_commands_recognition/libmultinet1_ch.a
Normal file
Binary file not shown.
79
wake_words_engine/ESP_Wake_Words_Customization.md
Normal file
79
wake_words_engine/ESP_Wake_Words_Customization.md
Normal file
@ -0,0 +1,79 @@
|
||||
# Espressif Speech Wake Words Customization Process
|
||||
|
||||
#### Offline Wake Words Customization
|
||||
|
||||
Espressif provides users with the **Off-line Wake Words Customization** service, which allows users to use both publicly available Wake Words (such as "Hi Lexin", "Alexa", and "Espressif") and customized Wake Words.
|
||||
|
||||
1. If you want to use publicly available Wake Words for commercial use,
|
||||
- please check the Wake Words provided in ADF/ASR Demos;
|
||||
- We will continue to provide more and more Wake Words that are free for commercial use.
|
||||
|
||||
2. If you want to use your own wake words, we can also provide the **Off-line Wake Words Customization** service.
|
||||
- If you are able to provide a training corpus meeting the requirements described in the following **Requirements on Corpus**.
|
||||
- We need two to three weeks for training and optimization.
|
||||
- Service fee will be charged by Espressif in this case.
|
||||
|
||||
- Otherwise
|
||||
- We will provide the training corpus (your corpus will not be compromised or shared)
|
||||
- We need two to three weeks for training and optimization.
|
||||
- Service fee will be charged by Espressif in this case (Fee incurred from collecting the training corpus will be charged separately).
|
||||
|
||||
- For details on the fee and time required for customization, please email us at [sales@espressif.com](mailto:sales@espressif.com).
|
||||
- We will agree on a reasonable plan based on the number of wake words to be customized and the scale of your product production.
|
||||
|
||||
3. About Espressif Wake Words Model
|
||||
- Now, a single wake words model can recognize up to five Wake Words
|
||||
- Normally, each Wake Words contains three to six syllables, such as "Hi Le xin" (3 syllables), “Alexa” (3 syllables), "小爱同学" (4 syllables).
|
||||
- Several wake words can be used in combination based on your actual requirement.
|
||||
|
||||
#### Requirements on Corpus Texts
|
||||
|
||||
You can provide us your training corpus by preparing it yourself or purchasing one from a third party service provider. However, please make sure your corpus meets the following requirements.
|
||||
|
||||
- Audio File Format
|
||||
- Sample rate: 16 KHz
|
||||
- Encoding method: 16-bit signed int
|
||||
- Channel type: mono
|
||||
- File format: wav
|
||||
|
||||
- Sampling
|
||||
- Sample size: no less than 500 people, among which,
|
||||
- The number of males and females should be similar;
|
||||
- The number of people in different age-group should be similar;
|
||||
- The number of Children should be larger than 100 (If the child is one of your target users).
|
||||
- Environment:
|
||||
- It's advised to collect your sample with a Hi-Fi microphone in a professional audio room, with an ambient noise lower than 40 dB.
|
||||
- Each participant should
|
||||
- Position himself/herself at a distance of one meter from the microphone, and repeat the Wake Words 15 times (5 times fast, 10 times normal);
|
||||
- Position himself/herself at a distance of three meters from the microphone, and repeat the Wake Words 15 times (5 times fast, 10 times normal);
|
||||
- The naming of sample file should reflect the sex, age, and speech speed of the sample himself/herself. An example for naming your sample file is `female_age_fast_id.wav`. Or you can provide a separate form to record this information.
|
||||
|
||||
#### Hardware Design and Test
|
||||
|
||||
1. The performance of wake word detection is heavily impacted by the hardware design and cavity structure. Therefore, please go through the following requirements on hardware design.
|
||||
|
||||
- Hardware design: We provide reference design files for smart speakers, including schematic diagrams and PCB designs. Please refer to these files when designing your own speaker. It's advised that you send your designs to Espressif for review to avoid some most common design issues.
|
||||
|
||||
- Cavity structure: We don't provide reference designs for cavity structures. Therefore, it's advised to involve acoustic professionals during the design and take reference from other mainstream speakers in the market, such as TmallGenie(天猫精灵), Baidu speaker(小度音箱)and Google speaker(谷歌音箱).
|
||||
|
||||
2. You can evaluate the performance of your design by performing the following tests. Note that all the tests below are designed to be performed in an audio room. Please make adjustment according to your actual situation.
|
||||
|
||||
- Record test to evaluate the gain and distortion for MIC and codec.
|
||||
- Play audio samples (90 dB, 0.1 meter away from the MIC), and make sure the recording sample is not saturated by adjusting the gain of MIC.
|
||||
- Play frequency sweep file (0~20 KHz), and record it using a sample rate of 16 KHz. No prominent aliasing should be observed.
|
||||
- Use the publicly released speech recognition API provided on the cloud to recognize 100 audio samples. The recognition rate should meet a certain standard.
|
||||
|
||||
- Playing test to verify the distortion of the PA and speaker by measuring:
|
||||
- PA power @1% THD.
|
||||
|
||||
- Test the performance of DSP, and verify if the DSP parameters are configured correctly, meanwhile minimizing the non-linear distortion in the DSP arithmetic.
|
||||
- Test the performance of the **Noise Suppression** algorithm
|
||||
- Test the performance of the **Acoustic Echo Cancellation** algorithm
|
||||
- Test the performance of the **Speech Enhancement** algorithm
|
||||
|
||||
3. After you finish your hardware design, it's advised to **send** us 1 or 2 pieces of your hardware, so we can optimize its performance for wake words detection on a whole product level.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
89
wake_words_engine/README.md
Normal file
89
wake_words_engine/README.md
Normal file
@ -0,0 +1,89 @@
|
||||
# WakeNet
|
||||
|
||||
WakeNet, which is a wake words engine built upon neural network, is specially designed for low-power embedded MCUs. Now, the WakeNet model supports up to 5 wake words.
|
||||
|
||||
## Overview
|
||||
|
||||
Please see the flow diagram of WakeNet below:
|
||||
|
||||
<center>
|
||||
<img src="../img/wakenet_workflow.png" width = "800" />
|
||||
</center>
|
||||
|
||||
- Speech Feature:
|
||||
The WakeNet uses [MFCC](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum) to obtain the features of the input audio clip (16 KHz, 16 bit, single track). The window width and step width of each frame of the audio clip are both 30 ms.
|
||||
|
||||
- Neural Network:
|
||||
Now, the neural network structure has been updated to the sixth edition, among which,
|
||||
- WakeNet1 and WakeNet2 are no longer in use.
|
||||
- WakeNet3 and WakeNet4 are built upon the [CRNN](https://arxiv.org/abs/1703.05390) structure.
|
||||
- WakeNet5 and WakeNet6 are built upon the [Dilated Convolution](https://arxiv.org/pdf/1609.03499.pdf) structure.
|
||||
|
||||
|
||||
- Keyword Triggering Method
|
||||
For continuous audio stream, we calculate the average recognition results (M) for several frames and generate a smoothing prediction result, to improve the accuracy of keyword triggering. Only when the M value is larger than the set threshold, a triggering command is sent.
|
||||
|
||||
|
||||
## API Introduction
|
||||
|
||||
- How to select the WakeNet model
|
||||
|
||||
Go to `make menuconfig`, navigate to `Component config` >> `ESP Speech Recognition` >> `Wake word engine`. See below:
|
||||
|
||||
<center>
|
||||
<img src="../img/model_sel.png" width = "500" />
|
||||
</center>
|
||||
|
||||
|
||||
- How to select the wake words
|
||||
Go to `make menuconfig`, and navigate to `Component config` >> `ESP Speech Recognition` >> `Wake words list`. See below:
|
||||
|
||||
<center>
|
||||
<img src="../img/word_sel.png" width = "500" />
|
||||
</center>
|
||||
|
||||
Note that the `customized word` option only supports WakeNet5 and WakeNet6. WakeNet3 and WakeNet4 are only compatible with earlier versions. If you want to use your own wake words, please overwrite existing models in the `wake_words_engine` directory with your own words model.
|
||||
|
||||
- How to set the triggering threshold
|
||||
1. The triggering threshold (0, 0.9999) for wake word can be set to adjust the accuracy of the wake words model. The threshold can be configured separately for each wake word if more than one wake word is supported in a model.
|
||||
2. The smaller the triggering threshold is, the higher the risk of false triggering is (and vice versa). Please configure your threshold according to your applications.
|
||||
3. The wake word engine predefines two thresholds for each wake word during the initialization. See below:
|
||||
|
||||
```
|
||||
typedef enum {
|
||||
DET_MODE_90 = 0, //Normal, response accuracy rate about 90%
|
||||
DET_MODE_95 //Aggressive, response accuracy rate about 95%
|
||||
} det_mode_t;
|
||||
```
|
||||
|
||||
4. Use the `set_det_threshold()` function to configure the thresholds for different wake words after the initialization.
|
||||
|
||||
- How to get the sampling rate and frame size.
|
||||
- Use `get_samp_rate` to get the sampling rate of the audio stream to be recognized.
|
||||
- Use `get_samp_chunksize` to get the number of sampling points in each frame. The encoding of audio data is `signed 16-bit int`.
|
||||
|
||||
## Performance Test
|
||||
|
||||
### 1. Resource Occupancy(ESP32)
|
||||
|
||||
|Model Type|Parameter Size|RAM|Average Running Time per Frame| Frame Length|
|
||||
|:---:|:---:|:---:|:---:|:---:|
|
||||
|Quantized WakeNet3|26 K|20 KB|29 ms|90 ms|
|
||||
|Quantised WakeNet4|53 K|22 KB|48 ms|90 ms|
|
||||
|Quantised WakeNet5|41 K|15 KB|7 ms|30 ms|
|
||||
|Quantised WakeNet6|41 K|20 KB|9 ms|30 ms|
|
||||
|
||||
### 2. Performance
|
||||
|
||||
|Distance| Quiet | Stationary Noise (SNR = 0 ~ 10 dB)| Speech Noise (SNR = 0 ~ 10 dB)| AEC Interruption (-5 ~ -15 dB)|
|
||||
|:---:|:---:|:---:|:---:|:---:|
|
||||
|1 m|97%|90%|88%|89%|
|
||||
|3 m|95%|85%|75%|73%|
|
||||
|
||||
False triggering rate: 1 time in 20 hours
|
||||
|
||||
**Note**: We use the ESP32-LyraT-Mini development board and the WakeNet6 model in our test. The performance is limited because ESP32-LyraT-Mini only has one microphone. We expect a better recognition performance when more microphones are involved in the test.
|
||||
|
||||
## Wake Word Customization
|
||||
|
||||
For details on how to customize your wake words, please see [Espressif Speech Wake Word Customization Process](ESP_Wake_Words_Customization.md).
|
||||
11
wake_words_engine/component.mk
Normal file
11
wake_words_engine/component.mk
Normal file
@ -0,0 +1,11 @@
|
||||
# Header directory that this component adds to the include path.
COMPONENT_ADD_INCLUDEDIRS := include
|
||||
|
||||
# Source directory: the component root itself.
COMPONENT_SRCDIRS := .
|
||||
|
||||
# Collect every prebuilt static library (lib*.a) shipped in the component directory.
LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a)
|
||||
|
||||
# Turn each libNAME.a file name into a -lNAME linker flag.
LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES)))
|
||||
|
||||
# Add the component directory to the library search path and link the libraries found above.
COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS)
|
||||
|
||||
# Append the libraries to ALL_LIB_FILES (presumably so the build tracks them as
# link dependencies; confirm against the ESP-IDF make build system).
ALL_LIB_FILES += $(LIB_FILES)
|
||||
8
wake_words_engine/include/customized_word_wn5.h
Normal file
8
wake_words_engine/include/customized_word_wn5.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_customized_word_wn5;
|
||||
8
wake_words_engine/include/customized_word_wn6.h
Normal file
8
wake_words_engine/include/customized_word_wn6.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_customized_word_wn6;
|
||||
122
wake_words_engine/include/esp_wn_iface.h
Normal file
122
wake_words_engine/include/esp_wn_iface.h
Normal file
@ -0,0 +1,122 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
|
||||
//Opaque model data container
|
||||
typedef struct model_iface_data_t model_iface_data_t;
|
||||
|
||||
//Set wake words recognition operating mode
|
||||
//A higher mode increases the probability of triggering on the wake words.
|
||||
//As a consequence also the false alarm rate goes up
|
||||
typedef enum {
|
||||
DET_MODE_90 = 0, //Normal, response accuracy rate about 90%
|
||||
DET_MODE_95 //Aggressive, response accuracy rate about 95%
|
||||
} det_mode_t;
|
||||
|
||||
typedef struct {
|
||||
int wake_word_num; //The number of all wake words
|
||||
char **wake_word_list; //The name list of wake words
|
||||
} wake_word_info_t;
|
||||
|
||||
/**
|
||||
* @brief Easy function type to initialize a model instance with a detection mode and specified wake word coefficient
|
||||
*
|
||||
* @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
|
||||
* @param model_coeff The specified wake word model coefficient
|
||||
* @returns Handle to the model data
|
||||
*/
|
||||
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const model_coeff_getter_t *model_coeff, det_mode_t det_mode);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
|
||||
*
|
||||
* Every speech recognition model processes a certain number of samples at the same time. This function
|
||||
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The amount of samples to feed the detect function
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the sample rate of the samples to feed to the detect function
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @return The sample rate, in Hz
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the number of wake words
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @returns the number of wake words
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get the name of wake word by index
|
||||
*
|
||||
* @warning The index of wake words starts at 1
|
||||
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the name of the wake word
|
||||
*/
|
||||
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger wake words, the range of det_threshold is 0.5~0.9999
|
||||
* @param word_index The index of wake word
|
||||
* @return 0: setting failed, 1: setting success
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Get the wake word detection threshold of different modes
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param word_index The index of wake word
|
||||
* @returns the detection threshold
|
||||
*/
|
||||
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
|
||||
|
||||
/**
|
||||
* @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
|
||||
*
|
||||
* @warning The index of wake words starts at 1; 0 means no wake word is detected.
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The index of the detected wake word, or 0 if no wake word is detected.
|
||||
*/
|
||||
typedef int (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
|
||||
|
||||
/**
|
||||
* @brief Destroy a speech recognition model
|
||||
*
|
||||
* @param model Model object to destroy
|
||||
*/
|
||||
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
|
||||
/**
|
||||
* This structure contains the functions used to do operations on a wake word detection model.
|
||||
*/
|
||||
typedef struct {
|
||||
esp_wn_iface_op_create_t create;
|
||||
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
|
||||
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
|
||||
esp_wn_iface_op_get_word_num_t get_word_num;
|
||||
esp_wn_iface_op_get_word_name_t get_word_name;
|
||||
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
|
||||
esp_wn_iface_op_detect_t detect;
|
||||
esp_wn_iface_op_destroy_t destroy;
|
||||
} esp_wn_iface_t;
|
||||
80
wake_words_engine/include/esp_wn_models.h
Normal file
80
wake_words_engine/include/esp_wn_models.h
Normal file
@ -0,0 +1,80 @@
|
||||
#pragma once
|
||||
#include "esp_wn_iface.h"
|
||||
|
||||
//Contains declarations of all available speech recognition models. Pair this up with the right coefficients and you have a model that can recognize
|
||||
//a specific phrase or word.
|
||||
|
||||
extern const esp_wn_iface_t esp_sr_wakenet3_quantized;
|
||||
extern const esp_wn_iface_t esp_sr_wakenet4_quantized;
|
||||
extern const esp_wn_iface_t esp_sr_wakenet5_quantized;
|
||||
extern const esp_wn_iface_t esp_sr_wakenet6_quantized;
|
||||
|
||||
/*
|
||||
Configure network to use based on what's selected in menuconfig.
|
||||
*/
|
||||
#if CONFIG_SR_MODEL_WN3_QUANT
|
||||
#define WAKENET_MODEL esp_sr_wakenet3_quantized
|
||||
#elif CONFIG_SR_MODEL_WN4_QUANT
|
||||
#define WAKENET_MODEL esp_sr_wakenet4_quantized
|
||||
#elif CONFIG_SR_MODEL_WN5_QUANT
|
||||
#define WAKENET_MODEL esp_sr_wakenet5_quantized
|
||||
#elif CONFIG_SR_MODEL_WN6_QUANT
|
||||
#define WAKENET_MODEL esp_sr_wakenet6_quantized
|
||||
#else
|
||||
#error No valid neural network model selected.
|
||||
#endif
|
||||
|
||||
/*
|
||||
Configure wake word to use based on what's selected in menuconfig.
|
||||
*/
|
||||
#if CONFIG_SR_WN3_HILEXIN
|
||||
#include "hilexin_wn3.h"
|
||||
#define WAKENET_COEFF get_coeff_hilexin_wn3
|
||||
|
||||
#elif CONFIG_SR_WN4_HILEXIN
|
||||
#include "hilexin_wn4.h"
|
||||
#define WAKENET_COEFF get_coeff_hilexin_wn4
|
||||
|
||||
#elif CONFIG_SR_WN5_HILEXIN
|
||||
#include "hilexin_wn5.h"
|
||||
#define WAKENET_COEFF get_coeff_hilexin_wn5
|
||||
|
||||
#elif CONFIG_SR_WN6_HILEXIN
|
||||
#include "hilexin_wn6.h"
|
||||
#define WAKENET_COEFF get_coeff_hilexin_wn6
|
||||
|
||||
#elif CONFIG_SR_WN5_CUSTOMIZED_WORD
|
||||
#include "customized_word_wn5.h"
|
||||
#define WAKENET_COEFF get_coeff_customized_word_wn5
|
||||
|
||||
#elif CONFIG_SR_WN6_CUSTOMIZED_WORD
|
||||
#include "customized_word_wn6.h"
|
||||
#define WAKENET_COEFF get_coeff_customized_word_wn6
|
||||
|
||||
#else
|
||||
#error No valid wake word selected.
|
||||
#endif
|
||||
|
||||
|
||||
/* example
|
||||
|
||||
static const esp_wn_iface_t *model = &WAKENET_MODEL;
|
||||
|
||||
//Initialize wakeNet model data
|
||||
static model_iface_data_t *model_data=model->create(&WAKENET_COEFF, DET_MODE_90);
|
||||
|
||||
//Set parameters of buffer
|
||||
int audio_chunksize=model->get_samp_chunksize(model_data);
|
||||
int frequency = model->get_samp_rate(model_data);
|
||||
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
|
||||
|
||||
//Detect
|
||||
int r=model->detect(model_data, buffer);
|
||||
if (r>0) {
|
||||
printf("Detection triggered output %d.\n", r);
|
||||
}
|
||||
|
||||
//Destroy model
|
||||
model->destroy(model_data);
|
||||
|
||||
*/
|
||||
8
wake_words_engine/include/hilexin_wn3.h
Normal file
8
wake_words_engine/include/hilexin_wn3.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn3;
|
||||
8
wake_words_engine/include/hilexin_wn4.h
Normal file
8
wake_words_engine/include/hilexin_wn4.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn4;
|
||||
8
wake_words_engine/include/hilexin_wn5.h
Normal file
8
wake_words_engine/include/hilexin_wn5.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn5;
|
||||
8
wake_words_engine/include/hilexin_wn6.h
Normal file
8
wake_words_engine/include/hilexin_wn6.h
Normal file
@ -0,0 +1,8 @@
|
||||
//Generated by mkmodel
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "dl_lib_coefgetter_if.h"
|
||||
#include "dl_lib_matrix.h"
|
||||
#include "dl_lib_matrixq.h"
|
||||
|
||||
extern const model_coeff_getter_t get_coeff_hilexin_wn6;
|
||||
BIN
wake_words_engine/libcustomized_word_wn5.a
Normal file
BIN
wake_words_engine/libcustomized_word_wn5.a
Normal file
Binary file not shown.
BIN
wake_words_engine/libcustomized_word_wn6.a
Normal file
BIN
wake_words_engine/libcustomized_word_wn6.a
Normal file
Binary file not shown.
BIN
wake_words_engine/libhilexin_wn3.a
Normal file
BIN
wake_words_engine/libhilexin_wn3.a
Normal file
Binary file not shown.
BIN
wake_words_engine/libhilexin_wn4.a
Normal file
BIN
wake_words_engine/libhilexin_wn4.a
Normal file
Binary file not shown.
BIN
wake_words_engine/libhilexin_wn5.a
Normal file
BIN
wake_words_engine/libhilexin_wn5.a
Normal file
Binary file not shown.
BIN
wake_words_engine/libhilexin_wn6.a
Normal file
BIN
wake_words_engine/libhilexin_wn6.a
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user