diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 28c058d..b65bb86 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -49,6 +49,9 @@ before_script: .patterns-test_esp_sr: &patterns-test_esp32c5 - "lib/esp32c5/*" + - "lib/esp32c3/*" + - "lib/esp32c6/*" + - "lib/esp32s2/*" - "include/esp32c5/*" - "src/**/*" - "model/**/*" diff --git a/CHANGELOG.md b/CHANGELOG.md index a1cef47..37d0e16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change log for esp-sr +## unreleased +- esp32c3 support wakenet9s and aec +- esp32c5 support wakenet9s and aec +- esp32c6 support wakenet9s and aec +- esp32s2 support wakenet9s and aec + ## 2.0.5 - Fix fftr bug diff --git a/CMakeLists.txt b/CMakeLists.txt index d7a3bd8..cfba116 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,8 +69,7 @@ if((${IDF_TARGET} STREQUAL "esp32s3") OR (${IDF_TARGET} STREQUAL "esp32p4") OR ( "-Wl,--end-group") -elseif(${IDF_TARGET} STREQUAL "esp32c5") - +elseif((${IDF_TARGET} STREQUAL "esp32c5") OR (${IDF_TARGET} STREQUAL "esp32c3") OR (${IDF_TARGET} STREQUAL "esp32c6") OR (${IDF_TARGET} STREQUAL "esp32s2")) set(srcs "src/model_path.c" ) @@ -78,6 +77,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32c5") set(include_dirs "include/${IDF_TARGET}" "src/include" + "esp-tts/esp_tts_chinese/include" ) set(requires @@ -99,6 +99,8 @@ elseif(${IDF_TARGET} STREQUAL "esp32c5") add_prebuilt_library(c_speech_features "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libc_speech_features.a" PRIV_REQUIRES ${COMPONENT_NAME}) add_prebuilt_library(hufzip "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libhufzip.a" PRIV_REQUIRES ${COMPONENT_NAME}) add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME}) + add_prebuilt_library(esp_tts_chinese "${CMAKE_CURRENT_SOURCE_DIR}/esp-tts/esp_tts_chinese/${IDF_TARGET}/libesp_tts_chinese.a" PRIV_REQUIRES ${COMPONENT_NAME}) + add_prebuilt_library(voice_set_xiaole "${CMAKE_CURRENT_SOURCE_DIR}/esp-tts/esp_tts_chinese/${IDF_TARGET}/libvoice_set_xiaole.a" PRIV_REQUIRES ${COMPONENT_NAME}) target_link_libraries(${COMPONENT_LIB} PRIVATE esp_audio_processor) target_link_libraries(${COMPONENT_LIB} PRIVATE esp_audio_front_end) @@ -106,30 +108,8 @@ elseif(${IDF_TARGET} STREQUAL "esp32c5") target_link_libraries(${COMPONENT_LIB} PRIVATE c_speech_features) target_link_libraries(${COMPONENT_LIB} PRIVATE hufzip) target_link_libraries(${COMPONENT_LIB} PRIVATE wakenet) - -elseif((${IDF_TARGET} STREQUAL "esp32s2") OR (${IDF_TARGET} STREQUAL "esp32c3") OR (${IDF_TARGET} STREQUAL "esp32c6")) -#Only support TTS on esp32s2, esp32c3 and esp32c6 - -set(requires - spiffs - ) - -IF (IDF_VERSION_MAJOR GREATER 4) - list(APPEND requires esp_partition) -ENDIF (IDF_VERSION_MAJOR GREATER 4) - -idf_component_register(SRCS . - INCLUDE_DIRS esp-tts/esp_tts_chinese/include - REQUIRES ${requires} - PRIV_REQUIRES spi_flash) - -target_link_libraries(${COMPONENT_TARGET} INTERFACE "-L ${CMAKE_CURRENT_SOURCE_DIR}/esp-tts/esp_tts_chinese/${IDF_TARGET}") -add_prebuilt_library(esp_tts_chinese "${CMAKE_CURRENT_SOURCE_DIR}/esp-tts/esp_tts_chinese/${IDF_TARGET}/libesp_tts_chinese.a" PRIV_REQUIRES ${COMPONENT_NAME}) -add_prebuilt_library(voice_set_xiaole "${CMAKE_CURRENT_SOURCE_DIR}/esp-tts/esp_tts_chinese/${IDF_TARGET}/libvoice_set_xiaole.a" PRIV_REQUIRES ${COMPONENT_NAME}) -target_link_libraries(${COMPONENT_TARGET} INTERFACE "-Wl,--start-group" - esp_tts_chinese - voice_set_xiaole - "-Wl,--end-group") + target_link_libraries(${COMPONENT_LIB} PRIVATE esp_tts_chinese) + target_link_libraries(${COMPONENT_LIB} PRIVATE voice_set_xiaole) endif() diff --git a/Kconfig.projbuild b/Kconfig.projbuild index 3de3e02..c10ad0d 100644 --- a/Kconfig.projbuild +++ b/Kconfig.projbuild @@ -54,7 +54,7 @@ endchoice menu "Load Multiple Wake Words" - depends on IDF_TARGET_ESP32C5 || IDF_TARGET_ESP32C3 + depends on IDF_TARGET_ESP32C5 || IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C6 || IDF_TARGET_ESP32S2 config SR_WN_WN9S_HILEXIN bool "Hi,乐鑫 (wn9s_hilexin)" diff --git a/esp-tts/esp_tts_chinese/esp32c5/libesp_tts_chinese.a b/esp-tts/esp_tts_chinese/esp32c5/libesp_tts_chinese.a new file mode 100644 index 0000000..8677b51 Binary files /dev/null and b/esp-tts/esp_tts_chinese/esp32c5/libesp_tts_chinese.a differ diff --git a/esp-tts/esp_tts_chinese/esp32c5/libvoice_set_xiaole.a b/esp-tts/esp_tts_chinese/esp32c5/libvoice_set_xiaole.a new file mode 100644 index 0000000..4d274fe Binary files /dev/null and b/esp-tts/esp_tts_chinese/esp32c5/libvoice_set_xiaole.a differ diff --git a/include/esp32c3/c_speech_features_config.h b/include/esp32c3/c_speech_features_config.h new file mode 100644 index 0000000..e21e020 --- /dev/null +++ b/include/esp32c3/c_speech_features_config.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include + +/* #undef ENABLE_DOUBLE */ + +#ifdef ENABLE_DOUBLE +# define csf_float double +# define csf_ceil ceil +# define csf_floor floor +# define csf_sin sin +# define csf_log log +# define csf_log10 log10 +# define csf_pow pow +# define csf_sqrt sqrt +# define csf_abs fabs +# define csf_float_min DBL_MIN +#else +# define csf_float float +# define csf_ceil ceilf +# define csf_floor floorf +# define csf_sin sinf +# define csf_log logf +# define csf_log10 log10f +# define csf_pow powf +# define csf_sqrt sqrtf +# define csf_abs fabsf +# define csf_float_min FLT_MIN +#endif diff --git a/include/esp32c3/dl_lib.h b/include/esp32c3/dl_lib.h new file mode 100644 index 0000000..47e7c86 --- /dev/null +++ b/include/esp32c3/dl_lib.h @@ -0,0 +1,418 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_H +#define DL_LIB_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" + +#ifdef ESP_PLATFORM +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#include "esp_heap_caps.h" +#include "sdkconfig.h" +#define DL_SPIRAM_SUPPORT 1 +#endif + +#ifdef CONFIG_IDF_TARGET_ESP32S3 +#include "esp32s3/rom/cache.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int padding_state; + +// /** +// * @brief Allocate a chunk of memory which has the given capabilities. +// * Equivalent semantics to libc malloc(), for capability-aware memory. +// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT). +// * +// * @param size In bytes, of the amount of memory to allocate +// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned +// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM +// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off +// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA +// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation +// * @return Pointer to currently allocated heap memory +// **/ +// void *heap_caps_malloc(size_t size, uint32_t caps); + +/** + * @brief Allocate aligned memory from internal memory or external memory. + * if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM + * else, allocate memory from PSRAM + * + * @param cnt Number of continuing chunks of memory to allocate + * @param size Size, in bytes, of a chunk of memory to allocate + * @param align Aligned size, in bits + * @return Pointer to currently allocated heap memory + */ +void *dl_lib_calloc(int cnt, int size, int align); + +/** + * @brief Always allocate aligned memory from external memory. + * + * @param cnt Number of continuing chunks of memory to allocate + * @param size Size, in bytes, of a chunk of memory to allocate + * @param align Aligned size, in bits + * @return Pointer to currently aligned heap memory + */ +void *dl_lib_calloc_psram(int cnt, int size, int align); + +/** + * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram` + * + * @param ptr Pointer to free + */ +void dl_lib_free(void *ptr); + +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * As described in https://codingforspeed.com/using-faster-exponential-approximation/ + * Should be good til an input of 5 or so with a steps factor of 8. + * + * @param in Floating point input + * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes. + * @return Exp()'ed output + */ +fptp_t fast_exp(double x, int steps); + +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * @param in Floating point input + * @return Exp()'ed output + */ +double fast_exp_pro(double x); + +/** + * @brief Does a softmax operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a softmax operation on a quantized matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a floating point number + * + * @param in Floating point input + * @return Sigmoid output + */ + +fptp_t dl_sigmoid_op(fptp_t in); + + +/** + * @brief Does a sigmoid operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out); + +/** + * @brief Does a tanh operation on a floating point number + * + * @param in Floating point input number + * @return Tanh value + */ +fptp_t dl_tanh_op(fptp_t v); + +/** + * @brief Does a tanh operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a floating point number + * + * @param in Floating point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +fptp_t dl_relu_op(fptp_t in, fptp_t clip); + +/** + * @brief Does a ReLu operation on a matrix. + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Fully connected layer operation + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Biases for the neurons. Can be NULL if a bias of 0 is required. + * @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix. + */ +void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out); + +/** + * @brief Pre-calculate the sqrtvari variable for the batch_normalize function. + * The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence, + * this matrix only needs to be calculated once. This function does that. + * + * @param + * @return + */ +void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out); + +/** + * @brief Batch-normalize a matrix + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @return + */ +void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale, + const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari); + +/** + * @brief Do a basic LSTM layer pass. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a basic LSTM layer pass, partial quantized version. + * This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons, need to be quantised + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a fully-connected layer pass, fully-quantized version. + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Bias values of the neurons. Can be NULL if no bias is needed. + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift); + +/** + * @brief Do a basic LSTM layer pass, fully-quantized version + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift); + +/** + * @brief Batch-normalize a matrix, fully-quantized version + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return + */ +void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale, + const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift); + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number + * This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +qtp_t dl_relu_q_op(qtp_t in, qtp_t clip); + +/** + * @brief Does a ReLu operation on a matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return Sigmoid output + */ +int dl_sigmoid_op_q(const int in); +int16_t dl_sigmoid_op_q8(const int16_t in); +/** + * @brief Does a sigmoid operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return tanh output + */ +int dl_tanh_op_q(int v); +int16_t dl_tanh_op_q8(int16_t v); + +void load_mat_psram_mn4(void); +void load_mat_psram_mn3(void); +void free_mat_psram_mn4(void); +void free_mat_psram_mn3(void); +qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent); +qtp_t dl_hard_tanh_op(qtp_t in, int exponent); + +int16_t dl_table_tanh_op(int16_t in, int exponent); +int16_t dl_table_sigmoid_op(int16_t in, int exponent); + +void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); +void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); +void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + + +/** + * @brief Filter out the number greater than clip in the matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Filter out the number greater than clip in the matrix, float version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); +/** + * @brief Do a basic CNN layer pass. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height + * @param bias Bias for the CNN layer. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1. + * @return The result of CNN layer. + */ +dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + + +/** + * @brief Do a basic CNN layer pass, quantised wersion. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height, + * @param bias Bias of the neurons. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1 + * @return The result of CNN layer + */ +dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c3/dl_lib_coefgetter_if.h b/include/esp32c3/dl_lib_coefgetter_if.h new file mode 100644 index 0000000..a21de8d --- /dev/null +++ b/include/esp32c3/dl_lib_coefgetter_if.h @@ -0,0 +1,80 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_COEFGETTER_IF_H +#define DL_LIB_COEFGETTER_IF_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" +#include "cJSON.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by +//dl_batch_normalize_get_sqrtvar first. +#define COEF_GETTER_HINT_BNVAR (1<<0) + +/* +This struct describes the basic information of model data: +word_num: the number of wake words or speech commands +word_list: the name list of wake words or speech commands +thres_list: the threshold list of wake words or speech commands +info_str: the string used to reflect the version and information of model data + which consist of the architecture of network, the version of model data, wake words and their threshold +*/ +typedef struct { + int word_num; + char **word_list; + int *win_list; + float *thresh_list; + char *info_str; +} model_info_t; + +/* +Alphabet struct describes the basic grapheme or phoneme. +item_num: the number of baisc item(grapheme or phonemr) +items: the list of basic item +*/ +typedef struct { + int item_num; + char **items; +}alphabet_t; + +/* +This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network. +For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the +coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument +to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument +is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the +memory for the returned matrices, when applicable. +*/ +typedef struct { + const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); + const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); + const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint); + void (*free_f)(const dl_matrix2d_t *m); + void (*free_q)(const dl_matrix2dq_t *m); + void (*free_q8)(const dl_matrix2dq8_t *m); + const model_info_t* (*getter_info)(void *arg); + const alphabet_t* (*getter_alphabet)(void *arg); + const cJSON* (*getter_config)(void *arg); +} model_coeff_getter_t; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c3/dl_lib_conv_queue.h b/include/esp32c3/dl_lib_conv_queue.h new file mode 100644 index 0000000..7cb9bf9 --- /dev/null +++ b/include/esp32c3/dl_lib_conv_queue.h @@ -0,0 +1,180 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONV_QUEUE_H +#define DL_LIB_CONV_QUEUE_H + + +#include "dl_lib_matrix.h" +#ifdef __cplusplus +extern "C" { +#endif + +typedef float fptp_t; + +//Flags for matrices +// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */ + +//Float convolution FIFO queue. +typedef struct { + int n; /*< the length of queue */ + int c; /*< the channel number of queue element*/ + int front; /*< the front(top) position of queue */ + int flag; /*< not used*/ + fptp_t *item; /*< Pointer to item array */ +} dl_conv_queue_t; + +/** + * @brief Allocate a convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc(int n, int c); + +/** + * @brief Allocate a convolution queue from psram + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c); + +/** + * @brief Free a convolution queue + * + * @param cq The convolution queue to free + */ +void dl_conv_queue_free(dl_conv_queue_t *cq); + +void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input convolution queue + * @return Pointer of oldest element + */ +fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq); + +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input convolution queue + * @param item The new element + */ +void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item); + + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a sigmoid operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a tanh operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a softmax operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset); + +fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset); +fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset); +dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b); +dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); +/** + * @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @return The result of atrous convolution + */ +fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); +fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @return The result of dilation layer + */ +fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias, + dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias); + + +void test_atrous_conv(int size, int rate, int in_channel, int out_channel); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c3/dl_lib_convq8_queue.h b/include/esp32c3/dl_lib_convq8_queue.h new file mode 100644 index 0000000..28c5da7 --- /dev/null +++ b/include/esp32c3/dl_lib_convq8_queue.h @@ -0,0 +1,303 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ8_QUEUE_H +#define DL_LIB_CONVQ8_QUEUE_H + + +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" +#include "dl_lib_conv_queue.h" +#include "dl_lib_convq_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//[nch, n, c] +typedef struct { + int n; /*< the length of queue */ + int c; /*< the number of queue element*/ + int front; /*< the front(top) position of queue */ + int nch; /*< the channel of queue */ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + q8tp_t *itemq; /*< Pointer to item array */ +} dl_convq8_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c); + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param c The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch); + +/** + * @brief Allocate a bit fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq8_queue_free(dl_convq8_queue_t *cq); + +/** + * @brief Set itemq of convolution queue to 0 + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq); +q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param ch Channel index of queue + * @return Pointer of the element + */ +q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel Kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + * @return The result of atrous convolution + */ +void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias, + int out_exponent, int offset, int prenum); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + * @return The result of dilation layer + */ +void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias, + int offset, int prenum); + + + + +dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2); + +int8_t dl_sigmoid_lutq8(int in); +/** + * @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch  The channel number + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch); + +/** + * @brief Free a 8-bit fixed-point Multi-Channel convolution queue + * + * @param cqm The fixed-point convolution queue to free + * @param nch The channel number + */ +void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch); + +/** + * @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input + * + * @param cqm Input 8-bit fixed-point Multi-Channel convolution queue + * @param offset Offset used to calculate the beginning of input conv queue + * @param nch The channel number + */ +void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch); + +/** + * @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * Usually, this layer is used as first layer for 8-bit network. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * Input is a 16-bit queue point, Output is an 8-bit queue point. + * + * @param in Input 16bit fixed-point convolution queue array + * @param out Output 8bit fixed-point convolution queue array + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Exponent of output + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum); + +/** + * @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input 8bit fixed-point convolution queue array + * @param out Output 8bit fixed-point convolution queue array + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Exponent of output + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, + int nch, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias, + int out_exponent, int offset, int prenum); + +/** + * @brief Fast implement of 8-bit dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input 8-bit fixed-point convolution queue + * @param out Output 8-bit fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias, + int offset, int prenum); + +void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch); + + + +dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c); + +qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + + +qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + +dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c, + dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight, + const dl_matrix2dq_t *bias, int prenum); + +qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum); + +void print_convq8(dl_convq8_queue_t *cq, int offset); +void print_convq(dl_convq_queue_t *cq, int offset); +void dl_relu_convq8(dl_convq8_queue_t *cq); + +void lstmq8_free(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c3/dl_lib_convq_queue.h b/include/esp32c3/dl_lib_convq_queue.h new file mode 100644 index 0000000..ff190fe --- /dev/null +++ b/include/esp32c3/dl_lib_convq_queue.h @@ -0,0 +1,382 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ_QUEUE_H +#define DL_LIB_CONVQ_QUEUE_H + +#include "dl_lib_matrixq.h" +#include "dl_lib_conv_queue.h" +#include "dl_lib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//fixed-point convolution FIFO queue. +//[nch, n, c] +typedef struct { + int n; /*< the length of queue */ + int c; /*< the number of queue element*/ + int front; /*< the front(top) position of queue */ + int nch; /*< the multiple of queue*/ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + qtp_t *itemq; /*< Pointer to item array */ +} dl_convq_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc(int n, int c); + +/** + * @brief Allocate a fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c); + +/** + * @brief Allocate a fixed-point multi-channel convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of conv queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch); + +/** + * @brief Allocate a fixed-point multi-channel convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of conv queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch); + + +void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq_queue_free(dl_convq_queue_t *cq); + +/** + * @brief Set itemq of convolution queue to 0 + * + * @param cq The fixed-point convolution queue point + */ +void dl_convq_queue_bzero(dl_convq_queue_t *cq); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq); +qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n); +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input fixed-point convolution queue + * @param item The new element + */ +void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param last_num Offset from the front of the queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param ch Channel index of convolution queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_tanh_convq(dl_convq_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in multi channel convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point multi channnel convolution queue + * @param offset Offset from the front of the queue + * @param nch The channel number of cqm + * @return Pointer of the element + */ +void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch); + +/** + * @brief Does a relu operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * relu operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, input data + stay as it is. Results are saved into the *out* array. + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param out Old array to re-use. Passing NULL will allocate a new matrix. + * @return softmax results + */ +fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @return The result of atrous convolution + */ +qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift, int offset, int prenum); + + +qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift, int prenum); + +qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + + +qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum); + +/** + * @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue + * + * @param cq1 First fixed-point convolution queue + * @param cq2 Seconf fixed-point convolution queue + * @return The result of float-point convolution queue + */ +dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2); + +/** + * @brief Fast implement of LSTM layer by dl_atrous_conv1dq function + * + * @Warning LSTM kernel is split into two part, the first part input is the last layer output, + * and kernel is parameter *in_weight*. The second part input is the last frame LSTM output, + * the kernel is parameters *h_weight*. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param in_weight the LSTM kernel needed by first part + * @param h_weight the LSTM kernel needed by second part + * @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required. + * @in_shift Shift ratio used in first part + * @h_shift Shift ratio used in second part + * @return The result of LSTM layer + */ +dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c, + dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight, + const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum); +dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift); + +dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c, + dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight, + dl_matrix2dq_t *bias, int prenum); + +/** + * @brief Allocate a fixed-point multi channel convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @param nch the channel numbet of convolution queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch); + +/** + * @brief Free a fixed-point multi channel convolution queue + * + * @param cqm The fixed-point convolution queue to free + * @param nch The channel number of cqm + */ +void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param nch The channel number of input + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @param offset the offset to calculate input convq + * @param prenum the preload size, 0: do not use preload function + * @return The result of atrous convolution + */ +qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in, + dl_convq_queue_t **out, + int nch, + int rate, + int size, + dl_matrix2dq_t* kernel, + dl_matrix2dq_t* bias, + int shift, + int offset, + int prenum); + +/** + * @brief Fast implement of dilation layer as follows for multi channel input + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param nch The channel number of input + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @param offset The offset to calculate input convq + * @param prenum The preload size, 0: do not use preload function + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in, + dl_convq_queue_t **out, + int nch, + int rate, + int size, + dl_matrix2dq_t* filter_kernel, + dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, + dl_matrix2dq_t* gate_bias, + int filter_shift, + int gate_shift, + int offset, + int prenum); + +void test_atrous_convq(int size, int rate, int in_channel, int out_channel); +void test_lstm_convq(int size, int in_dim, int lstm_cell); +void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch); +void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch); +void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c3/dl_lib_matrix.h b/include/esp32c3/dl_lib_matrix.h new file mode 100644 index 0000000..59f7d79 --- /dev/null +++ b/include/esp32c3/dl_lib_matrix.h @@ -0,0 +1,257 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIX_H +#define DL_LIB_MATRIX_H + +#ifdef ESP_PLATFORM +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef float fptp_t; + +#if CONFIG_BT_SHARE_MEM_REUSE +extern multi_heap_handle_t gst_heap; +#endif + +//Flags for matrices +#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */ +#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */ + +//'Normal' float matrix +typedef struct { + int w; /*< Width */ + int h; /*< Height */ + int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */ + int flags; /*< Flags. OR of DL_MF_* values */ + fptp_t *item; /*< Pointer to item array */ +} dl_matrix2d_t; + +//Macro to quickly access the raw items in a matrix +#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride] + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_alloc(int w, int h); + + +/** + * @brief Free a matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrix_free(dl_matrix2d_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrix_zero(dl_matrix2d_t *m); + +/** + * @brief Copy the matrix into psram + * Copy the matrix from flash or iram/psram into psram + * + * @param m Matrix to zero + */ +dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief Generate a matrix from existing floating-point data + * + * @param w Width of resulting matrix + * @param h Height of resulting matrix + * @param data Data to populate matrix with + * @return A newaly allocated matrix populated with the given input data, or NULL if out of memory. + */ +dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data); + + +/** + * @brief Multiply a pair of matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two matrices : res=a.b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Add a pair of matrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + + +/** + * @brief Divide a pair of matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Subtract a matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Add a constant to every item of the matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add); + + +/** + * @brief Concatenate the rows of two matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated array with as avlues a|b + */ +dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + +dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b); + +/** + * @brief Print the contents of a matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrix(const dl_matrix2d_t *a); + +/** + * @brief Return the average square error given a correct and a test matrix. + * + * ...Well, more or less. If anything, it gives an indication of the error between + * the two. Check the code for the exact implementation. + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return value indicating the relative difference between matrices + */ +float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + + +/** + * @brief Check if two matrices have the same shape, that is, the same amount of rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + +/** + * @brief Get a specific item from the matrix + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) { + return DL_ITM(m, x, y); +} + +/** + * @brief Set a specific item in the matrix to the given value + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) { + DL_ITM(m, x, y)=val; +} + +void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/esp32c3/dl_lib_matrixq.h b/include/esp32c3/dl_lib_matrixq.h new file mode 100644 index 0000000..8ad397b --- /dev/null +++ b/include/esp32c3/dl_lib_matrixq.h @@ -0,0 +1,387 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ_H +#define DL_LIB_MATRIXQ_H + +#include +#include "dl_lib_matrix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int16_t qtp_t; + +//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted +//for easy use as a multiplicand without stressing out the flash cache too much. +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + qtp_t *itemq; +} dl_matrix2dq_t; + +#define DL_QTP_SHIFT 15 +#define DL_QTP_RANGE ((1<itemq[(y)+(x)*m->stride] +#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null + +#define DL_SHIFT_AUTO 32 + +/** + * @info About quantized matrices and shift values + * + * Grab a coffee (or tea, or hot water) and sit down when you read this for the first + * time. Quantized matrices can speed up your operations, but come with some quirks, and + * it's good to understand how they work before using them. + * + * The data in the quantized matrix type is stored similarily to floating-point types: + * when storing a real value, the value is stored as a mantissa (base number) and an + * exponent. The 'real' value that can be re-derived from those two numbers is something + * similar to mantissa*2^exponent. Up to this point, there's not that much difference from + * the standard floating point implementations like e.g. IEEE-754. + * + * The difference with respect to quantized matrices is that for a quantized matrix, it is + * assumed all values stored have more-or-less the same order of magnitude. This allows the + * matrix to only store all the mantissas, while the exponents are shared; there is only one + * exponent for the entire matrix. This makes it quicker to handle matrix operations - the + * logic to fix the exponents only needs to happen once, while the rest can be done in simple + * integer arithmetic. It also nets us some memory savings - while normally a floating point + * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the + * memory requirements. + * + * While most of the details of handling the intricacies of the quantized matrixes are done + * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, + * specifically in places where addition/subtraction/division happens. + * + * The problem is that the routines do not know what the size of the resulting operation is. For + * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough + * to overflow the mantissa of the result if the exponent is the same. However, if by default we + * assume the mantissas needs to be scaled back, we may lose precision. + * + * In order to counter this, all operations that have this issue have a ``shift`` argument. If + * the argument is zero, the routine will be conservative, that is, increase the exponent of + * the result to such an extent it's mathematically impossible a value in the result will exceed + * the maximum value that can be stored. However, when this argument is larger than zero, the + * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision + * but increasing the chance of some of the calculated values not fitting in the mantissa anymore. + * If this happens, the value will be clipped to the largest (or, for negative values, smallest) + * value possible. (Neural networks usually are okay with this happening for a limited amount + * of matrix indices). + * + * For deciding on these shift values, it is recommended to start with a shift value of one, then + * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. + * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only + * shift values of 0 or 1 make sense; these routines will error out if you try to do something + * else. + * + * For neural networks and other noise-tolerant applications, note that even when + * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead + * to slightly improved precision. Feel free to experiment. + **/ + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_alloc(int w, int h); +dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h); +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out); + +/** + * TODO: DESCRIBE THIS FUNCTION + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit); + + +/** + * @brief Convert a quantized matrix to a floating-point one. + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + **/ +dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out); + + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq_free(dl_matrix2dq_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrixq_zero(dl_matrix2dq_t *m); + +/** + * @brief Copy the matrix into psram + * Copy the matrix from flash or iram/psram into psram + * + * @param m Matrix to copy + */ +dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a fixed-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be + * much slower than dl_matrixq_dot . + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a floating-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be + * much slower than dl_matrixq_dot_matrix_out. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix. + * + * @param a First multiplicand; float matrix + * @param b Second multiplicand; quantized matrix + * @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + + +/** + * @brief Print the contents of a quantized matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrixq(const dl_matrix2dq_t *a); + + +/** + * @brief Add a pair of quantizedmatrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @Warning In contrast to the floating point equivalent of this function, the fixed-point version + * of this has the issue that as soon as the output exponent of one of the slices changes, the data + * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you + * use this function, either treat the slices as read-only, or assume the sliced matrix contains + * garbage after modifying the data in one of the slices. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief Subtract a quantized matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Multiply a pair of quantized matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that matrix. + */ +void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res); + +/** + * @brief Divide a pair of quantized matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift); + +/** + * @brief Check if two quantized matrices have the same shape, that is, the same amount of + * rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Concatenate the rows of two quantized matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated quantized matrix with as values a|b + */ +dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Add a constant to every item of the quantized matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift); + +/** + * @brief Check the sanity of a quantized matrix + * + * Due to the nature of quantized matrices, depending on the calculations a quantized + * matrix is the result of and the shift values chosen in those calculations, a quantized + * matrix may have an exponent and mantissas that lead to a loss of precision, either because + * most significant mantissa bits are unused, or because a fair amount of mantissas are + * clipped. This function checks if this is the case and will report a message to stdout + * if significant loss of precision is detected. + * + * @param m The quantized matrix to check + * @param name A string to be displayed in the message if the sanity check fails + * @return True if matrix is sane, false otherwise + **/ + +int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name); + +/** + * @brief re-adjust the exponent of the matrix to fit the mantissa better + * + * This function will shift up all the data in the mantissas so there are no + * most-significant bits that are unused in all mantissas. It will also adjust + * the exponent to keep the actua values in the matrix the same. + * + * Some operations done on a matrix, especially operations that re-use the + * result of earlier operations done in the same way, can lead to the loss of + * data because the exponent of the quantized matrix is never re-adjusted. You + * can do that implicitely by calling this function. + * + * @param m The matrix to re-adjust +**/ +void dl_matrixq_readjust_exp(dl_matrix2dq_t *m); + + + +/** + * @brief Get the floating-point value of a specific item from the quantized matrix + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y); + +/** + * @brief Set a specific item in the quantized matrix to the given + * floating-point value + * + * @warning If the given value is more than the exponent in the quantized matrix + * allows for, all mantissas in the matrix will be shifted down to make the value + * 'fit'. If, however, the exponent is such that the value would result in a + * quantized mantissa of 0, nothing is done. + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c3/dl_lib_matrixq8.h b/include/esp32c3/dl_lib_matrixq8.h new file mode 100644 index 0000000..377df7c --- /dev/null +++ b/include/esp32c3/dl_lib_matrixq8.h @@ -0,0 +1,80 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ8_H +#define DL_LIB_MATRIXQ8_H + +#include +#include "dl_lib_matrix.h" +#include "dl_lib.h" +#include "dl_lib_matrixq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int8_t q8tp_t; + +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + q8tp_t *itemq; +} dl_matrix2dq8_t; + +#define DL_Q8TP_SHIFT 7 +#define DL_Q8TP_RANGE ((1<itemq[(y)+(x)*m->stride] + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h); + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq8_free(dl_matrix2dq8_t *m); + +/** + * @brief Copy a quantized matrix + * Copy a quantized matrix from flash or iram/psram + * + * @param m Matrix to copy + */ +dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m); + +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/esp32c3/esp_aec.h b/include/esp32c3/esp_aec.h new file mode 100644 index 0000000..36de9c1 --- /dev/null +++ b/include/esp32c3/esp_aec.h @@ -0,0 +1,105 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_AEC_H_ +#define _ESP_AEC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define USE_AEC_FFT // Not kiss_fft +#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz +#define AEC_FRAME_LENGTH_MS 32 + +typedef struct aec_handle_t aec_handle_t; +typedef enum { + AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC fro speech recognition + AEC_MODE_SR_HIGH_PERF = 1, // High Perforamce AEC for speech recognition + AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication + AEC_MODE_VOIP_HIGH_PERF = 4, // High Perforamce AEC for voice communication +} aec_mode_t; + +/** + * @brief Creates an instance to the AEC structure. + * Please get frame size by aec_get_chunksize() function + * + * @param sample_rate The Sampling frequency (Hz) must be 16000. + * @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption. + * @param channel_num The input microphone channel number + * @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode); + +/** + * @brief Creates an instance to the AEC structure, same with aec_create(). + * + * @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption. + * @param channel_num The input microphone channel number + * @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode); + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc(). + * + * @param inst The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..." + * @param indata An array of 16-bit signed audio samples from mic. + * @param refdata An array of 16-bit signed audio samples sent to the speaker. + * @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..." + * @return None + * + */ +void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata); + +/** + * @brief Get frame size of AEC (the samples of one frame) + * @param handle The instance of AEC. + * @return Frame size + */ +int aec_get_chunksize(const aec_handle_t *handle); + +/** + * @brief Get AEC mode string + * + * @param aec_mode The mode of AEC. + * + * @return AEC mode string + */ +char * aec_get_mode_string(aec_mode_t aec_mode); + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void aec_destroy(aec_handle_t *handel); + +#ifdef __cplusplus +} +#endif + +#endif //_ESP_AEC_H_ diff --git a/include/esp32c3/esp_afe_aec.h b/include/esp32c3/esp_afe_aec.h new file mode 100644 index 0000000..9d60588 --- /dev/null +++ b/include/esp32c3/esp_afe_aec.h @@ -0,0 +1,82 @@ + +#ifndef _ESP_AFE_AEC_H_ +#define _ESP_AFE_AEC_H_ + + +#include "esp_afe_config.h" +#include "esp_aec.h" + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + aec_handle_t* handle; + aec_mode_t mode; + afe_pcm_config_t pcm_config; + int frame_size; + int16_t *data; +}afe_aec_handle_t; + + +/** + * @brief Creates an instance to the AEC structure. + * + * @warning Currently only support 1 microphone channel and 1 playback channe. + * If input has multiple microphone channels and playback channels, just the first microphone channel and playback channel will be selected. + * + * The input format, same as afe config: + * M to represent the microphone channel + * R to represent the playback reference channel + * N to represent an unknown or unused channel + * + * For example, input_format="MMNR" indicates that the input data consists of four channels, + * which are the microphone channel, the microphone channel, an unused channel, and the playback channel + * + * @param input_format The input format + * @param filter_length The length of filter. The larger the filter, the higher the CPU loading. + * Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for esp32c5. + * @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC + * @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF + * + * @return afe_config_t* The default config of afe + */ +afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode); + + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @param inst The instance of AEC. + * @param indata Input audio data, format is define by input_format. Note indata will be modified in function call. + * @param outdata Returns near-end signal with echo removed. + + * @return The bytes of outdata. + */ +size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outdata); + +/** + * @brief Get frame size of AEC (the samples of one frame) + * @param handle The instance of AEC. + * @return Frame size + */ +int afe_aec_get_chunksize(afe_aec_handle_t *handle); + + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void afe_aec_destroy(afe_aec_handle_t *handel); + +#ifdef __cplusplus +} +#endif + +#endif //_ESP_AEC_H_ diff --git a/include/esp32c3/esp_afe_config.h b/include/esp32c3/esp_afe_config.h new file mode 100644 index 0000000..f9de6fe --- /dev/null +++ b/include/esp32c3/esp_afe_config.h @@ -0,0 +1,69 @@ +#pragma once +#include "esp_aec.h" +#include "stdbool.h" +#include "stdint.h" +#include "stdlib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// AFE: Audio Front-End +// SR: Speech Recognition +// VC: Voice Communication + +// Set AFE_SR mode +typedef enum { + SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode + SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode +} afe_sr_mode_t; + +// Set AFE mode +typedef enum { + AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode + AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode +} afe_mode_t; + +// Set AFE type +typedef enum { + AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression + AFE_TYPE_VC = 1, // Voice communication scenarios, including nonlinear noise suppression +} afe_type_t; + +typedef enum { + AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram + AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance + AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram +} afe_memory_alloc_mode_t; + +typedef enum { + AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB + AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB + AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetcg is -3dB + AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain +} afe_mn_peak_agc_mode_t; + +typedef struct { + int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel + int mic_num; // microphone channel number + uint8_t *mic_ids; // microphone channel indices + int ref_num; // playback reference channel number + uint8_t *ref_ids; // playback reference channel indices + int sample_rate; // sample rate of audio +} afe_pcm_config_t; + +typedef enum { + AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC" + AFE_NS_MODE_NET = 1, // please use model name of NSNET +} afe_ns_mode_t; + +typedef enum { + AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC + AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated +} afe_agc_mode_t; + + +#ifdef __cplusplus +} +#endif + diff --git a/include/esp32c3/esp_mfcc_fbank_int16.h b/include/esp32c3/esp_mfcc_fbank_int16.h new file mode 100644 index 0000000..22a5f2c --- /dev/null +++ b/include/esp32c3/esp_mfcc_fbank_int16.h @@ -0,0 +1,86 @@ +#pragma once +#include "esp_speech_features.h" +#include + +/* +This describes an interface for a MFCC runner, that is, some kind of implementation that can be +fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so +multiple implementations can be used. +*/ + +typedef struct esp_mfcc_data_t esp_mfcc_data_t; + +// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features), +// please refer to its documentation for details. +typedef struct { + int winstep_ms; // The step between successive windows in ms. (10) + int winlen_ms; // The length of the analysis window in ms. (25) + int nch; // The number of input channel + int numcep; // The number of cepstrum to return + int nfilter; // The number of filters in the filterbank + int nfft; // The FFT size + int samp_freq; // The sample-rate of the signal. + int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0) + int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq + float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) + char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey" + bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy + bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum + int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon)) + float log_epsilon; // log epsilon. (e.g. 1e-7) + bool psram_first; // Alloc memory from PSRAM first + bool remove_dc_offset; // Whether to subtract mean of wave before FFT +} esp_mfcc_opts_t; + +/** + * @brief Un-initialize and free a mfcc runner + * + * Function to free a previously allocated mfcc runner. + * + * @param r Runner object to destroy + */ +typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r); + +/** + * @brief Initialize parameters for a mfcc runner. + * + * After creation, a mfcc runner needs to be initialized first; this is usually done + * in the initialization routine of a speech recognition algorithm. This provides + * a pointer to do this for a specific mfcc runner. + * + * @param opt Options for the mfcc process + * @return True if success, false on error. + */ +typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt); + +/** + * @brief Run a mfcc iteration on frame by frame + * + * This will take a set of samples and return a ceptrum. Note that this may be pipelined: + * an initial call to this function may return NULL and subsequent calls may return the + * cepstrum of previous calls. + * + * @param r The mfcc runner + * @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000). + * @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function + * when done with this buffer. Note that some implementations require the buffer to be freed before another call + * to this function is done. + */ +typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch); + +/** + * @brief Clean all state of mfcc handle + * + * @param r The mfcc runner + */ +typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r); + +/** + * @brief Operations possible on a mfcc runner + */ +typedef struct { + esp_mfcc_op_destroy_t destroy; + esp_mfcc_op_create_t create; + esp_mfcc_op_run_step_t run_step; + esp_mfcc_op_clean_t clean; +} esp_mfcc_iface_t; diff --git a/include/esp32c3/esp_mfcc_iface.h b/include/esp32c3/esp_mfcc_iface.h new file mode 100644 index 0000000..0257768 --- /dev/null +++ b/include/esp32c3/esp_mfcc_iface.h @@ -0,0 +1,89 @@ +#pragma once +#include "esp_speech_features.h" +#include + +/* +This describes an interface for a MFCC runner, that is, some kind of implementation that can be +fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so +multiple implementations can be used. +*/ + +typedef struct esp_mfcc_data_t esp_mfcc_data_t; + +// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features), +// please refer to its documentation for details. +typedef struct { + int winstep_ms; // The step between successive windows in ms. (10) + int winlen_ms; // The length of the analysis window in ms. (25) + int nch; // The number of input channel + int numcep; // The number of cepstrum to return + int nfilter; // The number of filters in the filterbank + int nfft; // The FFT size + int samp_freq; // The sample-rate of the signal. + int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0) + int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq + float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) + char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey" + bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy + bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum + int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon)) + float log_epsilon; // log epsilon. (e.g. 1e-7) + bool psram_first; // Alloc memory from PSRAM first + bool remove_dc_offset; // Whether to subtract mean of wave before FFT +} esp_mfcc_opts_t; + +/** + * @brief Un-initialize and free a mfcc runner + * + * Function to free a previously allocated mfcc runner. + * + * @param r Runner object to destroy + */ +typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r); + +/** + * @brief Initialize parameters for a mfcc runner. + * + * After creation, a mfcc runner needs to be initialized first; this is usually done + * in the initialization routine of a speech recognition algorithm. This provides + * a pointer to do this for a specific mfcc runner. + * + * @param opt Options for the mfcc process + * @return True if success, false on error. + */ +typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt); + +/** + * @brief Run a mfcc iteration on frame by frame + * + * This will take a set of samples and return a ceptrum. Note that this may be pipelined: + * an initial call to this function may return NULL and subsequent calls may return the + * cepstrum of previous calls. + * + * @param r The mfcc runner + * @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000). + * @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function + * when done with this buffer. Note that some implementations require the buffer to be freed before another call + * to this function is done. + */ +typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch); + +typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank); + +/** + * @brief Clean all state of mfcc handle + * + * @param r The mfcc runner + */ +typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r); + +/** + * @brief Operations possible on a mfcc runner + */ +typedef struct { + esp_mfcc_op_destroy_t destroy; + esp_mfcc_op_create_t create; + esp_mfcc_op_run_step_t run_step; + esp_mfcc_op_run_step_s16_t run_step_s16; + esp_mfcc_op_clean_t clean; +} esp_mfcc_iface_t; diff --git a/include/esp32c3/esp_mfcc_models.h b/include/esp32c3/esp_mfcc_models.h new file mode 100644 index 0000000..44086e8 --- /dev/null +++ b/include/esp32c3/esp_mfcc_models.h @@ -0,0 +1,44 @@ +#pragma once +#include "esp_mfcc_iface.h" + +extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle +extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle + +/** + * @brief Return basic opts used in wakenet9 & multinet5 + **/ +esp_mfcc_opts_t *get_mfcc_opts_wn9(); + +/** + * @brief Return basic opts used in wakenet9s + **/ +esp_mfcc_opts_t *get_mfcc_opts_wn9s16(); + +/** + * @brief Return basic opts for default kaldifeat + * + opts->psram_first = true; + opts->use_power = true; + opts->use_log_fbank = 2; // log(max(x, log_epsilon)) + opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps + opts->win_type = "povey"; + opts->low_freq = 20; + opts->high_freq = 7600; + opts->samp_freq = 16000; + opts->nch = 1; + opts->nfft = 512; + opts->nfilter = 80; + opts->numcep = 80; + opts->preemph = 0.97; + opts->append_energy = false; + opts->winlen_ms = 25; + opts->winstep_ms = 10; + opts->remove_dc_offset = true; + * + **/ +esp_mfcc_opts_t *get_mfcc_opts_kaldi(); + +/** + * @brief Print mfcc opts + **/ +void print_mfcc_opts(esp_mfcc_opts_t *opts); diff --git a/include/esp32c3/esp_speech_features.h b/include/esp32c3/esp_speech_features.h new file mode 100644 index 0000000..c1659f9 --- /dev/null +++ b/include/esp32c3/esp_speech_features.h @@ -0,0 +1,62 @@ +#pragma once +#include "c_speech_features_config.h" +#include "stdlib.h" +#include +#include + +#ifndef M_2PI +#define M_2PI 6.283185307179586476925286766559005 +#endif + +typedef struct { + float *coeff; + int *bank_pos; + int nfilter; +} esp_mel_filter_t; + +float *esp_mfcc_malloc(size_t size, bool from_psram); + +void esp_mfcc_free(void *ptr); + +/** + * @brief Initialize FFT table + * @warning For ESP-PLATFORM, use esp-dsp fft + * For Other platform, use kiss fft + * + * @param nfft The input samples number + * @return fft-table + **/ +void *esp_fft_init(int nfft); + +/** + * @brief Free FFT table + * @warning For ESP-PLATFORM, use esp-dsp fft + * For Other platform, use kiss fft + * + * @param fft_table The fft table initialized by esp_fft_init + * @param nfft The input samples number + * @return fft-table + **/ +void esp_fft_deinit(void *fft_table, int nfft); + +/** + * @brief Initial window function + * Currently support hanning, hamming, sine, povey, rectangular, + * wn9(512-hanning to get wakenet9& multinet5 compatible) + **/ +float *esp_win_func_init(char *win_type, float *window_data, int frame_length); + +float *esp_fftr(float *x, int nfft, void *fft_table); + +float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_table); + +void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc); + +float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last); + +esp_mel_filter_t *esp_mel_filter_init( + int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram); + +void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter); + +float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon); diff --git a/include/esp32c3/esp_wn_iface.h b/include/esp32c3/esp_wn_iface.h new file mode 100644 index 0000000..44bab8d --- /dev/null +++ b/include/esp32c3/esp_wn_iface.h @@ -0,0 +1,215 @@ +#pragma once +#include "stdint.h" +#include "dl_lib_convq_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Opaque model data container +typedef struct model_iface_data_t model_iface_data_t; + +/** + * @brief The state of wakeup + */ +typedef enum +{ + WAKENET_NO_DETECT = 0, // wake word is not detected + WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified + WAKENET_DETECTED = 1 // wake word is detected +} wakenet_state_t; + +//Set wake words recognition operating mode +//The probability of being wake words is increased with increasing mode, +//As a consequence also the false alarm rate goes up +typedef enum { + DET_MODE_90 = 0, // Normal + DET_MODE_95 = 1, // Aggressive + DET_MODE_2CH_90 = 2, + DET_MODE_2CH_95 = 3, + DET_MODE_3CH_90 = 4, + DET_MODE_3CH_95 = 5, +} det_mode_t; + +typedef struct { + int wake_word_num; //The number of all wake words + char **wake_word_list; //The name list of wake words +} wake_word_info_t; + +/** + * @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient + * + * @param model_name The specified wake word model coefficient + * @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95 + * @returns Handle to the model data + */ +typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode); + +/** + * @brief Get the amount of samples that need to be passed to the detect function + * + * Every speech recognition model processes a certain number of samples at the same time. This function + * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. + * + * @param model The model object to query + * @return The amount of samples to feed the detect function + */ +typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); + +/** + * @brief Get the channel number of samples that need to be passed to the detect function + * + * Every speech recognition model processes a certain number of samples at the same time. This function + * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. + * + * @param model The model object to query + * @return The amount of samples to feed the detect function + */ +typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model); + +/** + * @brief Get the start point of wake word when one wake word is detected. + * + * @Warning: This function should be called when the channel index is verified. + * The returned value is the number of samples from start point of wake word to detected point. + * + * @param model The model object to query + * @return The number of samples from start point to detected point (end point) + */ +typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model); + + +/** + * @brief Get the sample rate of the samples to feed to the detect function + * + * @param model The model object to query + * @return The sample rate, in hz + */ +typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model); + +/** + * @brief Get the number of wake words + * + * @param model The model object to query + * @returns the number of wake words + */ +typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model); + +/** + * @brief Get the name of wake word by index + * + * @Warning The index of wake word start with 1 + + * @param model The model object to query + * @param word_index The index of wake word + * @returns the detection threshold + */ +typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index); + +/** + * @brief Set the detection threshold to manually abjust the probability + * + * @param model The model object to query + * @param det_treshold The threshold to trigger wake words, the range of det_threshold is 0.5~0.9999 + * @param word_index The index of wake word + * @return 0: setting failed, 1: setting success + */ +typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index); + +/** + * @brief Get the wake word detection threshold of different modes + * + * @param model The model object to query + * @param word_index The index of wake word + * @returns the detection threshold + */ +typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index); + +/** + * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found. + * + * @Warning The index of wake word start with 1, 0 means no wake words is detected. + * + * @param model The model object to query + * @param samples An array of 16-bit signed audio samples. The array size used can be queried by the + * get_samp_chunksize function. + * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); + +/** + * @brief Get the volume gain + * + * @param model The model object to query + * @param target_db The target dB to calculate volume gain + * @returns the volume gain + */ +typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db); + +/** + * @brief Get the triggered channel index. Channel index starts from zero + * + * @param model The model object to query + * @return The channel index + */ +typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model); + +/** + * @brief Clean all states of model + * + * @param model The model object to query + */ +typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model); + +/** + * @brief Destroy a speech recognition model + * + * @param model Model object to destroy + */ +typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); + +/** + * @brief Feed MFCC of an audio stream to the vad model and detect whether is + * voice. + * + * @param model The model object to query + * @param cq An array of 16-bit MFCC. + * @return The index of wake words, return 0 if no wake word is detected, else + * the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq); + +/** + * @brief Get MFCC of an audio stream + * + * @param model The model object to query + * @return MFCC data + */ +typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model); + + +/** + * This structure contains the functions used to do operations on a wake word detection model. + */ +typedef struct { + esp_wn_iface_op_create_t create; + esp_wn_iface_op_get_start_point_t get_start_point; + esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize; + esp_wn_iface_op_get_channel_num_t get_channel_num; + esp_wn_iface_op_get_samp_rate_t get_samp_rate; + esp_wn_iface_op_get_word_num_t get_word_num; + esp_wn_iface_op_get_word_name_t get_word_name; + esp_wn_iface_op_set_det_threshold_t set_det_threshold; + esp_wn_iface_op_get_det_threshold_t get_det_threshold; + esp_wn_iface_op_get_triggered_channel_t get_triggered_channel; + esp_wn_iface_op_get_vol_gain_t get_vol_gain; + esp_wn_iface_op_detect_t detect; + esp_wn_iface_op_detect_mfcc_t detect_mfcc; + esp_wn_iface_op_get_mfcc_data_t get_mfcc_data; + esp_wn_iface_op_clean_t clean; + esp_wn_iface_op_destroy_t destroy; +} esp_wn_iface_t; + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/esp32c3/esp_wn_models.h b/include/esp32c3/esp_wn_models.h new file mode 100644 index 0000000..3a4d7e4 --- /dev/null +++ b/include/esp32c3/esp_wn_models.h @@ -0,0 +1,52 @@ +#pragma once +#include "esp_wn_iface.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// The prefix of wakenet model name is used to filter all wakenet from availabel models. +#define ESP_WN_PREFIX "wn" + +/** + * @brief Get the wakenet handle from model name + * + * @param model_name The name of model + * @returns The handle of wakenet + */ +const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); + +/** + * @brief Get the wake word name from model name + * + * @param model_name The name of model + * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue" + */ +char* esp_wn_wakeword_from_name(const char *model_name); + +#ifdef __cplusplus +} +#endif + +/* + +static const sr_model_iface_t *model = esp_wn_handle_from_name(model_name); + +//Initialize wakeNet model data +static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90); + +//Set parameters of buffer +int audio_chunksize=model->get_samp_chunksize(model_data); +int frequency = model->get_samp_rate(model_data); +int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t)); + +//Detect +int r=model->detect(model_data, buffer); +if (r>0) { + printf("Detection triggered output %d.\n", r); +} + +//Destroy model +model->destroy(model_data) + +*/ diff --git a/include/esp32c6/c_speech_features_config.h b/include/esp32c6/c_speech_features_config.h new file mode 100644 index 0000000..e21e020 --- /dev/null +++ b/include/esp32c6/c_speech_features_config.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include + +/* #undef ENABLE_DOUBLE */ + +#ifdef ENABLE_DOUBLE +# define csf_float double +# define csf_ceil ceil +# define csf_floor floor +# define csf_sin sin +# define csf_log log +# define csf_log10 log10 +# define csf_pow pow +# define csf_sqrt sqrt +# define csf_abs fabs +# define csf_float_min DBL_MIN +#else +# define csf_float float +# define csf_ceil ceilf +# define csf_floor floorf +# define csf_sin sinf +# define csf_log logf +# define csf_log10 log10f +# define csf_pow powf +# define csf_sqrt sqrtf +# define csf_abs fabsf +# define csf_float_min FLT_MIN +#endif diff --git a/include/esp32c6/dl_lib.h b/include/esp32c6/dl_lib.h new file mode 100644 index 0000000..47e7c86 --- /dev/null +++ b/include/esp32c6/dl_lib.h @@ -0,0 +1,418 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_H +#define DL_LIB_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" + +#ifdef ESP_PLATFORM +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#include "esp_heap_caps.h" +#include "sdkconfig.h" +#define DL_SPIRAM_SUPPORT 1 +#endif + +#ifdef CONFIG_IDF_TARGET_ESP32S3 +#include "esp32s3/rom/cache.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int padding_state; + +// /** +// * @brief Allocate a chunk of memory which has the given capabilities. +// * Equivalent semantics to libc malloc(), for capability-aware memory. +// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT). +// * +// * @param size In bytes, of the amount of memory to allocate +// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned +// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM +// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off +// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA +// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation +// * @return Pointer to currently allocated heap memory +// **/ +// void *heap_caps_malloc(size_t size, uint32_t caps); + +/** + * @brief Allocate aligned memory from internal memory or external memory. + * if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM + * else, allocate memory from PSRAM + * + * @param cnt Number of continuing chunks of memory to allocate + * @param size Size, in bytes, of a chunk of memory to allocate + * @param align Aligned size, in bits + * @return Pointer to currently allocated heap memory + */ +void *dl_lib_calloc(int cnt, int size, int align); + +/** + * @brief Always allocate aligned memory from external memory. + * + * @param cnt Number of continuing chunks of memory to allocate + * @param size Size, in bytes, of a chunk of memory to allocate + * @param align Aligned size, in bits + * @return Pointer to currently aligned heap memory + */ +void *dl_lib_calloc_psram(int cnt, int size, int align); + +/** + * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram` + * + * @param ptr Pointer to free + */ +void dl_lib_free(void *ptr); + +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * As described in https://codingforspeed.com/using-faster-exponential-approximation/ + * Should be good til an input of 5 or so with a steps factor of 8. + * + * @param in Floating point input + * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes. + * @return Exp()'ed output + */ +fptp_t fast_exp(double x, int steps); + +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * @param in Floating point input + * @return Exp()'ed output + */ +double fast_exp_pro(double x); + +/** + * @brief Does a softmax operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a softmax operation on a quantized matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a floating point number + * + * @param in Floating point input + * @return Sigmoid output + */ + +fptp_t dl_sigmoid_op(fptp_t in); + + +/** + * @brief Does a sigmoid operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out); + +/** + * @brief Does a tanh operation on a floating point number + * + * @param in Floating point input number + * @return Tanh value + */ +fptp_t dl_tanh_op(fptp_t v); + +/** + * @brief Does a tanh operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a floating point number + * + * @param in Floating point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +fptp_t dl_relu_op(fptp_t in, fptp_t clip); + +/** + * @brief Does a ReLu operation on a matrix. + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Fully connected layer operation + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Biases for the neurons. Can be NULL if a bias of 0 is required. + * @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix. + */ +void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out); + +/** + * @brief Pre-calculate the sqrtvari variable for the batch_normalize function. + * The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence, + * this matrix only needs to be calculated once. This function does that. + * + * @param + * @return + */ +void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out); + +/** + * @brief Batch-normalize a matrix + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @return + */ +void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale, + const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari); + +/** + * @brief Do a basic LSTM layer pass. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a basic LSTM layer pass, partial quantized version. + * This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons, need to be quantised + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a fully-connected layer pass, fully-quantized version. + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Bias values of the neurons. Can be NULL if no bias is needed. + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift); + +/** + * @brief Do a basic LSTM layer pass, fully-quantized version + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift); + +/** + * @brief Batch-normalize a matrix, fully-quantized version + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return + */ +void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale, + const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift); + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number + * This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +qtp_t dl_relu_q_op(qtp_t in, qtp_t clip); + +/** + * @brief Does a ReLu operation on a matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return Sigmoid output + */ +int dl_sigmoid_op_q(const int in); +int16_t dl_sigmoid_op_q8(const int16_t in); +/** + * @brief Does a sigmoid operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return tanh output + */ +int dl_tanh_op_q(int v); +int16_t dl_tanh_op_q8(int16_t v); + +void load_mat_psram_mn4(void); +void load_mat_psram_mn3(void); +void free_mat_psram_mn4(void); +void free_mat_psram_mn3(void); +qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent); +qtp_t dl_hard_tanh_op(qtp_t in, int exponent); + +int16_t dl_table_tanh_op(int16_t in, int exponent); +int16_t dl_table_sigmoid_op(int16_t in, int exponent); + +void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); +void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); +void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + + +/** + * @brief Filter out the number greater than clip in the matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Filter out the number greater than clip in the matrix, float version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); +/** + * @brief Do a basic CNN layer pass. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height + * @param bias Bias for the CNN layer. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1. + * @return The result of CNN layer. + */ +dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + + +/** + * @brief Do a basic CNN layer pass, quantised wersion. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height, + * @param bias Bias of the neurons. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1 + * @return The result of CNN layer + */ +dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c6/dl_lib_coefgetter_if.h b/include/esp32c6/dl_lib_coefgetter_if.h new file mode 100644 index 0000000..a21de8d --- /dev/null +++ b/include/esp32c6/dl_lib_coefgetter_if.h @@ -0,0 +1,80 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_COEFGETTER_IF_H +#define DL_LIB_COEFGETTER_IF_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" +#include "cJSON.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by +//dl_batch_normalize_get_sqrtvar first. +#define COEF_GETTER_HINT_BNVAR (1<<0) + +/* +This struct describes the basic information of model data: +word_num: the number of wake words or speech commands +word_list: the name list of wake words or speech commands +thres_list: the threshold list of wake words or speech commands +info_str: the string used to reflect the version and information of model data + which consist of the architecture of network, the version of model data, wake words and their threshold +*/ +typedef struct { + int word_num; + char **word_list; + int *win_list; + float *thresh_list; + char *info_str; +} model_info_t; + +/* +Alphabet struct describes the basic grapheme or phoneme. +item_num: the number of baisc item(grapheme or phonemr) +items: the list of basic item +*/ +typedef struct { + int item_num; + char **items; +}alphabet_t; + +/* +This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network. +For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the +coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument +to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument +is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the +memory for the returned matrices, when applicable. +*/ +typedef struct { + const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); + const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); + const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint); + void (*free_f)(const dl_matrix2d_t *m); + void (*free_q)(const dl_matrix2dq_t *m); + void (*free_q8)(const dl_matrix2dq8_t *m); + const model_info_t* (*getter_info)(void *arg); + const alphabet_t* (*getter_alphabet)(void *arg); + const cJSON* (*getter_config)(void *arg); +} model_coeff_getter_t; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c6/dl_lib_conv_queue.h b/include/esp32c6/dl_lib_conv_queue.h new file mode 100644 index 0000000..7cb9bf9 --- /dev/null +++ b/include/esp32c6/dl_lib_conv_queue.h @@ -0,0 +1,180 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONV_QUEUE_H +#define DL_LIB_CONV_QUEUE_H + + +#include "dl_lib_matrix.h" +#ifdef __cplusplus +extern "C" { +#endif + +typedef float fptp_t; + +//Flags for matrices +// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */ + +//Float convolution FIFO queue. +typedef struct { + int n; /*< the length of queue */ + int c; /*< the channel number of queue element*/ + int front; /*< the front(top) position of queue */ + int flag; /*< not used*/ + fptp_t *item; /*< Pointer to item array */ +} dl_conv_queue_t; + +/** + * @brief Allocate a convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc(int n, int c); + +/** + * @brief Allocate a convolution queue from psram + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c); + +/** + * @brief Free a convolution queue + * + * @param cq The convolution queue to free + */ +void dl_conv_queue_free(dl_conv_queue_t *cq); + +void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input convolution queue + * @return Pointer of oldest element + */ +fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq); + +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input convolution queue + * @param item The new element + */ +void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item); + + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a sigmoid operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a tanh operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a softmax operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset); + +fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset); +fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset); +dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b); +dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); +/** + * @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @return The result of atrous convolution + */ +fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); +fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @return The result of dilation layer + */ +fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias, + dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias); + + +void test_atrous_conv(int size, int rate, int in_channel, int out_channel); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c6/dl_lib_convq8_queue.h b/include/esp32c6/dl_lib_convq8_queue.h new file mode 100644 index 0000000..28c5da7 --- /dev/null +++ b/include/esp32c6/dl_lib_convq8_queue.h @@ -0,0 +1,303 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ8_QUEUE_H +#define DL_LIB_CONVQ8_QUEUE_H + + +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" +#include "dl_lib_conv_queue.h" +#include "dl_lib_convq_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//[nch, n, c] +typedef struct { + int n; /*< the length of queue */ + int c; /*< the number of queue element*/ + int front; /*< the front(top) position of queue */ + int nch; /*< the channel of queue */ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + q8tp_t *itemq; /*< Pointer to item array */ +} dl_convq8_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c); + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param c The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch); + +/** + * @brief Allocate a bit fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq8_queue_free(dl_convq8_queue_t *cq); + +/** + * @brief Set itemq of convolution queue to 0 + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq); +q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param ch Channel index of queue + * @return Pointer of the element + */ +q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel Kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + * @return The result of atrous convolution + */ +void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias, + int out_exponent, int offset, int prenum); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + * @return The result of dilation layer + */ +void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias, + int offset, int prenum); + + + + +dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2); + +int8_t dl_sigmoid_lutq8(int in); +/** + * @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch  The channel number + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch); + +/** + * @brief Free a 8-bit fixed-point Multi-Channel convolution queue + * + * @param cqm The fixed-point convolution queue to free + * @param nch The channel number + */ +void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch); + +/** + * @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input + * + * @param cqm Input 8-bit fixed-point Multi-Channel convolution queue + * @param offset Offset used to calculate the beginning of input conv queue + * @param nch The channel number + */ +void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch); + +/** + * @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * Usually, this layer is used as first layer for 8-bit network. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * Input is a 16-bit queue point, Output is an 8-bit queue point. + * + * @param in Input 16bit fixed-point convolution queue array + * @param out Output 8bit fixed-point convolution queue array + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Exponent of output + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum); + +/** + * @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input 8bit fixed-point convolution queue array + * @param out Output 8bit fixed-point convolution queue array + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Exponent of output + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, + int nch, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias, + int out_exponent, int offset, int prenum); + +/** + * @brief Fast implement of 8-bit dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input 8-bit fixed-point convolution queue + * @param out Output 8-bit fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias, + int offset, int prenum); + +void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch); + + + +dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c); + +qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + + +qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + +dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c, + dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight, + const dl_matrix2dq_t *bias, int prenum); + +qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum); + +void print_convq8(dl_convq8_queue_t *cq, int offset); +void print_convq(dl_convq_queue_t *cq, int offset); +void dl_relu_convq8(dl_convq8_queue_t *cq); + +void lstmq8_free(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c6/dl_lib_convq_queue.h b/include/esp32c6/dl_lib_convq_queue.h new file mode 100644 index 0000000..ff190fe --- /dev/null +++ b/include/esp32c6/dl_lib_convq_queue.h @@ -0,0 +1,382 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ_QUEUE_H +#define DL_LIB_CONVQ_QUEUE_H + +#include "dl_lib_matrixq.h" +#include "dl_lib_conv_queue.h" +#include "dl_lib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//fixed-point convolution FIFO queue. +//[nch, n, c] +typedef struct { + int n; /*< the length of queue */ + int c; /*< the number of queue element*/ + int front; /*< the front(top) position of queue */ + int nch; /*< the multiple of queue*/ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + qtp_t *itemq; /*< Pointer to item array */ +} dl_convq_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc(int n, int c); + +/** + * @brief Allocate a fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c); + +/** + * @brief Allocate a fixed-point multi-channel convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of conv queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch); + +/** + * @brief Allocate a fixed-point multi-channel convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of conv queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch); + + +void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq_queue_free(dl_convq_queue_t *cq); + +/** + * @brief Set itemq of convolution queue to 0 + * + * @param cq The fixed-point convolution queue point + */ +void dl_convq_queue_bzero(dl_convq_queue_t *cq); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq); +qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n); +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input fixed-point convolution queue + * @param item The new element + */ +void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param last_num Offset from the front of the queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param ch Channel index of convolution queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_tanh_convq(dl_convq_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in multi channel convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point multi channnel convolution queue + * @param offset Offset from the front of the queue + * @param nch The channel number of cqm + * @return Pointer of the element + */ +void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch); + +/** + * @brief Does a relu operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * relu operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, input data + stay as it is. Results are saved into the *out* array. + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param out Old array to re-use. Passing NULL will allocate a new matrix. + * @return softmax results + */ +fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @return The result of atrous convolution + */ +qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift, int offset, int prenum); + + +qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift, int prenum); + +qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + + +qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum); + +/** + * @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue + * + * @param cq1 First fixed-point convolution queue + * @param cq2 Seconf fixed-point convolution queue + * @return The result of float-point convolution queue + */ +dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2); + +/** + * @brief Fast implement of LSTM layer by dl_atrous_conv1dq function + * + * @Warning LSTM kernel is split into two part, the first part input is the last layer output, + * and kernel is parameter *in_weight*. The second part input is the last frame LSTM output, + * the kernel is parameters *h_weight*. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param in_weight the LSTM kernel needed by first part + * @param h_weight the LSTM kernel needed by second part + * @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required. + * @in_shift Shift ratio used in first part + * @h_shift Shift ratio used in second part + * @return The result of LSTM layer + */ +dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c, + dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight, + const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum); +dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift); + +dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c, + dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight, + dl_matrix2dq_t *bias, int prenum); + +/** + * @brief Allocate a fixed-point multi channel convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @param nch the channel numbet of convolution queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch); + +/** + * @brief Free a fixed-point multi channel convolution queue + * + * @param cqm The fixed-point convolution queue to free + * @param nch The channel number of cqm + */ +void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param nch The channel number of input + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @param offset the offset to calculate input convq + * @param prenum the preload size, 0: do not use preload function + * @return The result of atrous convolution + */ +qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in, + dl_convq_queue_t **out, + int nch, + int rate, + int size, + dl_matrix2dq_t* kernel, + dl_matrix2dq_t* bias, + int shift, + int offset, + int prenum); + +/** + * @brief Fast implement of dilation layer as follows for multi channel input + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param nch The channel number of input + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @param offset The offset to calculate input convq + * @param prenum The preload size, 0: do not use preload function + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in, + dl_convq_queue_t **out, + int nch, + int rate, + int size, + dl_matrix2dq_t* filter_kernel, + dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, + dl_matrix2dq_t* gate_bias, + int filter_shift, + int gate_shift, + int offset, + int prenum); + +void test_atrous_convq(int size, int rate, int in_channel, int out_channel); +void test_lstm_convq(int size, int in_dim, int lstm_cell); +void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch); +void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch); +void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c6/dl_lib_matrix.h b/include/esp32c6/dl_lib_matrix.h new file mode 100644 index 0000000..59f7d79 --- /dev/null +++ b/include/esp32c6/dl_lib_matrix.h @@ -0,0 +1,257 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIX_H +#define DL_LIB_MATRIX_H + +#ifdef ESP_PLATFORM +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef float fptp_t; + +#if CONFIG_BT_SHARE_MEM_REUSE +extern multi_heap_handle_t gst_heap; +#endif + +//Flags for matrices +#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */ +#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */ + +//'Normal' float matrix +typedef struct { + int w; /*< Width */ + int h; /*< Height */ + int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */ + int flags; /*< Flags. OR of DL_MF_* values */ + fptp_t *item; /*< Pointer to item array */ +} dl_matrix2d_t; + +//Macro to quickly access the raw items in a matrix +#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride] + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_alloc(int w, int h); + + +/** + * @brief Free a matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrix_free(dl_matrix2d_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrix_zero(dl_matrix2d_t *m); + +/** + * @brief Copy the matrix into psram + * Copy the matrix from flash or iram/psram into psram + * + * @param m Matrix to zero + */ +dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief Generate a matrix from existing floating-point data + * + * @param w Width of resulting matrix + * @param h Height of resulting matrix + * @param data Data to populate matrix with + * @return A newaly allocated matrix populated with the given input data, or NULL if out of memory. + */ +dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data); + + +/** + * @brief Multiply a pair of matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two matrices : res=a.b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Add a pair of matrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + + +/** + * @brief Divide a pair of matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Subtract a matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Add a constant to every item of the matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add); + + +/** + * @brief Concatenate the rows of two matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated array with as avlues a|b + */ +dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + +dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b); + +/** + * @brief Print the contents of a matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrix(const dl_matrix2d_t *a); + +/** + * @brief Return the average square error given a correct and a test matrix. + * + * ...Well, more or less. If anything, it gives an indication of the error between + * the two. Check the code for the exact implementation. + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return value indicating the relative difference between matrices + */ +float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + + +/** + * @brief Check if two matrices have the same shape, that is, the same amount of rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + +/** + * @brief Get a specific item from the matrix + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) { + return DL_ITM(m, x, y); +} + +/** + * @brief Set a specific item in the matrix to the given value + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) { + DL_ITM(m, x, y)=val; +} + +void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/esp32c6/dl_lib_matrixq.h b/include/esp32c6/dl_lib_matrixq.h new file mode 100644 index 0000000..8ad397b --- /dev/null +++ b/include/esp32c6/dl_lib_matrixq.h @@ -0,0 +1,387 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ_H +#define DL_LIB_MATRIXQ_H + +#include +#include "dl_lib_matrix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int16_t qtp_t; + +//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted +//for easy use as a multiplicand without stressing out the flash cache too much. +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + qtp_t *itemq; +} dl_matrix2dq_t; + +#define DL_QTP_SHIFT 15 +#define DL_QTP_RANGE ((1<itemq[(y)+(x)*m->stride] +#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null + +#define DL_SHIFT_AUTO 32 + +/** + * @info About quantized matrices and shift values + * + * Grab a coffee (or tea, or hot water) and sit down when you read this for the first + * time. Quantized matrices can speed up your operations, but come with some quirks, and + * it's good to understand how they work before using them. + * + * The data in the quantized matrix type is stored similarily to floating-point types: + * when storing a real value, the value is stored as a mantissa (base number) and an + * exponent. The 'real' value that can be re-derived from those two numbers is something + * similar to mantissa*2^exponent. Up to this point, there's not that much difference from + * the standard floating point implementations like e.g. IEEE-754. + * + * The difference with respect to quantized matrices is that for a quantized matrix, it is + * assumed all values stored have more-or-less the same order of magnitude. This allows the + * matrix to only store all the mantissas, while the exponents are shared; there is only one + * exponent for the entire matrix. This makes it quicker to handle matrix operations - the + * logic to fix the exponents only needs to happen once, while the rest can be done in simple + * integer arithmetic. It also nets us some memory savings - while normally a floating point + * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the + * memory requirements. + * + * While most of the details of handling the intricacies of the quantized matrixes are done + * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, + * specifically in places where addition/subtraction/division happens. + * + * The problem is that the routines do not know what the size of the resulting operation is. For + * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough + * to overflow the mantissa of the result if the exponent is the same. However, if by default we + * assume the mantissas needs to be scaled back, we may lose precision. + * + * In order to counter this, all operations that have this issue have a ``shift`` argument. If + * the argument is zero, the routine will be conservative, that is, increase the exponent of + * the result to such an extent it's mathematically impossible a value in the result will exceed + * the maximum value that can be stored. However, when this argument is larger than zero, the + * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision + * but increasing the chance of some of the calculated values not fitting in the mantissa anymore. + * If this happens, the value will be clipped to the largest (or, for negative values, smallest) + * value possible. (Neural networks usually are okay with this happening for a limited amount + * of matrix indices). + * + * For deciding on these shift values, it is recommended to start with a shift value of one, then + * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. + * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only + * shift values of 0 or 1 make sense; these routines will error out if you try to do something + * else. + * + * For neural networks and other noise-tolerant applications, note that even when + * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead + * to slightly improved precision. Feel free to experiment. + **/ + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_alloc(int w, int h); +dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h); +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out); + +/** + * TODO: DESCRIBE THIS FUNCTION + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit); + + +/** + * @brief Convert a quantized matrix to a floating-point one. + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + **/ +dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out); + + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq_free(dl_matrix2dq_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrixq_zero(dl_matrix2dq_t *m); + +/** + * @brief Copy the matrix into psram + * Copy the matrix from flash or iram/psram into psram + * + * @param m Matrix to copy + */ +dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a fixed-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be + * much slower than dl_matrixq_dot . + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a floating-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be + * much slower than dl_matrixq_dot_matrix_out. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix. + * + * @param a First multiplicand; float matrix + * @param b Second multiplicand; quantized matrix + * @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + + +/** + * @brief Print the contents of a quantized matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrixq(const dl_matrix2dq_t *a); + + +/** + * @brief Add a pair of quantizedmatrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @Warning In contrast to the floating point equivalent of this function, the fixed-point version + * of this has the issue that as soon as the output exponent of one of the slices changes, the data + * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you + * use this function, either treat the slices as read-only, or assume the sliced matrix contains + * garbage after modifying the data in one of the slices. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief Subtract a quantized matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Multiply a pair of quantized matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that matrix. + */ +void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res); + +/** + * @brief Divide a pair of quantized matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift); + +/** + * @brief Check if two quantized matrices have the same shape, that is, the same amount of + * rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Concatenate the rows of two quantized matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated quantized matrix with as values a|b + */ +dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Add a constant to every item of the quantized matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift); + +/** + * @brief Check the sanity of a quantized matrix + * + * Due to the nature of quantized matrices, depending on the calculations a quantized + * matrix is the result of and the shift values chosen in those calculations, a quantized + * matrix may have an exponent and mantissas that lead to a loss of precision, either because + * most significant mantissa bits are unused, or because a fair amount of mantissas are + * clipped. This function checks if this is the case and will report a message to stdout + * if significant loss of precision is detected. + * + * @param m The quantized matrix to check + * @param name A string to be displayed in the message if the sanity check fails + * @return True if matrix is sane, false otherwise + **/ + +int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name); + +/** + * @brief re-adjust the exponent of the matrix to fit the mantissa better + * + * This function will shift up all the data in the mantissas so there are no + * most-significant bits that are unused in all mantissas. It will also adjust + * the exponent to keep the actua values in the matrix the same. + * + * Some operations done on a matrix, especially operations that re-use the + * result of earlier operations done in the same way, can lead to the loss of + * data because the exponent of the quantized matrix is never re-adjusted. You + * can do that implicitely by calling this function. + * + * @param m The matrix to re-adjust +**/ +void dl_matrixq_readjust_exp(dl_matrix2dq_t *m); + + + +/** + * @brief Get the floating-point value of a specific item from the quantized matrix + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y); + +/** + * @brief Set a specific item in the quantized matrix to the given + * floating-point value + * + * @warning If the given value is more than the exponent in the quantized matrix + * allows for, all mantissas in the matrix will be shifted down to make the value + * 'fit'. If, however, the exponent is such that the value would result in a + * quantized mantissa of 0, nothing is done. + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32c6/dl_lib_matrixq8.h b/include/esp32c6/dl_lib_matrixq8.h new file mode 100644 index 0000000..377df7c --- /dev/null +++ b/include/esp32c6/dl_lib_matrixq8.h @@ -0,0 +1,80 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ8_H +#define DL_LIB_MATRIXQ8_H + +#include +#include "dl_lib_matrix.h" +#include "dl_lib.h" +#include "dl_lib_matrixq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int8_t q8tp_t; + +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + q8tp_t *itemq; +} dl_matrix2dq8_t; + +#define DL_Q8TP_SHIFT 7 +#define DL_Q8TP_RANGE ((1<itemq[(y)+(x)*m->stride] + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h); + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq8_free(dl_matrix2dq8_t *m); + +/** + * @brief Copy a quantized matrix + * Copy a quantized matrix from flash or iram/psram + * + * @param m Matrix to copy + */ +dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m); + +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/esp32c6/esp_aec.h b/include/esp32c6/esp_aec.h new file mode 100644 index 0000000..36de9c1 --- /dev/null +++ b/include/esp32c6/esp_aec.h @@ -0,0 +1,105 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_AEC_H_ +#define _ESP_AEC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define USE_AEC_FFT // Not kiss_fft +#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz +#define AEC_FRAME_LENGTH_MS 32 + +typedef struct aec_handle_t aec_handle_t; +typedef enum { + AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC fro speech recognition + AEC_MODE_SR_HIGH_PERF = 1, // High Perforamce AEC for speech recognition + AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication + AEC_MODE_VOIP_HIGH_PERF = 4, // High Perforamce AEC for voice communication +} aec_mode_t; + +/** + * @brief Creates an instance to the AEC structure. + * Please get frame size by aec_get_chunksize() function + * + * @param sample_rate The Sampling frequency (Hz) must be 16000. + * @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption. + * @param channel_num The input microphone channel number + * @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode); + +/** + * @brief Creates an instance to the AEC structure, same with aec_create(). + * + * @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption. + * @param channel_num The input microphone channel number + * @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode); + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc(). + * + * @param inst The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..." + * @param indata An array of 16-bit signed audio samples from mic. + * @param refdata An array of 16-bit signed audio samples sent to the speaker. + * @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..." + * @return None + * + */ +void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata); + +/** + * @brief Get frame size of AEC (the samples of one frame) + * @param handle The instance of AEC. + * @return Frame size + */ +int aec_get_chunksize(const aec_handle_t *handle); + +/** + * @brief Get AEC mode string + * + * @param aec_mode The mode of AEC. + * + * @return AEC mode string + */ +char * aec_get_mode_string(aec_mode_t aec_mode); + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void aec_destroy(aec_handle_t *handel); + +#ifdef __cplusplus +} +#endif + +#endif //_ESP_AEC_H_ diff --git a/include/esp32c6/esp_afe_aec.h b/include/esp32c6/esp_afe_aec.h new file mode 100644 index 0000000..9d60588 --- /dev/null +++ b/include/esp32c6/esp_afe_aec.h @@ -0,0 +1,82 @@ + +#ifndef _ESP_AFE_AEC_H_ +#define _ESP_AFE_AEC_H_ + + +#include "esp_afe_config.h" +#include "esp_aec.h" + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + aec_handle_t* handle; + aec_mode_t mode; + afe_pcm_config_t pcm_config; + int frame_size; + int16_t *data; +}afe_aec_handle_t; + + +/** + * @brief Creates an instance to the AEC structure. + * + * @warning Currently only support 1 microphone channel and 1 playback channe. + * If input has multiple microphone channels and playback channels, just the first microphone channel and playback channel will be selected. + * + * The input format, same as afe config: + * M to represent the microphone channel + * R to represent the playback reference channel + * N to represent an unknown or unused channel + * + * For example, input_format="MMNR" indicates that the input data consists of four channels, + * which are the microphone channel, the microphone channel, an unused channel, and the playback channel + * + * @param input_format The input format + * @param filter_length The length of filter. The larger the filter, the higher the CPU loading. + * Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for esp32c5. + * @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC + * @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF + * + * @return afe_config_t* The default config of afe + */ +afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode); + + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @param inst The instance of AEC. + * @param indata Input audio data, format is define by input_format. Note indata will be modified in function call. + * @param outdata Returns near-end signal with echo removed. + + * @return The bytes of outdata. + */ +size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outdata); + +/** + * @brief Get frame size of AEC (the samples of one frame) + * @param handle The instance of AEC. + * @return Frame size + */ +int afe_aec_get_chunksize(afe_aec_handle_t *handle); + + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void afe_aec_destroy(afe_aec_handle_t *handel); + +#ifdef __cplusplus +} +#endif + +#endif //_ESP_AEC_H_ diff --git a/include/esp32c6/esp_afe_config.h b/include/esp32c6/esp_afe_config.h new file mode 100644 index 0000000..f9de6fe --- /dev/null +++ b/include/esp32c6/esp_afe_config.h @@ -0,0 +1,69 @@ +#pragma once +#include "esp_aec.h" +#include "stdbool.h" +#include "stdint.h" +#include "stdlib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// AFE: Audio Front-End +// SR: Speech Recognition +// VC: Voice Communication + +// Set AFE_SR mode +typedef enum { + SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode + SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode +} afe_sr_mode_t; + +// Set AFE mode +typedef enum { + AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode + AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode +} afe_mode_t; + +// Set AFE type +typedef enum { + AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression + AFE_TYPE_VC = 1, // Voice communication scenarios, including nonlinear noise suppression +} afe_type_t; + +typedef enum { + AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram + AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance + AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram +} afe_memory_alloc_mode_t; + +typedef enum { + AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB + AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB + AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetcg is -3dB + AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain +} afe_mn_peak_agc_mode_t; + +typedef struct { + int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel + int mic_num; // microphone channel number + uint8_t *mic_ids; // microphone channel indices + int ref_num; // playback reference channel number + uint8_t *ref_ids; // playback reference channel indices + int sample_rate; // sample rate of audio +} afe_pcm_config_t; + +typedef enum { + AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC" + AFE_NS_MODE_NET = 1, // please use model name of NSNET +} afe_ns_mode_t; + +typedef enum { + AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC + AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated +} afe_agc_mode_t; + + +#ifdef __cplusplus +} +#endif + diff --git a/include/esp32c6/esp_mfcc_fbank_int16.h b/include/esp32c6/esp_mfcc_fbank_int16.h new file mode 100644 index 0000000..22a5f2c --- /dev/null +++ b/include/esp32c6/esp_mfcc_fbank_int16.h @@ -0,0 +1,86 @@ +#pragma once +#include "esp_speech_features.h" +#include + +/* +This describes an interface for a MFCC runner, that is, some kind of implementation that can be +fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so +multiple implementations can be used. +*/ + +typedef struct esp_mfcc_data_t esp_mfcc_data_t; + +// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features), +// please refer to its documentation for details. +typedef struct { + int winstep_ms; // The step between successive windows in ms. (10) + int winlen_ms; // The length of the analysis window in ms. (25) + int nch; // The number of input channel + int numcep; // The number of cepstrum to return + int nfilter; // The number of filters in the filterbank + int nfft; // The FFT size + int samp_freq; // The sample-rate of the signal. + int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0) + int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq + float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) + char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey" + bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy + bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum + int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon)) + float log_epsilon; // log epsilon. (e.g. 1e-7) + bool psram_first; // Alloc memory from PSRAM first + bool remove_dc_offset; // Whether to subtract mean of wave before FFT +} esp_mfcc_opts_t; + +/** + * @brief Un-initialize and free a mfcc runner + * + * Function to free a previously allocated mfcc runner. + * + * @param r Runner object to destroy + */ +typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r); + +/** + * @brief Initialize parameters for a mfcc runner. + * + * After creation, a mfcc runner needs to be initialized first; this is usually done + * in the initialization routine of a speech recognition algorithm. This provides + * a pointer to do this for a specific mfcc runner. + * + * @param opt Options for the mfcc process + * @return True if success, false on error. + */ +typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt); + +/** + * @brief Run a mfcc iteration on frame by frame + * + * This will take a set of samples and return a ceptrum. Note that this may be pipelined: + * an initial call to this function may return NULL and subsequent calls may return the + * cepstrum of previous calls. + * + * @param r The mfcc runner + * @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000). + * @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function + * when done with this buffer. Note that some implementations require the buffer to be freed before another call + * to this function is done. + */ +typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch); + +/** + * @brief Clean all state of mfcc handle + * + * @param r The mfcc runner + */ +typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r); + +/** + * @brief Operations possible on a mfcc runner + */ +typedef struct { + esp_mfcc_op_destroy_t destroy; + esp_mfcc_op_create_t create; + esp_mfcc_op_run_step_t run_step; + esp_mfcc_op_clean_t clean; +} esp_mfcc_iface_t; diff --git a/include/esp32c6/esp_mfcc_iface.h b/include/esp32c6/esp_mfcc_iface.h new file mode 100644 index 0000000..0257768 --- /dev/null +++ b/include/esp32c6/esp_mfcc_iface.h @@ -0,0 +1,89 @@ +#pragma once +#include "esp_speech_features.h" +#include + +/* +This describes an interface for a MFCC runner, that is, some kind of implementation that can be +fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so +multiple implementations can be used. +*/ + +typedef struct esp_mfcc_data_t esp_mfcc_data_t; + +// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features), +// please refer to its documentation for details. +typedef struct { + int winstep_ms; // The step between successive windows in ms. (10) + int winlen_ms; // The length of the analysis window in ms. (25) + int nch; // The number of input channel + int numcep; // The number of cepstrum to return + int nfilter; // The number of filters in the filterbank + int nfft; // The FFT size + int samp_freq; // The sample-rate of the signal. + int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0) + int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq + float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) + char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey" + bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy + bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum + int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon)) + float log_epsilon; // log epsilon. (e.g. 1e-7) + bool psram_first; // Alloc memory from PSRAM first + bool remove_dc_offset; // Whether to subtract mean of wave before FFT +} esp_mfcc_opts_t; + +/** + * @brief Un-initialize and free a mfcc runner + * + * Function to free a previously allocated mfcc runner. + * + * @param r Runner object to destroy + */ +typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r); + +/** + * @brief Initialize parameters for a mfcc runner. + * + * After creation, a mfcc runner needs to be initialized first; this is usually done + * in the initialization routine of a speech recognition algorithm. This provides + * a pointer to do this for a specific mfcc runner. + * + * @param opt Options for the mfcc process + * @return True if success, false on error. + */ +typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt); + +/** + * @brief Run a mfcc iteration on frame by frame + * + * This will take a set of samples and return a ceptrum. Note that this may be pipelined: + * an initial call to this function may return NULL and subsequent calls may return the + * cepstrum of previous calls. + * + * @param r The mfcc runner + * @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000). + * @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function + * when done with this buffer. Note that some implementations require the buffer to be freed before another call + * to this function is done. + */ +typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch); + +typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank); + +/** + * @brief Clean all state of mfcc handle + * + * @param r The mfcc runner + */ +typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r); + +/** + * @brief Operations possible on a mfcc runner + */ +typedef struct { + esp_mfcc_op_destroy_t destroy; + esp_mfcc_op_create_t create; + esp_mfcc_op_run_step_t run_step; + esp_mfcc_op_run_step_s16_t run_step_s16; + esp_mfcc_op_clean_t clean; +} esp_mfcc_iface_t; diff --git a/include/esp32c6/esp_mfcc_models.h b/include/esp32c6/esp_mfcc_models.h new file mode 100644 index 0000000..44086e8 --- /dev/null +++ b/include/esp32c6/esp_mfcc_models.h @@ -0,0 +1,44 @@ +#pragma once +#include "esp_mfcc_iface.h" + +extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle +extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle + +/** + * @brief Return basic opts used in wakenet9 & multinet5 + **/ +esp_mfcc_opts_t *get_mfcc_opts_wn9(); + +/** + * @brief Return basic opts used in wakenet9s + **/ +esp_mfcc_opts_t *get_mfcc_opts_wn9s16(); + +/** + * @brief Return basic opts for default kaldifeat + * + opts->psram_first = true; + opts->use_power = true; + opts->use_log_fbank = 2; // log(max(x, log_epsilon)) + opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps + opts->win_type = "povey"; + opts->low_freq = 20; + opts->high_freq = 7600; + opts->samp_freq = 16000; + opts->nch = 1; + opts->nfft = 512; + opts->nfilter = 80; + opts->numcep = 80; + opts->preemph = 0.97; + opts->append_energy = false; + opts->winlen_ms = 25; + opts->winstep_ms = 10; + opts->remove_dc_offset = true; + * + **/ +esp_mfcc_opts_t *get_mfcc_opts_kaldi(); + +/** + * @brief Print mfcc opts + **/ +void print_mfcc_opts(esp_mfcc_opts_t *opts); diff --git a/include/esp32c6/esp_speech_features.h b/include/esp32c6/esp_speech_features.h new file mode 100644 index 0000000..c1659f9 --- /dev/null +++ b/include/esp32c6/esp_speech_features.h @@ -0,0 +1,62 @@ +#pragma once +#include "c_speech_features_config.h" +#include "stdlib.h" +#include +#include + +#ifndef M_2PI +#define M_2PI 6.283185307179586476925286766559005 +#endif + +typedef struct { + float *coeff; + int *bank_pos; + int nfilter; +} esp_mel_filter_t; + +float *esp_mfcc_malloc(size_t size, bool from_psram); + +void esp_mfcc_free(void *ptr); + +/** + * @brief Initialize FFT table + * @warning For ESP-PLATFORM, use esp-dsp fft + * For Other platform, use kiss fft + * + * @param nfft The input samples number + * @return fft-table + **/ +void *esp_fft_init(int nfft); + +/** + * @brief Free FFT table + * @warning For ESP-PLATFORM, use esp-dsp fft + * For Other platform, use kiss fft + * + * @param fft_table The fft table initialized by esp_fft_init + * @param nfft The input samples number + * @return fft-table + **/ +void esp_fft_deinit(void *fft_table, int nfft); + +/** + * @brief Initial window function + * Currently support hanning, hamming, sine, povey, rectangular, + * wn9(512-hanning to get wakenet9& multinet5 compatible) + **/ +float *esp_win_func_init(char *win_type, float *window_data, int frame_length); + +float *esp_fftr(float *x, int nfft, void *fft_table); + +float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_table); + +void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc); + +float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last); + +esp_mel_filter_t *esp_mel_filter_init( + int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram); + +void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter); + +float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon); diff --git a/include/esp32c6/esp_wn_iface.h b/include/esp32c6/esp_wn_iface.h new file mode 100644 index 0000000..44bab8d --- /dev/null +++ b/include/esp32c6/esp_wn_iface.h @@ -0,0 +1,215 @@ +#pragma once +#include "stdint.h" +#include "dl_lib_convq_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Opaque model data container +typedef struct model_iface_data_t model_iface_data_t; + +/** + * @brief The state of wakeup + */ +typedef enum +{ + WAKENET_NO_DETECT = 0, // wake word is not detected + WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified + WAKENET_DETECTED = 1 // wake word is detected +} wakenet_state_t; + +//Set wake words recognition operating mode +//The probability of being wake words is increased with increasing mode, +//As a consequence also the false alarm rate goes up +typedef enum { + DET_MODE_90 = 0, // Normal + DET_MODE_95 = 1, // Aggressive + DET_MODE_2CH_90 = 2, + DET_MODE_2CH_95 = 3, + DET_MODE_3CH_90 = 4, + DET_MODE_3CH_95 = 5, +} det_mode_t; + +typedef struct { + int wake_word_num; //The number of all wake words + char **wake_word_list; //The name list of wake words +} wake_word_info_t; + +/** + * @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient + * + * @param model_name The specified wake word model coefficient + * @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95 + * @returns Handle to the model data + */ +typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode); + +/** + * @brief Get the amount of samples that need to be passed to the detect function + * + * Every speech recognition model processes a certain number of samples at the same time. This function + * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. + * + * @param model The model object to query + * @return The amount of samples to feed the detect function + */ +typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); + +/** + * @brief Get the channel number of samples that need to be passed to the detect function + * + * Every speech recognition model processes a certain number of samples at the same time. This function + * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. + * + * @param model The model object to query + * @return The amount of samples to feed the detect function + */ +typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model); + +/** + * @brief Get the start point of wake word when one wake word is detected. + * + * @Warning: This function should be called when the channel index is verified. + * The returned value is the number of samples from start point of wake word to detected point. + * + * @param model The model object to query + * @return The number of samples from start point to detected point (end point) + */ +typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model); + + +/** + * @brief Get the sample rate of the samples to feed to the detect function + * + * @param model The model object to query + * @return The sample rate, in hz + */ +typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model); + +/** + * @brief Get the number of wake words + * + * @param model The model object to query + * @returns the number of wake words + */ +typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model); + +/** + * @brief Get the name of wake word by index + * + * @Warning The index of wake word start with 1 + + * @param model The model object to query + * @param word_index The index of wake word + * @returns the detection threshold + */ +typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index); + +/** + * @brief Set the detection threshold to manually abjust the probability + * + * @param model The model object to query + * @param det_treshold The threshold to trigger wake words, the range of det_threshold is 0.5~0.9999 + * @param word_index The index of wake word + * @return 0: setting failed, 1: setting success + */ +typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index); + +/** + * @brief Get the wake word detection threshold of different modes + * + * @param model The model object to query + * @param word_index The index of wake word + * @returns the detection threshold + */ +typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index); + +/** + * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found. + * + * @Warning The index of wake word start with 1, 0 means no wake words is detected. + * + * @param model The model object to query + * @param samples An array of 16-bit signed audio samples. The array size used can be queried by the + * get_samp_chunksize function. + * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); + +/** + * @brief Get the volume gain + * + * @param model The model object to query + * @param target_db The target dB to calculate volume gain + * @returns the volume gain + */ +typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db); + +/** + * @brief Get the triggered channel index. Channel index starts from zero + * + * @param model The model object to query + * @return The channel index + */ +typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model); + +/** + * @brief Clean all states of model + * + * @param model The model object to query + */ +typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model); + +/** + * @brief Destroy a speech recognition model + * + * @param model Model object to destroy + */ +typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); + +/** + * @brief Feed MFCC of an audio stream to the vad model and detect whether is + * voice. + * + * @param model The model object to query + * @param cq An array of 16-bit MFCC. + * @return The index of wake words, return 0 if no wake word is detected, else + * the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq); + +/** + * @brief Get MFCC of an audio stream + * + * @param model The model object to query + * @return MFCC data + */ +typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model); + + +/** + * This structure contains the functions used to do operations on a wake word detection model. + */ +typedef struct { + esp_wn_iface_op_create_t create; + esp_wn_iface_op_get_start_point_t get_start_point; + esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize; + esp_wn_iface_op_get_channel_num_t get_channel_num; + esp_wn_iface_op_get_samp_rate_t get_samp_rate; + esp_wn_iface_op_get_word_num_t get_word_num; + esp_wn_iface_op_get_word_name_t get_word_name; + esp_wn_iface_op_set_det_threshold_t set_det_threshold; + esp_wn_iface_op_get_det_threshold_t get_det_threshold; + esp_wn_iface_op_get_triggered_channel_t get_triggered_channel; + esp_wn_iface_op_get_vol_gain_t get_vol_gain; + esp_wn_iface_op_detect_t detect; + esp_wn_iface_op_detect_mfcc_t detect_mfcc; + esp_wn_iface_op_get_mfcc_data_t get_mfcc_data; + esp_wn_iface_op_clean_t clean; + esp_wn_iface_op_destroy_t destroy; +} esp_wn_iface_t; + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/esp32c6/esp_wn_models.h b/include/esp32c6/esp_wn_models.h new file mode 100644 index 0000000..3a4d7e4 --- /dev/null +++ b/include/esp32c6/esp_wn_models.h @@ -0,0 +1,52 @@ +#pragma once +#include "esp_wn_iface.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// The prefix of wakenet model name is used to filter all wakenet from availabel models. +#define ESP_WN_PREFIX "wn" + +/** + * @brief Get the wakenet handle from model name + * + * @param model_name The name of model + * @returns The handle of wakenet + */ +const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); + +/** + * @brief Get the wake word name from model name + * + * @param model_name The name of model + * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue" + */ +char* esp_wn_wakeword_from_name(const char *model_name); + +#ifdef __cplusplus +} +#endif + +/* + +static const sr_model_iface_t *model = esp_wn_handle_from_name(model_name); + +//Initialize wakeNet model data +static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90); + +//Set parameters of buffer +int audio_chunksize=model->get_samp_chunksize(model_data); +int frequency = model->get_samp_rate(model_data); +int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t)); + +//Detect +int r=model->detect(model_data, buffer); +if (r>0) { + printf("Detection triggered output %d.\n", r); +} + +//Destroy model +model->destroy(model_data) + +*/ diff --git a/include/esp32s2/c_speech_features_config.h b/include/esp32s2/c_speech_features_config.h new file mode 100644 index 0000000..e21e020 --- /dev/null +++ b/include/esp32s2/c_speech_features_config.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include + +/* #undef ENABLE_DOUBLE */ + +#ifdef ENABLE_DOUBLE +# define csf_float double +# define csf_ceil ceil +# define csf_floor floor +# define csf_sin sin +# define csf_log log +# define csf_log10 log10 +# define csf_pow pow +# define csf_sqrt sqrt +# define csf_abs fabs +# define csf_float_min DBL_MIN +#else +# define csf_float float +# define csf_ceil ceilf +# define csf_floor floorf +# define csf_sin sinf +# define csf_log logf +# define csf_log10 log10f +# define csf_pow powf +# define csf_sqrt sqrtf +# define csf_abs fabsf +# define csf_float_min FLT_MIN +#endif diff --git a/include/esp32s2/dl_lib.h b/include/esp32s2/dl_lib.h new file mode 100644 index 0000000..47e7c86 --- /dev/null +++ b/include/esp32s2/dl_lib.h @@ -0,0 +1,418 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_H +#define DL_LIB_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" + +#ifdef ESP_PLATFORM +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#include "esp_heap_caps.h" +#include "sdkconfig.h" +#define DL_SPIRAM_SUPPORT 1 +#endif + +#ifdef CONFIG_IDF_TARGET_ESP32S3 +#include "esp32s3/rom/cache.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int padding_state; + +// /** +// * @brief Allocate a chunk of memory which has the given capabilities. +// * Equivalent semantics to libc malloc(), for capability-aware memory. +// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT). +// * +// * @param size In bytes, of the amount of memory to allocate +// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned +// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM +// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off +// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA +// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation +// * @return Pointer to currently allocated heap memory +// **/ +// void *heap_caps_malloc(size_t size, uint32_t caps); + +/** + * @brief Allocate aligned memory from internal memory or external memory. + * if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM + * else, allocate memory from PSRAM + * + * @param cnt Number of continuing chunks of memory to allocate + * @param size Size, in bytes, of a chunk of memory to allocate + * @param align Aligned size, in bits + * @return Pointer to currently allocated heap memory + */ +void *dl_lib_calloc(int cnt, int size, int align); + +/** + * @brief Always allocate aligned memory from external memory. + * + * @param cnt Number of continuing chunks of memory to allocate + * @param size Size, in bytes, of a chunk of memory to allocate + * @param align Aligned size, in bits + * @return Pointer to currently aligned heap memory + */ +void *dl_lib_calloc_psram(int cnt, int size, int align); + +/** + * @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram` + * + * @param ptr Pointer to free + */ +void dl_lib_free(void *ptr); + +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * As described in https://codingforspeed.com/using-faster-exponential-approximation/ + * Should be good til an input of 5 or so with a steps factor of 8. + * + * @param in Floating point input + * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes. + * @return Exp()'ed output + */ +fptp_t fast_exp(double x, int steps); + +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * @param in Floating point input + * @return Exp()'ed output + */ +double fast_exp_pro(double x); + +/** + * @brief Does a softmax operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a softmax operation on a quantized matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a floating point number + * + * @param in Floating point input + * @return Sigmoid output + */ + +fptp_t dl_sigmoid_op(fptp_t in); + + +/** + * @brief Does a sigmoid operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out); + +/** + * @brief Does a tanh operation on a floating point number + * + * @param in Floating point input number + * @return Tanh value + */ +fptp_t dl_tanh_op(fptp_t v); + +/** + * @brief Does a tanh operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a floating point number + * + * @param in Floating point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +fptp_t dl_relu_op(fptp_t in, fptp_t clip); + +/** + * @brief Does a ReLu operation on a matrix. + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Fully connected layer operation + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Biases for the neurons. Can be NULL if a bias of 0 is required. + * @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix. + */ +void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out); + +/** + * @brief Pre-calculate the sqrtvari variable for the batch_normalize function. + * The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence, + * this matrix only needs to be calculated once. This function does that. + * + * @param + * @return + */ +void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out); + +/** + * @brief Batch-normalize a matrix + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @return + */ +void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale, + const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari); + +/** + * @brief Do a basic LSTM layer pass. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a basic LSTM layer pass, partial quantized version. + * This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons, need to be quantised + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a fully-connected layer pass, fully-quantized version. + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Bias values of the neurons. Can be NULL if no bias is needed. + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift); + +/** + * @brief Do a basic LSTM layer pass, fully-quantized version + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift); + +/** + * @brief Batch-normalize a matrix, fully-quantized version + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return + */ +void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale, + const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift); + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number + * This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +qtp_t dl_relu_q_op(qtp_t in, qtp_t clip); + +/** + * @brief Does a ReLu operation on a matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return Sigmoid output + */ +int dl_sigmoid_op_q(const int in); +int16_t dl_sigmoid_op_q8(const int16_t in); +/** + * @brief Does a sigmoid operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return tanh output + */ +int dl_tanh_op_q(int v); +int16_t dl_tanh_op_q8(int16_t v); + +void load_mat_psram_mn4(void); +void load_mat_psram_mn3(void); +void free_mat_psram_mn4(void); +void free_mat_psram_mn3(void); +qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent); +qtp_t dl_hard_tanh_op(qtp_t in, int exponent); + +int16_t dl_table_tanh_op(int16_t in, int exponent); +int16_t dl_table_sigmoid_op(int16_t in, int exponent); + +void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); +void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); +void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + + +/** + * @brief Filter out the number greater than clip in the matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Filter out the number greater than clip in the matrix, float version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); +/** + * @brief Do a basic CNN layer pass. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height + * @param bias Bias for the CNN layer. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1. + * @return The result of CNN layer. + */ +dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + + +/** + * @brief Do a basic CNN layer pass, quantised wersion. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height, + * @param bias Bias of the neurons. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1 + * @return The result of CNN layer + */ +dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32s2/dl_lib_coefgetter_if.h b/include/esp32s2/dl_lib_coefgetter_if.h new file mode 100644 index 0000000..a21de8d --- /dev/null +++ b/include/esp32s2/dl_lib_coefgetter_if.h @@ -0,0 +1,80 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_COEFGETTER_IF_H +#define DL_LIB_COEFGETTER_IF_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" +#include "cJSON.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by +//dl_batch_normalize_get_sqrtvar first. +#define COEF_GETTER_HINT_BNVAR (1<<0) + +/* +This struct describes the basic information of model data: +word_num: the number of wake words or speech commands +word_list: the name list of wake words or speech commands +thres_list: the threshold list of wake words or speech commands +info_str: the string used to reflect the version and information of model data + which consist of the architecture of network, the version of model data, wake words and their threshold +*/ +typedef struct { + int word_num; + char **word_list; + int *win_list; + float *thresh_list; + char *info_str; +} model_info_t; + +/* +Alphabet struct describes the basic grapheme or phoneme. +item_num: the number of baisc item(grapheme or phonemr) +items: the list of basic item +*/ +typedef struct { + int item_num; + char **items; +}alphabet_t; + +/* +This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network. +For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the +coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument +to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument +is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the +memory for the returned matrices, when applicable. +*/ +typedef struct { + const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); + const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); + const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint); + void (*free_f)(const dl_matrix2d_t *m); + void (*free_q)(const dl_matrix2dq_t *m); + void (*free_q8)(const dl_matrix2dq8_t *m); + const model_info_t* (*getter_info)(void *arg); + const alphabet_t* (*getter_alphabet)(void *arg); + const cJSON* (*getter_config)(void *arg); +} model_coeff_getter_t; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32s2/dl_lib_conv_queue.h b/include/esp32s2/dl_lib_conv_queue.h new file mode 100644 index 0000000..7cb9bf9 --- /dev/null +++ b/include/esp32s2/dl_lib_conv_queue.h @@ -0,0 +1,180 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONV_QUEUE_H +#define DL_LIB_CONV_QUEUE_H + + +#include "dl_lib_matrix.h" +#ifdef __cplusplus +extern "C" { +#endif + +typedef float fptp_t; + +//Flags for matrices +// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */ + +//Float convolution FIFO queue. +typedef struct { + int n; /*< the length of queue */ + int c; /*< the channel number of queue element*/ + int front; /*< the front(top) position of queue */ + int flag; /*< not used*/ + fptp_t *item; /*< Pointer to item array */ +} dl_conv_queue_t; + +/** + * @brief Allocate a convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc(int n, int c); + +/** + * @brief Allocate a convolution queue from psram + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c); + +/** + * @brief Free a convolution queue + * + * @param cq The convolution queue to free + */ +void dl_conv_queue_free(dl_conv_queue_t *cq); + +void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input convolution queue + * @return Pointer of oldest element + */ +fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq); + +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input convolution queue + * @param item The new element + */ +void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item); + + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a sigmoid operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a tanh operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a softmax operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset); + +fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset); +fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset); +dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b); +dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); +/** + * @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @return The result of atrous convolution + */ +fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); +fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @return The result of dilation layer + */ +fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias, + dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias); + + +void test_atrous_conv(int size, int rate, int in_channel, int out_channel); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32s2/dl_lib_convq8_queue.h b/include/esp32s2/dl_lib_convq8_queue.h new file mode 100644 index 0000000..28c5da7 --- /dev/null +++ b/include/esp32s2/dl_lib_convq8_queue.h @@ -0,0 +1,303 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ8_QUEUE_H +#define DL_LIB_CONVQ8_QUEUE_H + + +#include "dl_lib_matrixq.h" +#include "dl_lib_matrixq8.h" +#include "dl_lib_conv_queue.h" +#include "dl_lib_convq_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//[nch, n, c] +typedef struct { + int n; /*< the length of queue */ + int c; /*< the number of queue element*/ + int front; /*< the front(top) position of queue */ + int nch; /*< the channel of queue */ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + q8tp_t *itemq; /*< Pointer to item array */ +} dl_convq8_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c); + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param c The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch); + +/** + * @brief Allocate a bit fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq8_queue_free(dl_convq8_queue_t *cq); + +/** + * @brief Set itemq of convolution queue to 0 + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq); +q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param ch Channel index of queue + * @return Pointer of the element + */ +q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel Kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + * @return The result of atrous convolution + */ +void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias, + int out_exponent, int offset, int prenum); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + * @return The result of dilation layer + */ +void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias, + int offset, int prenum); + + + + +dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2); + +int8_t dl_sigmoid_lutq8(int in); +/** + * @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch  The channel number + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch); + +/** + * @brief Free a 8-bit fixed-point Multi-Channel convolution queue + * + * @param cqm The fixed-point convolution queue to free + * @param nch The channel number + */ +void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch); + +/** + * @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input + * + * @param cqm Input 8-bit fixed-point Multi-Channel convolution queue + * @param offset Offset used to calculate the beginning of input conv queue + * @param nch The channel number + */ +void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch); + +/** + * @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * Usually, this layer is used as first layer for 8-bit network. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * Input is a 16-bit queue point, Output is an 8-bit queue point. + * + * @param in Input 16bit fixed-point convolution queue array + * @param out Output 8bit fixed-point convolution queue array + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Exponent of output + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum); + +/** + * @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input 8bit fixed-point convolution queue array + * @param out Output 8bit fixed-point convolution queue array + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param out_exponent Exponent of output + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, + int nch, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias, + int out_exponent, int offset, int prenum); + +/** + * @brief Fast implement of 8-bit dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input 8-bit fixed-point convolution queue + * @param out Output 8-bit fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param offset Offset used to calculate the beginning of input conv queue + * @param prenum The num to control the parameter size of preload operation + */ +void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias, + int offset, int prenum); + +void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch); + + + +dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c); + +qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + + +qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size, + dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + +dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c, + dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight, + const dl_matrix2dq_t *bias, int prenum); + +qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum); + +void print_convq8(dl_convq8_queue_t *cq, int offset); +void print_convq(dl_convq_queue_t *cq, int offset); +void dl_relu_convq8(dl_convq8_queue_t *cq); + +void lstmq8_free(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32s2/dl_lib_convq_queue.h b/include/esp32s2/dl_lib_convq_queue.h new file mode 100644 index 0000000..ff190fe --- /dev/null +++ b/include/esp32s2/dl_lib_convq_queue.h @@ -0,0 +1,382 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ_QUEUE_H +#define DL_LIB_CONVQ_QUEUE_H + +#include "dl_lib_matrixq.h" +#include "dl_lib_conv_queue.h" +#include "dl_lib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//fixed-point convolution FIFO queue. +//[nch, n, c] +typedef struct { + int n; /*< the length of queue */ + int c; /*< the number of queue element*/ + int front; /*< the front(top) position of queue */ + int nch; /*< the multiple of queue*/ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + qtp_t *itemq; /*< Pointer to item array */ +} dl_convq_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc(int n, int c); + +/** + * @brief Allocate a fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c); + +/** + * @brief Allocate a fixed-point multi-channel convolution queue + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of conv queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch); + +/** + * @brief Allocate a fixed-point multi-channel convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of conv queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch); + + +void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq_queue_free(dl_convq_queue_t *cq); + +/** + * @brief Set itemq of convolution queue to 0 + * + * @param cq The fixed-point convolution queue point + */ +void dl_convq_queue_bzero(dl_convq_queue_t *cq); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq); +qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n); +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input fixed-point convolution queue + * @param item The new element + */ +void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param last_num Offset from the front of the queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param ch Channel index of convolution queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_tanh_convq(dl_convq_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in multi channel convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point multi channnel convolution queue + * @param offset Offset from the front of the queue + * @param nch The channel number of cqm + * @return Pointer of the element + */ +void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch); + +/** + * @brief Does a relu operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * relu operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, input data + stay as it is. Results are saved into the *out* array. + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param out Old array to re-use. Passing NULL will allocate a new matrix. + * @return softmax results + */ +fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @return The result of atrous convolution + */ +qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift, int offset, int prenum); + + +qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift, int prenum); + +qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum); + + +qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum); + +/** + * @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue + * + * @param cq1 First fixed-point convolution queue + * @param cq2 Seconf fixed-point convolution queue + * @return The result of float-point convolution queue + */ +dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2); + +/** + * @brief Fast implement of LSTM layer by dl_atrous_conv1dq function + * + * @Warning LSTM kernel is split into two part, the first part input is the last layer output, + * and kernel is parameter *in_weight*. The second part input is the last frame LSTM output, + * the kernel is parameters *h_weight*. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param in_weight the LSTM kernel needed by first part + * @param h_weight the LSTM kernel needed by second part + * @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required. + * @in_shift Shift ratio used in first part + * @h_shift Shift ratio used in second part + * @return The result of LSTM layer + */ +dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c, + dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight, + const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum); +dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift); + +dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c, + dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight, + dl_matrix2dq_t *bias, int prenum); + +/** + * @brief Allocate a fixed-point multi channel convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @param nch the channel numbet of convolution queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch); + +/** + * @brief Free a fixed-point multi channel convolution queue + * + * @param cqm The fixed-point convolution queue to free + * @param nch The channel number of cqm + */ +void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param nch The channel number of input + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @param offset the offset to calculate input convq + * @param prenum the preload size, 0: do not use preload function + * @return The result of atrous convolution + */ +qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in, + dl_convq_queue_t **out, + int nch, + int rate, + int size, + dl_matrix2dq_t* kernel, + dl_matrix2dq_t* bias, + int shift, + int offset, + int prenum); + +/** + * @brief Fast implement of dilation layer as follows for multi channel input + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param nch The channel number of input + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @param offset The offset to calculate input convq + * @param prenum The preload size, 0: do not use preload function + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in, + dl_convq_queue_t **out, + int nch, + int rate, + int size, + dl_matrix2dq_t* filter_kernel, + dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, + dl_matrix2dq_t* gate_bias, + int filter_shift, + int gate_shift, + int offset, + int prenum); + +void test_atrous_convq(int size, int rate, int in_channel, int out_channel); +void test_lstm_convq(int size, int in_dim, int lstm_cell); +void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch); +void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch); +void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32s2/dl_lib_matrix.h b/include/esp32s2/dl_lib_matrix.h new file mode 100644 index 0000000..59f7d79 --- /dev/null +++ b/include/esp32s2/dl_lib_matrix.h @@ -0,0 +1,257 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIX_H +#define DL_LIB_MATRIX_H + +#ifdef ESP_PLATFORM +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef float fptp_t; + +#if CONFIG_BT_SHARE_MEM_REUSE +extern multi_heap_handle_t gst_heap; +#endif + +//Flags for matrices +#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */ +#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */ + +//'Normal' float matrix +typedef struct { + int w; /*< Width */ + int h; /*< Height */ + int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */ + int flags; /*< Flags. OR of DL_MF_* values */ + fptp_t *item; /*< Pointer to item array */ +} dl_matrix2d_t; + +//Macro to quickly access the raw items in a matrix +#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride] + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_alloc(int w, int h); + + +/** + * @brief Free a matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrix_free(dl_matrix2d_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrix_zero(dl_matrix2d_t *m); + +/** + * @brief Copy the matrix into psram + * Copy the matrix from flash or iram/psram into psram + * + * @param m Matrix to zero + */ +dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief Generate a matrix from existing floating-point data + * + * @param w Width of resulting matrix + * @param h Height of resulting matrix + * @param data Data to populate matrix with + * @return A newaly allocated matrix populated with the given input data, or NULL if out of memory. + */ +dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data); + + +/** + * @brief Multiply a pair of matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two matrices : res=a.b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Add a pair of matrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + + +/** + * @brief Divide a pair of matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Subtract a matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Add a constant to every item of the matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add); + + +/** + * @brief Concatenate the rows of two matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated array with as avlues a|b + */ +dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + +dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b); + +/** + * @brief Print the contents of a matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrix(const dl_matrix2d_t *a); + +/** + * @brief Return the average square error given a correct and a test matrix. + * + * ...Well, more or less. If anything, it gives an indication of the error between + * the two. Check the code for the exact implementation. + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return value indicating the relative difference between matrices + */ +float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + + +/** + * @brief Check if two matrices have the same shape, that is, the same amount of rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + +/** + * @brief Get a specific item from the matrix + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) { + return DL_ITM(m, x, y); +} + +/** + * @brief Set a specific item in the matrix to the given value + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) { + DL_ITM(m, x, y)=val; +} + +void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/esp32s2/dl_lib_matrixq.h b/include/esp32s2/dl_lib_matrixq.h new file mode 100644 index 0000000..8ad397b --- /dev/null +++ b/include/esp32s2/dl_lib_matrixq.h @@ -0,0 +1,387 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ_H +#define DL_LIB_MATRIXQ_H + +#include +#include "dl_lib_matrix.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int16_t qtp_t; + +//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted +//for easy use as a multiplicand without stressing out the flash cache too much. +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + qtp_t *itemq; +} dl_matrix2dq_t; + +#define DL_QTP_SHIFT 15 +#define DL_QTP_RANGE ((1<itemq[(y)+(x)*m->stride] +#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null + +#define DL_SHIFT_AUTO 32 + +/** + * @info About quantized matrices and shift values + * + * Grab a coffee (or tea, or hot water) and sit down when you read this for the first + * time. Quantized matrices can speed up your operations, but come with some quirks, and + * it's good to understand how they work before using them. + * + * The data in the quantized matrix type is stored similarily to floating-point types: + * when storing a real value, the value is stored as a mantissa (base number) and an + * exponent. The 'real' value that can be re-derived from those two numbers is something + * similar to mantissa*2^exponent. Up to this point, there's not that much difference from + * the standard floating point implementations like e.g. IEEE-754. + * + * The difference with respect to quantized matrices is that for a quantized matrix, it is + * assumed all values stored have more-or-less the same order of magnitude. This allows the + * matrix to only store all the mantissas, while the exponents are shared; there is only one + * exponent for the entire matrix. This makes it quicker to handle matrix operations - the + * logic to fix the exponents only needs to happen once, while the rest can be done in simple + * integer arithmetic. It also nets us some memory savings - while normally a floating point + * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the + * memory requirements. + * + * While most of the details of handling the intricacies of the quantized matrixes are done + * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, + * specifically in places where addition/subtraction/division happens. + * + * The problem is that the routines do not know what the size of the resulting operation is. For + * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough + * to overflow the mantissa of the result if the exponent is the same. However, if by default we + * assume the mantissas needs to be scaled back, we may lose precision. + * + * In order to counter this, all operations that have this issue have a ``shift`` argument. If + * the argument is zero, the routine will be conservative, that is, increase the exponent of + * the result to such an extent it's mathematically impossible a value in the result will exceed + * the maximum value that can be stored. However, when this argument is larger than zero, the + * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision + * but increasing the chance of some of the calculated values not fitting in the mantissa anymore. + * If this happens, the value will be clipped to the largest (or, for negative values, smallest) + * value possible. (Neural networks usually are okay with this happening for a limited amount + * of matrix indices). + * + * For deciding on these shift values, it is recommended to start with a shift value of one, then + * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. + * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only + * shift values of 0 or 1 make sense; these routines will error out if you try to do something + * else. + * + * For neural networks and other noise-tolerant applications, note that even when + * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead + * to slightly improved precision. Feel free to experiment. + **/ + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_alloc(int w, int h); +dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h); +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out); + +/** + * TODO: DESCRIBE THIS FUNCTION + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit); + + +/** + * @brief Convert a quantized matrix to a floating-point one. + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + **/ +dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out); + + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq_free(dl_matrix2dq_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrixq_zero(dl_matrix2dq_t *m); + +/** + * @brief Copy the matrix into psram + * Copy the matrix from flash or iram/psram into psram + * + * @param m Matrix to copy + */ +dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a fixed-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be + * much slower than dl_matrixq_dot . + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a floating-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be + * much slower than dl_matrixq_dot_matrix_out. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix. + * + * @param a First multiplicand; float matrix + * @param b Second multiplicand; quantized matrix + * @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + + +/** + * @brief Print the contents of a quantized matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrixq(const dl_matrix2dq_t *a); + + +/** + * @brief Add a pair of quantizedmatrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @Warning In contrast to the floating point equivalent of this function, the fixed-point version + * of this has the issue that as soon as the output exponent of one of the slices changes, the data + * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you + * use this function, either treat the slices as read-only, or assume the sliced matrix contains + * garbage after modifying the data in one of the slices. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief Subtract a quantized matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Multiply a pair of quantized matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that matrix. + */ +void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res); + +/** + * @brief Divide a pair of quantized matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift); + +/** + * @brief Check if two quantized matrices have the same shape, that is, the same amount of + * rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Concatenate the rows of two quantized matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated quantized matrix with as values a|b + */ +dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Add a constant to every item of the quantized matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift); + +/** + * @brief Check the sanity of a quantized matrix + * + * Due to the nature of quantized matrices, depending on the calculations a quantized + * matrix is the result of and the shift values chosen in those calculations, a quantized + * matrix may have an exponent and mantissas that lead to a loss of precision, either because + * most significant mantissa bits are unused, or because a fair amount of mantissas are + * clipped. This function checks if this is the case and will report a message to stdout + * if significant loss of precision is detected. + * + * @param m The quantized matrix to check + * @param name A string to be displayed in the message if the sanity check fails + * @return True if matrix is sane, false otherwise + **/ + +int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name); + +/** + * @brief re-adjust the exponent of the matrix to fit the mantissa better + * + * This function will shift up all the data in the mantissas so there are no + * most-significant bits that are unused in all mantissas. It will also adjust + * the exponent to keep the actua values in the matrix the same. + * + * Some operations done on a matrix, especially operations that re-use the + * result of earlier operations done in the same way, can lead to the loss of + * data because the exponent of the quantized matrix is never re-adjusted. You + * can do that implicitely by calling this function. + * + * @param m The matrix to re-adjust +**/ +void dl_matrixq_readjust_exp(dl_matrix2dq_t *m); + + + +/** + * @brief Get the floating-point value of a specific item from the quantized matrix + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y); + +/** + * @brief Set a specific item in the quantized matrix to the given + * floating-point value + * + * @warning If the given value is more than the exponent in the quantized matrix + * allows for, all mantissas in the matrix will be shifted down to make the value + * 'fit'. If, however, the exponent is such that the value would result in a + * quantized mantissa of 0, nothing is done. + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/esp32s2/dl_lib_matrixq8.h b/include/esp32s2/dl_lib_matrixq8.h new file mode 100644 index 0000000..377df7c --- /dev/null +++ b/include/esp32s2/dl_lib_matrixq8.h @@ -0,0 +1,80 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ8_H +#define DL_LIB_MATRIXQ8_H + +#include +#include "dl_lib_matrix.h" +#include "dl_lib.h" +#include "dl_lib_matrixq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int8_t q8tp_t; + +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + q8tp_t *itemq; +} dl_matrix2dq8_t; + +#define DL_Q8TP_SHIFT 7 +#define DL_Q8TP_RANGE ((1<itemq[(y)+(x)*m->stride] + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h); + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq8_free(dl_matrix2dq8_t *m); + +/** + * @brief Copy a quantized matrix + * Copy a quantized matrix from flash or iram/psram + * + * @param m Matrix to copy + */ +dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m); + +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/include/esp32s2/esp_aec.h b/include/esp32s2/esp_aec.h new file mode 100644 index 0000000..36de9c1 --- /dev/null +++ b/include/esp32s2/esp_aec.h @@ -0,0 +1,105 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_AEC_H_ +#define _ESP_AEC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define USE_AEC_FFT // Not kiss_fft +#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz +#define AEC_FRAME_LENGTH_MS 32 + +typedef struct aec_handle_t aec_handle_t; +typedef enum { + AEC_MODE_SR_LOW_COST = 0, // Low Cost AEC fro speech recognition + AEC_MODE_SR_HIGH_PERF = 1, // High Perforamce AEC for speech recognition + AEC_MODE_VOIP_LOW_COST = 3, // Low Cost AEC for voice communication + AEC_MODE_VOIP_HIGH_PERF = 4, // High Perforamce AEC for voice communication +} aec_mode_t; + +/** + * @brief Creates an instance to the AEC structure. + * Please get frame size by aec_get_chunksize() function + * + * @param sample_rate The Sampling frequency (Hz) must be 16000. + * @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption. + * @param channel_num The input microphone channel number + * @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t *aec_create(int sample_rate, int filter_length, int channel_num, aec_mode_t mode); + +/** + * @brief Creates an instance to the AEC structure, same with aec_create(). + * + * @param filter_length Number of filter, recommend to set 4. The larger the filter_length, the more resource consumption. + * @param channel_num The input microphone channel number + * @param mode The mode of AEC, recommend to set AEC_MODE_SR_LOW_COST + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t *aec_pro_create(int filter_length, int channel_num, aec_mode_t mode); + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @warning The indata, refdata and outdata must be 16-bit signed. please allocate memory by heap_caps_aligned_alloc(). + * + * @param inst The instance of AEC. Format for multi-channel data is "ch0 ch0 ch0 ..., ch1 ch1 ch1 ..." + * @param indata An array of 16-bit signed audio samples from mic. + * @param refdata An array of 16-bit signed audio samples sent to the speaker. + * @param outdata Returns near-end signal with echo removed. Format for multi-channel data is "ch0 ch0 ch0..., ch1 ch1 ch1 ..." + * @return None + * + */ +void aec_process(const aec_handle_t *handel, int16_t *indata, int16_t *refdata, int16_t *outdata); + +/** + * @brief Get frame size of AEC (the samples of one frame) + * @param handle The instance of AEC. + * @return Frame size + */ +int aec_get_chunksize(const aec_handle_t *handle); + +/** + * @brief Get AEC mode string + * + * @param aec_mode The mode of AEC. + * + * @return AEC mode string + */ +char * aec_get_mode_string(aec_mode_t aec_mode); + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void aec_destroy(aec_handle_t *handel); + +#ifdef __cplusplus +} +#endif + +#endif //_ESP_AEC_H_ diff --git a/include/esp32s2/esp_afe_aec.h b/include/esp32s2/esp_afe_aec.h new file mode 100644 index 0000000..9d60588 --- /dev/null +++ b/include/esp32s2/esp_afe_aec.h @@ -0,0 +1,82 @@ + +#ifndef _ESP_AFE_AEC_H_ +#define _ESP_AFE_AEC_H_ + + +#include "esp_afe_config.h" +#include "esp_aec.h" + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + aec_handle_t* handle; + aec_mode_t mode; + afe_pcm_config_t pcm_config; + int frame_size; + int16_t *data; +}afe_aec_handle_t; + + +/** + * @brief Creates an instance to the AEC structure. + * + * @warning Currently only support 1 microphone channel and 1 playback channe. + * If input has multiple microphone channels and playback channels, just the first microphone channel and playback channel will be selected. + * + * The input format, same as afe config: + * M to represent the microphone channel + * R to represent the playback reference channel + * N to represent an unknown or unused channel + * + * For example, input_format="MMNR" indicates that the input data consists of four channels, + * which are the microphone channel, the microphone channel, an unused channel, and the playback channel + * + * @param input_format The input format + * @param filter_length The length of filter. The larger the filter, the higher the CPU loading. + * Recommended filter_length = 4 for esp32s3 and esp32p4. Recommended filter_length = 2 for esp32c5. + * @param type The type of afe, AFE_TYPE_SR or AFE_TYPE_VC + * @param mode The mode of afe, AFE_MODE_LOW_COST or AFE_MODE_HIGH_PERF + * + * @return afe_config_t* The default config of afe + */ +afe_aec_handle_t *afe_aec_create(const char *input_format, int filter_length, afe_type_t type, afe_mode_t mode); + + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @param inst The instance of AEC. + * @param indata Input audio data, format is define by input_format. Note indata will be modified in function call. + * @param outdata Returns near-end signal with echo removed. + + * @return The bytes of outdata. + */ +size_t afe_aec_process(afe_aec_handle_t *handel, int16_t *indata, int16_t *outdata); + +/** + * @brief Get frame size of AEC (the samples of one frame) + * @param handle The instance of AEC. + * @return Frame size + */ +int afe_aec_get_chunksize(afe_aec_handle_t *handle); + + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void afe_aec_destroy(afe_aec_handle_t *handel); + +#ifdef __cplusplus +} +#endif + +#endif //_ESP_AEC_H_ diff --git a/include/esp32s2/esp_afe_config.h b/include/esp32s2/esp_afe_config.h new file mode 100644 index 0000000..f9de6fe --- /dev/null +++ b/include/esp32s2/esp_afe_config.h @@ -0,0 +1,69 @@ +#pragma once +#include "esp_aec.h" +#include "stdbool.h" +#include "stdint.h" +#include "stdlib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// AFE: Audio Front-End +// SR: Speech Recognition +// VC: Voice Communication + +// Set AFE_SR mode +typedef enum { + SR_MODE_LOW_COST = 0, // Deprecated, please use afe_mode_t, AFE mode: low cost mode + SR_MODE_HIGH_PERF = 1, // Deprecated, please use afe_mode_t, AFE mode: high performance mode +} afe_sr_mode_t; + +// Set AFE mode +typedef enum { + AFE_MODE_LOW_COST = 0, // AFE mode: low cost mode + AFE_MODE_HIGH_PERF = 1, // AFE mode: high performance mode +} afe_mode_t; + +// Set AFE type +typedef enum { + AFE_TYPE_SR = 0, // Speech recognition scenarios, excluding nonlinear noise suppression + AFE_TYPE_VC = 1, // Voice communication scenarios, including nonlinear noise suppression +} afe_type_t; + +typedef enum { + AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram + AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance + AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram +} afe_memory_alloc_mode_t; + +typedef enum { + AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of fetch audio is -9dB + AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of fetch audio is -6dB + AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of fetcg is -3dB + AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain +} afe_mn_peak_agc_mode_t; + +typedef struct { + int total_ch_num; // total channel num, include microphone channel, playback channel and unknown channel + int mic_num; // microphone channel number + uint8_t *mic_ids; // microphone channel indices + int ref_num; // playback reference channel number + uint8_t *ref_ids; // playback reference channel indices + int sample_rate; // sample rate of audio +} afe_pcm_config_t; + +typedef enum { + AFE_NS_MODE_WEBRTC = 0, // please use model name of NS, SSP: "WEBRTC" + AFE_NS_MODE_NET = 1, // please use model name of NSNET +} afe_ns_mode_t; + +typedef enum { + AFE_AGC_MODE_WEBRTC = 0, // WEBRTC AGC + AFE_AGC_MODE_WAKENET = 1, // AGC gain is calculated by wakenet model if wakenet is activated +} afe_agc_mode_t; + + +#ifdef __cplusplus +} +#endif + diff --git a/include/esp32s2/esp_mfcc_fbank_int16.h b/include/esp32s2/esp_mfcc_fbank_int16.h new file mode 100644 index 0000000..22a5f2c --- /dev/null +++ b/include/esp32s2/esp_mfcc_fbank_int16.h @@ -0,0 +1,86 @@ +#pragma once +#include "esp_speech_features.h" +#include + +/* +This describes an interface for a MFCC runner, that is, some kind of implementation that can be +fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so +multiple implementations can be used. +*/ + +typedef struct esp_mfcc_data_t esp_mfcc_data_t; + +// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features), +// please refer to its documentation for details. +typedef struct { + int winstep_ms; // The step between successive windows in ms. (10) + int winlen_ms; // The length of the analysis window in ms. (25) + int nch; // The number of input channel + int numcep; // The number of cepstrum to return + int nfilter; // The number of filters in the filterbank + int nfft; // The FFT size + int samp_freq; // The sample-rate of the signal. + int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0) + int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq + float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) + char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey" + bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy + bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum + int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon)) + float log_epsilon; // log epsilon. (e.g. 1e-7) + bool psram_first; // Alloc memory from PSRAM first + bool remove_dc_offset; // Whether to subtract mean of wave before FFT +} esp_mfcc_opts_t; + +/** + * @brief Un-initialize and free a mfcc runner + * + * Function to free a previously allocated mfcc runner. + * + * @param r Runner object to destroy + */ +typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r); + +/** + * @brief Initialize parameters for a mfcc runner. + * + * After creation, a mfcc runner needs to be initialized first; this is usually done + * in the initialization routine of a speech recognition algorithm. This provides + * a pointer to do this for a specific mfcc runner. + * + * @param opt Options for the mfcc process + * @return True if success, false on error. + */ +typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt); + +/** + * @brief Run a mfcc iteration on frame by frame + * + * This will take a set of samples and return a ceptrum. Note that this may be pipelined: + * an initial call to this function may return NULL and subsequent calls may return the + * cepstrum of previous calls. + * + * @param r The mfcc runner + * @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000). + * @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function + * when done with this buffer. Note that some implementations require the buffer to be freed before another call + * to this function is done. + */ +typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch); + +/** + * @brief Clean all state of mfcc handle + * + * @param r The mfcc runner + */ +typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r); + +/** + * @brief Operations possible on a mfcc runner + */ +typedef struct { + esp_mfcc_op_destroy_t destroy; + esp_mfcc_op_create_t create; + esp_mfcc_op_run_step_t run_step; + esp_mfcc_op_clean_t clean; +} esp_mfcc_iface_t; diff --git a/include/esp32s2/esp_mfcc_iface.h b/include/esp32s2/esp_mfcc_iface.h new file mode 100644 index 0000000..0257768 --- /dev/null +++ b/include/esp32s2/esp_mfcc_iface.h @@ -0,0 +1,89 @@ +#pragma once +#include "esp_speech_features.h" +#include + +/* +This describes an interface for a MFCC runner, that is, some kind of implementation that can be +fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so +multiple implementations can be used. +*/ + +typedef struct esp_mfcc_data_t esp_mfcc_data_t; + +// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features), +// please refer to its documentation for details. +typedef struct { + int winstep_ms; // The step between successive windows in ms. (10) + int winlen_ms; // The length of the analysis window in ms. (25) + int nch; // The number of input channel + int numcep; // The number of cepstrum to return + int nfilter; // The number of filters in the filterbank + int nfft; // The FFT size + int samp_freq; // The sample-rate of the signal. + int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0) + int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq + float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97) + char *win_type; // Analysis window type to apply to each frame, "hanning","hamming","sine","rectangular","povey" + bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy + bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum + int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon)) + float log_epsilon; // log epsilon. (e.g. 1e-7) + bool psram_first; // Alloc memory from PSRAM first + bool remove_dc_offset; // Whether to subtract mean of wave before FFT +} esp_mfcc_opts_t; + +/** + * @brief Un-initialize and free a mfcc runner + * + * Function to free a previously allocated mfcc runner. + * + * @param r Runner object to destroy + */ +typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r); + +/** + * @brief Initialize parameters for a mfcc runner. + * + * After creation, a mfcc runner needs to be initialized first; this is usually done + * in the initialization routine of a speech recognition algorithm. This provides + * a pointer to do this for a specific mfcc runner. + * + * @param opt Options for the mfcc process + * @return True if success, false on error. + */ +typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt); + +/** + * @brief Run a mfcc iteration on frame by frame + * + * This will take a set of samples and return a ceptrum. Note that this may be pipelined: + * an initial call to this function may return NULL and subsequent calls may return the + * cepstrum of previous calls. + * + * @param r The mfcc runner + * @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000). + * @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function + * when done with this buffer. Note that some implementations require the buffer to be freed before another call + * to this function is done. + */ +typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch); + +typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank); + +/** + * @brief Clean all state of mfcc handle + * + * @param r The mfcc runner + */ +typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r); + +/** + * @brief Operations possible on a mfcc runner + */ +typedef struct { + esp_mfcc_op_destroy_t destroy; + esp_mfcc_op_create_t create; + esp_mfcc_op_run_step_t run_step; + esp_mfcc_op_run_step_s16_t run_step_s16; + esp_mfcc_op_clean_t clean; +} esp_mfcc_iface_t; diff --git a/include/esp32s2/esp_mfcc_models.h b/include/esp32s2/esp_mfcc_models.h new file mode 100644 index 0000000..44086e8 --- /dev/null +++ b/include/esp32s2/esp_mfcc_models.h @@ -0,0 +1,44 @@ +#pragma once +#include "esp_mfcc_iface.h" + +extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle +extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle + +/** + * @brief Return basic opts used in wakenet9 & multinet5 + **/ +esp_mfcc_opts_t *get_mfcc_opts_wn9(); + +/** + * @brief Return basic opts used in wakenet9s + **/ +esp_mfcc_opts_t *get_mfcc_opts_wn9s16(); + +/** + * @brief Return basic opts for default kaldifeat + * + opts->psram_first = true; + opts->use_power = true; + opts->use_log_fbank = 2; // log(max(x, log_epsilon)) + opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps + opts->win_type = "povey"; + opts->low_freq = 20; + opts->high_freq = 7600; + opts->samp_freq = 16000; + opts->nch = 1; + opts->nfft = 512; + opts->nfilter = 80; + opts->numcep = 80; + opts->preemph = 0.97; + opts->append_energy = false; + opts->winlen_ms = 25; + opts->winstep_ms = 10; + opts->remove_dc_offset = true; + * + **/ +esp_mfcc_opts_t *get_mfcc_opts_kaldi(); + +/** + * @brief Print mfcc opts + **/ +void print_mfcc_opts(esp_mfcc_opts_t *opts); diff --git a/include/esp32s2/esp_speech_features.h b/include/esp32s2/esp_speech_features.h new file mode 100644 index 0000000..c1659f9 --- /dev/null +++ b/include/esp32s2/esp_speech_features.h @@ -0,0 +1,62 @@ +#pragma once +#include "c_speech_features_config.h" +#include "stdlib.h" +#include +#include + +#ifndef M_2PI +#define M_2PI 6.283185307179586476925286766559005 +#endif + +typedef struct { + float *coeff; + int *bank_pos; + int nfilter; +} esp_mel_filter_t; + +float *esp_mfcc_malloc(size_t size, bool from_psram); + +void esp_mfcc_free(void *ptr); + +/** + * @brief Initialize FFT table + * @warning For ESP-PLATFORM, use esp-dsp fft + * For Other platform, use kiss fft + * + * @param nfft The input samples number + * @return fft-table + **/ +void *esp_fft_init(int nfft); + +/** + * @brief Free FFT table + * @warning For ESP-PLATFORM, use esp-dsp fft + * For Other platform, use kiss fft + * + * @param fft_table The fft table initialized by esp_fft_init + * @param nfft The input samples number + * @return fft-table + **/ +void esp_fft_deinit(void *fft_table, int nfft); + +/** + * @brief Initial window function + * Currently support hanning, hamming, sine, povey, rectangular, + * wn9(512-hanning to get wakenet9& multinet5 compatible) + **/ +float *esp_win_func_init(char *win_type, float *window_data, int frame_length); + +float *esp_fftr(float *x, int nfft, void *fft_table); + +float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_table); + +void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc); + +float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last); + +esp_mel_filter_t *esp_mel_filter_init( + int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram); + +void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter); + +float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon); diff --git a/include/esp32s2/esp_wn_iface.h b/include/esp32s2/esp_wn_iface.h new file mode 100644 index 0000000..44bab8d --- /dev/null +++ b/include/esp32s2/esp_wn_iface.h @@ -0,0 +1,215 @@ +#pragma once +#include "stdint.h" +#include "dl_lib_convq_queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Opaque model data container +typedef struct model_iface_data_t model_iface_data_t; + +/** + * @brief The state of wakeup + */ +typedef enum +{ + WAKENET_NO_DETECT = 0, // wake word is not detected + WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified + WAKENET_DETECTED = 1 // wake word is detected +} wakenet_state_t; + +//Set wake words recognition operating mode +//The probability of being wake words is increased with increasing mode, +//As a consequence also the false alarm rate goes up +typedef enum { + DET_MODE_90 = 0, // Normal + DET_MODE_95 = 1, // Aggressive + DET_MODE_2CH_90 = 2, + DET_MODE_2CH_95 = 3, + DET_MODE_3CH_90 = 4, + DET_MODE_3CH_95 = 5, +} det_mode_t; + +typedef struct { + int wake_word_num; //The number of all wake words + char **wake_word_list; //The name list of wake words +} wake_word_info_t; + +/** + * @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient + * + * @param model_name The specified wake word model coefficient + * @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95 + * @returns Handle to the model data + */ +typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode); + +/** + * @brief Get the amount of samples that need to be passed to the detect function + * + * Every speech recognition model processes a certain number of samples at the same time. This function + * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. + * + * @param model The model object to query + * @return The amount of samples to feed the detect function + */ +typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); + +/** + * @brief Get the channel number of samples that need to be passed to the detect function + * + * Every speech recognition model processes a certain number of samples at the same time. This function + * can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes. + * + * @param model The model object to query + * @return The amount of samples to feed the detect function + */ +typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model); + +/** + * @brief Get the start point of wake word when one wake word is detected. + * + * @Warning: This function should be called when the channel index is verified. + * The returned value is the number of samples from start point of wake word to detected point. + * + * @param model The model object to query + * @return The number of samples from start point to detected point (end point) + */ +typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model); + + +/** + * @brief Get the sample rate of the samples to feed to the detect function + * + * @param model The model object to query + * @return The sample rate, in hz + */ +typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model); + +/** + * @brief Get the number of wake words + * + * @param model The model object to query + * @returns the number of wake words + */ +typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model); + +/** + * @brief Get the name of wake word by index + * + * @Warning The index of wake word start with 1 + + * @param model The model object to query + * @param word_index The index of wake word + * @returns the detection threshold + */ +typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index); + +/** + * @brief Set the detection threshold to manually abjust the probability + * + * @param model The model object to query + * @param det_treshold The threshold to trigger wake words, the range of det_threshold is 0.5~0.9999 + * @param word_index The index of wake word + * @return 0: setting failed, 1: setting success + */ +typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index); + +/** + * @brief Get the wake word detection threshold of different modes + * + * @param model The model object to query + * @param word_index The index of wake word + * @returns the detection threshold + */ +typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index); + +/** + * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found. + * + * @Warning The index of wake word start with 1, 0 means no wake words is detected. + * + * @param model The model object to query + * @param samples An array of 16-bit signed audio samples. The array size used can be queried by the + * get_samp_chunksize function. + * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); + +/** + * @brief Get the volume gain + * + * @param model The model object to query + * @param target_db The target dB to calculate volume gain + * @returns the volume gain + */ +typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db); + +/** + * @brief Get the triggered channel index. Channel index starts from zero + * + * @param model The model object to query + * @return The channel index + */ +typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model); + +/** + * @brief Clean all states of model + * + * @param model The model object to query + */ +typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model); + +/** + * @brief Destroy a speech recognition model + * + * @param model Model object to destroy + */ +typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); + +/** + * @brief Feed MFCC of an audio stream to the vad model and detect whether is + * voice. + * + * @param model The model object to query + * @param cq An array of 16-bit MFCC. + * @return The index of wake words, return 0 if no wake word is detected, else + * the index of the wake words. + */ +typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq); + +/** + * @brief Get MFCC of an audio stream + * + * @param model The model object to query + * @return MFCC data + */ +typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model); + + +/** + * This structure contains the functions used to do operations on a wake word detection model. + */ +typedef struct { + esp_wn_iface_op_create_t create; + esp_wn_iface_op_get_start_point_t get_start_point; + esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize; + esp_wn_iface_op_get_channel_num_t get_channel_num; + esp_wn_iface_op_get_samp_rate_t get_samp_rate; + esp_wn_iface_op_get_word_num_t get_word_num; + esp_wn_iface_op_get_word_name_t get_word_name; + esp_wn_iface_op_set_det_threshold_t set_det_threshold; + esp_wn_iface_op_get_det_threshold_t get_det_threshold; + esp_wn_iface_op_get_triggered_channel_t get_triggered_channel; + esp_wn_iface_op_get_vol_gain_t get_vol_gain; + esp_wn_iface_op_detect_t detect; + esp_wn_iface_op_detect_mfcc_t detect_mfcc; + esp_wn_iface_op_get_mfcc_data_t get_mfcc_data; + esp_wn_iface_op_clean_t clean; + esp_wn_iface_op_destroy_t destroy; +} esp_wn_iface_t; + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/esp32s2/esp_wn_models.h b/include/esp32s2/esp_wn_models.h new file mode 100644 index 0000000..3a4d7e4 --- /dev/null +++ b/include/esp32s2/esp_wn_models.h @@ -0,0 +1,52 @@ +#pragma once +#include "esp_wn_iface.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// The prefix of wakenet model name is used to filter all wakenet from availabel models. +#define ESP_WN_PREFIX "wn" + +/** + * @brief Get the wakenet handle from model name + * + * @param model_name The name of model + * @returns The handle of wakenet + */ +const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name); + +/** + * @brief Get the wake word name from model name + * + * @param model_name The name of model + * @returns The wake word name, like "alexa","hilexin","xiaoaitongxue" + */ +char* esp_wn_wakeword_from_name(const char *model_name); + +#ifdef __cplusplus +} +#endif + +/* + +static const sr_model_iface_t *model = esp_wn_handle_from_name(model_name); + +//Initialize wakeNet model data +static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90); + +//Set parameters of buffer +int audio_chunksize=model->get_samp_chunksize(model_data); +int frequency = model->get_samp_rate(model_data); +int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t)); + +//Detect +int r=model->detect(model_data, buffer); +if (r>0) { + printf("Detection triggered output %d.\n", r); +} + +//Destroy model +model->destroy(model_data) + +*/ diff --git a/lib/esp32c3/libc_speech_features.a b/lib/esp32c3/libc_speech_features.a new file mode 100644 index 0000000..28679c7 Binary files /dev/null and b/lib/esp32c3/libc_speech_features.a differ diff --git a/lib/esp32c3/libdl_lib.a b/lib/esp32c3/libdl_lib.a new file mode 100644 index 0000000..19a5e83 Binary files /dev/null and b/lib/esp32c3/libdl_lib.a differ diff --git a/lib/esp32c3/libesp_audio_front_end.a b/lib/esp32c3/libesp_audio_front_end.a new file mode 100644 index 0000000..4c99de1 Binary files /dev/null and b/lib/esp32c3/libesp_audio_front_end.a differ diff --git a/lib/esp32c3/libesp_audio_processor.a b/lib/esp32c3/libesp_audio_processor.a new file mode 100644 index 0000000..3b8b36c Binary files /dev/null and b/lib/esp32c3/libesp_audio_processor.a differ diff --git a/lib/esp32c3/libhufzip.a b/lib/esp32c3/libhufzip.a new file mode 100644 index 0000000..4ec56b8 Binary files /dev/null and b/lib/esp32c3/libhufzip.a differ diff --git a/lib/esp32c3/libwakenet.a b/lib/esp32c3/libwakenet.a new file mode 100644 index 0000000..8955079 Binary files /dev/null and b/lib/esp32c3/libwakenet.a differ diff --git a/lib/esp32c6/libc_speech_features.a b/lib/esp32c6/libc_speech_features.a new file mode 100644 index 0000000..de94ea0 Binary files /dev/null and b/lib/esp32c6/libc_speech_features.a differ diff --git a/lib/esp32c6/libdl_lib.a b/lib/esp32c6/libdl_lib.a new file mode 100644 index 0000000..0f32a4b Binary files /dev/null and b/lib/esp32c6/libdl_lib.a differ diff --git a/lib/esp32c6/libesp_audio_front_end.a b/lib/esp32c6/libesp_audio_front_end.a new file mode 100644 index 0000000..d92b830 Binary files /dev/null and b/lib/esp32c6/libesp_audio_front_end.a differ diff --git a/lib/esp32c6/libesp_audio_processor.a b/lib/esp32c6/libesp_audio_processor.a new file mode 100644 index 0000000..84192c9 Binary files /dev/null and b/lib/esp32c6/libesp_audio_processor.a differ diff --git a/lib/esp32c6/libhufzip.a b/lib/esp32c6/libhufzip.a new file mode 100644 index 0000000..1a4f151 Binary files /dev/null and b/lib/esp32c6/libhufzip.a differ diff --git a/lib/esp32c6/libwakenet.a b/lib/esp32c6/libwakenet.a new file mode 100644 index 0000000..c3413cf Binary files /dev/null and b/lib/esp32c6/libwakenet.a differ diff --git a/lib/esp32s2/libc_speech_features.a b/lib/esp32s2/libc_speech_features.a new file mode 100644 index 0000000..2fe5691 Binary files /dev/null and b/lib/esp32s2/libc_speech_features.a differ diff --git a/lib/esp32s2/libdl_lib.a b/lib/esp32s2/libdl_lib.a new file mode 100644 index 0000000..9cd8d80 Binary files /dev/null and b/lib/esp32s2/libdl_lib.a differ diff --git a/lib/esp32s2/libesp_audio_front_end.a b/lib/esp32s2/libesp_audio_front_end.a new file mode 100644 index 0000000..2404377 Binary files /dev/null and b/lib/esp32s2/libesp_audio_front_end.a differ diff --git a/lib/esp32s2/libesp_audio_processor.a b/lib/esp32s2/libesp_audio_processor.a new file mode 100644 index 0000000..22c0987 Binary files /dev/null and b/lib/esp32s2/libesp_audio_processor.a differ diff --git a/lib/esp32s2/libhufzip.a b/lib/esp32s2/libhufzip.a new file mode 100644 index 0000000..694705f Binary files /dev/null and b/lib/esp32s2/libhufzip.a differ diff --git a/lib/esp32s2/libwakenet.a b/lib/esp32s2/libwakenet.a new file mode 100644 index 0000000..9045c44 Binary files /dev/null and b/lib/esp32s2/libwakenet.a differ diff --git a/test_apps/esp-tts/sdkconfig.ci.esp32c5 b/test_apps/esp-tts/sdkconfig.ci.esp32c5 new file mode 100644 index 0000000..ab64f22 --- /dev/null +++ b/test_apps/esp-tts/sdkconfig.ci.esp32c5 @@ -0,0 +1,11 @@ +# This file was generated using idf.py save-defconfig. It can be edited manually. +# Espressif IoT Development Framework (ESP-IDF) 5.4.1 Project Minimal Configuration +# +CONFIG_IDF_TARGET="esp32c5" +CONFIG_ESPTOOLPY_FLASHMODE_QIO=y +CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y +CONFIG_PARTITION_TABLE_CUSTOM=y +CONFIG_SPIRAM=y +CONFIG_ESP_SYSTEM_ALLOW_RTC_FAST_MEM_AS_HEAP=n +CONFIG_ESP_INT_WDT=n +CONFIG_ESP_TASK_WDT_EN=n diff --git a/test_apps/esp32c5/sdkconfig.ci.esp32c3 b/test_apps/esp32c5/sdkconfig.ci.esp32c3 new file mode 100644 index 0000000..f193d7a --- /dev/null +++ b/test_apps/esp32c5/sdkconfig.ci.esp32c3 @@ -0,0 +1,11 @@ +# This file was generated using idf.py save-defconfig. It can be edited manually. +# Espressif IoT Development Framework (ESP-IDF) 5.4.1 Project Minimal Configuration +# +CONFIG_IDF_TARGET="esp32c3" +CONFIG_ESPTOOLPY_FLASHMODE_QIO=y +CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y +CONFIG_PARTITION_TABLE_CUSTOM=y +CONFIG_SR_WN_WN9S_HIESP=y +CONFIG_ESP_MAIN_TASK_STACK_SIZE=148584 +CONFIG_ESP_TASK_WDT_EN=n +CONFIG_IDF_EXPERIMENTAL_FEATURES=y diff --git a/test_apps/esp32c5/sdkconfig.ci.esp32c6 b/test_apps/esp32c5/sdkconfig.ci.esp32c6 new file mode 100644 index 0000000..e672fe8 --- /dev/null +++ b/test_apps/esp32c5/sdkconfig.ci.esp32c6 @@ -0,0 +1,11 @@ +# This file was generated using idf.py save-defconfig. It can be edited manually. +# Espressif IoT Development Framework (ESP-IDF) 5.4.1 Project Minimal Configuration +# +CONFIG_IDF_TARGET="esp32c6" +CONFIG_ESPTOOLPY_FLASHMODE_QIO=y +CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y +CONFIG_PARTITION_TABLE_CUSTOM=y +CONFIG_SR_WN_WN9S_HIESP=y +CONFIG_ESP_MAIN_TASK_STACK_SIZE=148584 +CONFIG_ESP_TASK_WDT_EN=n +CONFIG_IDF_EXPERIMENTAL_FEATURES=y diff --git a/test_apps/esp32c5/sdkconfig.ci.esp32s2 b/test_apps/esp32c5/sdkconfig.ci.esp32s2 new file mode 100644 index 0000000..1417a36 --- /dev/null +++ b/test_apps/esp32c5/sdkconfig.ci.esp32s2 @@ -0,0 +1,12 @@ +# This file was generated using idf.py save-defconfig. It can be edited manually. +# Espressif IoT Development Framework (ESP-IDF) 5.4.1 Project Minimal Configuration +# +CONFIG_IDF_TARGET="esp32s2" +CONFIG_ESPTOOLPY_FLASHMODE_QIO=y +CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y +CONFIG_PARTITION_TABLE_CUSTOM=y +CONFIG_SR_WN_WN9S_HIESP=y +CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y +CONFIG_ESP_MAIN_TASK_STACK_SIZE=148584 +CONFIG_ESP_TASK_WDT_EN=n +CONFIG_IDF_EXPERIMENTAL_FEATURES=y