feat: add wakenet9s and support esp32c5

This commit is contained in:
xysun 2025-04-18 11:50:50 +08:00
parent 33f47da975
commit 3b30cf3102
41 changed files with 14372 additions and 31 deletions

View File

@ -47,6 +47,14 @@ before_script:
- "test_apps/esp-sr/**/*"
- "CMakeList.txt"
.patterns-test_esp_sr: &patterns-test_esp32c5
- "lib/esp32c5/*"
- "include/esp32c5/*"
- "src/**/*"
- "model/**/*"
- "test_apps/esp-sr/**/*"
- "CMakeList.txt"
.patterns-build_system: &patterns-build_system
- "test_apps/build_apps.py"
- "conftest.py"
@ -75,6 +83,15 @@ before_script:
- <<: *if-dev-push
changes: *patterns-test_esp_tts
.rules:build:test_esp32c5:
rules:
- <<: *if-protected
- <<: *if-label-build
- <<: *if-dev-push
changes: *patterns-build_system
- <<: *if-dev-push
changes: *patterns-test_esp32c5
.rules:build_docs:docs:
rules:
- <<: *if-protected
@ -136,6 +153,15 @@ build_esp_tts:
- IMAGE: [espressif/idf:release-v5.3, espressif/idf:latest]
EXAMPLES_PATH: "test_apps/esp-tts"
build_esp32c5:
extends:
- .build_test_template
- .rules:build:test_esp32c5
parallel:
matrix:
- IMAGE: [espressif/idf:release-v5.4, espressif/idf:latest]
EXAMPLES_PATH: "test_apps/esp32c5"
.test_template: &test_template
image: DOCKER_TARGET_TEST_v5_0_ENV_IMAGE
stage: target_test

View File

@ -69,51 +69,43 @@ if((${IDF_TARGET} STREQUAL "esp32s3") OR (${IDF_TARGET} STREQUAL "esp32p4") OR (
"-Wl,--end-group")
if(CONFIG_PARTITION_TABLE_CUSTOM)
set(MVMODEL_EXE ${COMPONENT_PATH}/model/movemodel.py)
idf_build_get_property(build_dir BUILD_DIR)
set(image_file ${build_dir}/srmodels/srmodels.bin)
add_custom_command(
OUTPUT ${image_file}
COMMENT "Move and Pack models..."
COMMAND python ${MVMODEL_EXE} -d1 ${SDKCONFIG} -d2 ${COMPONENT_PATH} -d3 ${build_dir}
DEPENDS ${SDKCONFIG}
VERBATIM)
add_custom_target(srmodels_bin ALL DEPENDS ${image_file})
add_dependencies(flash srmodels_bin)
partition_table_get_partition_info(size "--partition-name model" "size")
partition_table_get_partition_info(offset "--partition-name model" "offset")
if("${size}" AND "${offset}")
esptool_py_flash_to_partition(flash "model" "${image_file}")
else()
set(message "Failed to find model in partition table file"
"Please add a line(Name=model, Size>recommended size in log) to the partition file.")
endif()
endif()
elseif(${IDF_TARGET} STREQUAL "esp32c5")
set(srcs
"lib/${IDF_TARGET}/dummy.c"
"src/model_path.c"
)
set(include_dirs
"include/${IDF_TARGET}"
"src/include"
)
set(requires
json
spiffs
esp_partition
)
idf_component_register(SRCS ${srcs}
INCLUDE_DIRS ${include_dirs}
REQUIRES ${requires}
PRIV_REQUIRES spi_flash
)
component_compile_options(-ffast-math -O3 -Wno-error=format=-Wno-format)
add_prebuilt_library(esp_audio_processor "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libesp_audio_processor.a")
add_prebuilt_library(esp_audio_front_end "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libesp_audio_front_end.a")
add_prebuilt_library(dl_lib "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libdl_lib.a" PRIV_REQUIRES ${COMPONENT_NAME})
add_prebuilt_library(c_speech_features "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libc_speech_features.a" PRIV_REQUIRES ${COMPONENT_NAME})
add_prebuilt_library(hufzip "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libhufzip.a" PRIV_REQUIRES ${COMPONENT_NAME})
add_prebuilt_library(wakenet "${CMAKE_CURRENT_SOURCE_DIR}/lib/${IDF_TARGET}/libwakenet.a" PRIV_REQUIRES ${COMPONENT_NAME})
target_link_libraries(${COMPONENT_LIB} PRIVATE esp_audio_processor)
target_link_libraries(${COMPONENT_LIB} PRIVATE esp_audio_front_end)
target_link_libraries(${COMPONENT_LIB} PRIVATE dl_lib)
target_link_libraries(${COMPONENT_LIB} PRIVATE c_speech_features)
target_link_libraries(${COMPONENT_LIB} PRIVATE hufzip)
target_link_libraries(${COMPONENT_LIB} PRIVATE wakenet)
elseif((${IDF_TARGET} STREQUAL "esp32s2") OR (${IDF_TARGET} STREQUAL "esp32c3") OR (${IDF_TARGET} STREQUAL "esp32c6"))
#Only support TTS on esp32s2, esp32c3 and esp32c6
@ -139,4 +131,30 @@ target_link_libraries(${COMPONENT_TARGET} INTERFACE "-Wl,--start-group"
voice_set_xiaole
"-Wl,--end-group")
endif()
# Add model partition and flash srmodels.bin
if(CONFIG_PARTITION_TABLE_CUSTOM)
partition_table_get_partition_info(size "--partition-name model" "size")
partition_table_get_partition_info(offset "--partition-name model" "offset")
if("${size}" AND "${offset}")
set(MVMODEL_EXE ${COMPONENT_PATH}/model/movemodel.py)
idf_build_get_property(build_dir BUILD_DIR)
set(image_file ${build_dir}/srmodels/srmodels.bin)
add_custom_command(
OUTPUT ${image_file}
COMMENT "Move and Pack models..."
COMMAND python ${MVMODEL_EXE} -d1 ${SDKCONFIG} -d2 ${COMPONENT_PATH} -d3 ${build_dir}
DEPENDS ${SDKCONFIG}
VERBATIM)
add_custom_target(srmodels_bin ALL DEPENDS ${image_file})
add_dependencies(flash srmodels_bin)
esptool_py_flash_to_partition(flash "model" "${image_file}")
else()
set(message "Failed to find model in partition table file"
"Please add a line(Name=model) to the partition file if you want to use esp-sr models.")
endif()
endif()

View File

@ -52,6 +52,20 @@ choice SR_VADN_MODEL_LOAD
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4
endchoice
menu "Load Multiple Wake Words"
depends on IDF_TARGET_ESP32C5 || IDF_TARGET_ESP32C3
config SR_WN_WN9S_HILEXIN
bool "Hi,乐鑫 (wn9s_hilexin)"
default False
config SR_WN_WN9S_HIESP
bool "Hi,ESP (wn9s_hiesp)"
default False
endmenu
menu "Load Multiple Wake Words"
depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 || IDF_TARGET_ESP32

View File

@ -0,0 +1,29 @@
#pragma once
#include <float.h>
#include <math.h>
/* #undef ENABLE_DOUBLE */
#ifdef ENABLE_DOUBLE
# define csf_float double
# define csf_ceil ceil
# define csf_floor floor
# define csf_sin sin
# define csf_log log
# define csf_log10 log10
# define csf_pow pow
# define csf_sqrt sqrt
# define csf_abs fabs
# define csf_float_min DBL_MIN
#else
# define csf_float float
# define csf_ceil ceilf
# define csf_floor floorf
# define csf_sin sinf
# define csf_log logf
# define csf_log10 log10f
# define csf_pow powf
# define csf_sqrt sqrtf
# define csf_abs fabsf
# define csf_float_min FLT_MIN
#endif

418
include/esp32c5/dl_lib.h Normal file
View File

@ -0,0 +1,418 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_H
#define DL_LIB_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#include "esp_heap_caps.h"
#include "sdkconfig.h"
#define DL_SPIRAM_SUPPORT 1
#endif
#ifdef CONFIG_IDF_TARGET_ESP32S3
#include "esp32s3/rom/cache.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef int padding_state;
// /**
// * @brief Allocate a chunk of memory which has the given capabilities.
// * Equivalent semantics to libc malloc(), for capability-aware memory.
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
// *
// * @param size In bytes, of the amount of memory to allocate
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
// * @return Pointer to currently allocated heap memory
// **/
// void *heap_caps_malloc(size_t size, uint32_t caps);
/**
* @brief Allocate aligned memory from internal memory or external memory.
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
* else, allocate memory from PSRAM
*
* @param cnt Number of continuing chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits
* @return Pointer to currently allocated heap memory
*/
void *dl_lib_calloc(int cnt, int size, int align);
/**
* @brief Always allocate aligned memory from external memory.
*
* @param cnt Number of continuing chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits
* @return Pointer to currently aligned heap memory
*/
void *dl_lib_calloc_psram(int cnt, int size, int align);
/**
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
*
* @param ptr Pointer to free
*/
void dl_lib_free(void *ptr);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
* Should be good til an input of 5 or so with a steps factor of 8.
*
* @param in Floating point input
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
* @return Exp()'ed output
*/
fptp_t fast_exp(double x, int steps);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* @param in Floating point input
* @return Exp()'ed output
*/
double fast_exp_pro(double x);
/**
* @brief Does a softmax operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a softmax operation on a quantized matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a floating point number
*
* @param in Floating point input
* @return Sigmoid output
*/
fptp_t dl_sigmoid_op(fptp_t in);
/**
* @brief Does a sigmoid operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a tanh operation on a floating point number
*
* @param in Floating point input number
* @return Tanh value
*/
fptp_t dl_tanh_op(fptp_t v);
/**
* @brief Does a tanh operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
*
* @param in Floating point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
/**
* @brief Does a ReLu operation on a matrix.
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Fully connected layer operation
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
*/
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
/**
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
* this matrix only needs to be calculated once. This function does that.
*
* @param
* @return
*/
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
/**
* @brief Batch-normalize a matrix
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* @return
*/
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
/**
* @brief Do a basic LSTM layer pass.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weights Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a basic LSTM layer pass, partial quantized version.
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weights Weights for the neurons, need to be quantised
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a fully-connected layer pass, fully-quantized version.
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
/**
* @brief Do a basic LSTM layer pass, fully-quantized version
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weights Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
/**
* @brief Batch-normalize a matrix, fully-quantized version
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return
*/
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
/**
* @brief Does a ReLu operation on a matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return Sigmoid output
*/
int dl_sigmoid_op_q(const int in);
int16_t dl_sigmoid_op_q8(const int16_t in);
/**
* @brief Does a sigmoid operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return tanh output
*/
int dl_tanh_op_q(int v);
int16_t dl_tanh_op_q8(int16_t v);
void load_mat_psram_mn4(void);
void load_mat_psram_mn3(void);
void free_mat_psram_mn4(void);
void free_mat_psram_mn3(void);
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
int16_t dl_table_tanh_op(int16_t in, int exponent);
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Filter out the number greater than clip in the matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Filter out the number greater than clip in the matrix, float version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Do a basic CNN layer pass.
*
* @Warning This just supports the single channel input image, and the output is single row matrix.
That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias for the CNN layer.
* @param filter_height The height of convolution kernel
* @param filter_width The width of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1.
* @return The result of CNN layer.
*/
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
/**
* @brief Do a basic CNN layer pass, quantised wersion.
*
* @Warning This just supports the single channel input image, and the output is single row matrix.
That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
* @param bias Bias of the neurons.
* @param filter_height The height of convolution kernel
* @param filter_width The width of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1
* @return The result of CNN layer
*/
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_COEFGETTER_IF_H
#define DL_LIB_COEFGETTER_IF_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "cJSON.h"
#ifdef __cplusplus
extern "C" {
#endif
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
//dl_batch_normalize_get_sqrtvar first.
#define COEF_GETTER_HINT_BNVAR (1<<0)
/*
This struct describes the basic information of model data:
word_num: the number of wake words or speech commands
word_list: the name list of wake words or speech commands
thres_list: the threshold list of wake words or speech commands
info_str: the string used to reflect the version and information of model data
which consist of the architecture of network, the version of model data, wake words and their threshold
*/
typedef struct {
int word_num;
char **word_list;
int *win_list;
float *thresh_list;
char *info_str;
} model_info_t;
/*
Alphabet struct describes the basic grapheme or phoneme.
item_num: the number of baisc item(grapheme or phonemr)
items: the list of basic item
*/
typedef struct {
int item_num;
char **items;
}alphabet_t;
/*
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the
memory for the returned matrices, when applicable.
*/
typedef struct {
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
void (*free_f)(const dl_matrix2d_t *m);
void (*free_q)(const dl_matrix2dq_t *m);
void (*free_q8)(const dl_matrix2dq8_t *m);
const model_info_t* (*getter_info)(void *arg);
const alphabet_t* (*getter_alphabet)(void *arg);
const cJSON* (*getter_config)(void *arg);
} model_coeff_getter_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,180 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONV_QUEUE_H
#define DL_LIB_CONV_QUEUE_H
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef float fptp_t;
//Flags for matrices
// #define DL_MF_FOREIGNDATA (0) /*< Matrix *item data actually points to another matrix and should not be freed */
//Float convolution FIFO queue.
typedef struct {
int n; /*< the length of queue */
int c; /*< the channel number of queue element*/
int front; /*< the front(top) position of queue */
int flag; /*< not used*/
fptp_t *item; /*< Pointer to item array */
} dl_conv_queue_t;
/**
* @brief Allocate a convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
/**
* @brief Allocate a convolution queue from psram
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
/**
* @brief Free a convolution queue
*
* @param cq The convolution queue to free
*/
void dl_conv_queue_free(dl_conv_queue_t *cq);
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input convolution queue
* @return Pointer of oldest element
*/
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input convolution queue
* @param item The new element
*/
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a sigmoid operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @return The result of atrous convolution
*/
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @return The result of dilation layer
*/
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,303 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ8_QUEUE_H
#define DL_LIB_CONVQ8_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
//[nch, n, c]
typedef struct {
int n; /*< the length of queue */
int c; /*< the number of queue element*/
int front; /*< the front(top) position of queue */
int nch; /*< the channel of queue */
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
to get the real values */
q8tp_t *itemq; /*< Pointer to item array */
} dl_convq8_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param c The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a bit fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel Kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
* @return The result of atrous convolution
*/
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
* @return The result of dilation layer
*/
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
int8_t dl_sigmoid_lutq8(int in);
/**
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch  The channel number
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number
*/
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
/**
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
*
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
* @param offset Offset used to calculate the beginning of input conv queue
* @param nch The channel number
*/
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
/**
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* Usually, this layer is used as first layer for 8-bit network.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* Input is a 16-bit queue point, Output is an 8-bit queue point.
*
* @param in Input 16bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
/**
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input 8bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
int nch, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of 8-bit dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input 8-bit fixed-point convolution queue
* @param out Output 8-bit fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
void print_convq8(dl_convq8_queue_t *cq, int offset);
void print_convq(dl_convq_queue_t *cq, int offset);
void dl_relu_convq8(dl_convq8_queue_t *cq);
void lstmq8_free(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,382 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ_QUEUE_H
#define DL_LIB_CONVQ_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib.h"
#ifdef __cplusplus
extern "C" {
#endif
//fixed-point convolution FIFO queue.
//[nch, n, c]
typedef struct {
int n; /*< the length of queue */
int c; /*< the number of queue element*/
int front; /*< the front(top) position of queue */
int nch; /*< the multiple of queue*/
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
to get the real values */
qtp_t *itemq; /*< Pointer to item array */
} dl_convq_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
/**
* @brief Allocate a fixed-point multi-channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq_queue_free(dl_convq_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq The fixed-point convolution queue point
*/
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input fixed-point convolution queue
* @param item The new element
*/
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param last_num Offset from the front of the queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of convolution queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
/**
* @brief Does a tanh operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a
* tanh operation by this pointer, then return the pointer
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on the one of element in multi channel convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a
* tanh operation by this pointer, then return the pointer
*
* @param cq Input fixed-point multi channnel convolution queue
* @param offset Offset from the front of the queue
* @param nch The channel number of cqm
* @return Pointer of the element
*/
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
/**
* @brief Does a relu operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a
* relu operation by this pointer, then return the pointer
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, input data
stay as it is. Results are saved into the *out* array.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
* @return softmax results
*/
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @return The result of atrous convolution
*/
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int offset, int prenum);
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int prenum);
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
/**
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
*
* @param cq1 First fixed-point convolution queue
* @param cq2 Seconf fixed-point convolution queue
* @return The result of float-point convolution queue
*/
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
/**
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
*
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
* the kernel is parameters *h_weight*.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param in_weight the LSTM kernel needed by first part
* @param h_weight the LSTM kernel needed by second part
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
* @in_shift Shift ratio used in first part
* @h_shift Shift ratio used in second part
* @return The result of LSTM layer
*/
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
dl_matrix2dq_t *dl_convq16_lstm_layer(dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, dl_matrix2dq_t *in_weight, dl_matrix2dq_t *h_weight,
dl_matrix2dq_t *bias, int prenum);
/**
* @brief Allocate a fixed-point multi channel convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @param nch the channel numbet of convolution queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a fixed-point multi channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number of cqm
*/
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset the offset to calculate input convq
* @param prenum the preload size, 0: do not use preload function
* @return The result of atrous convolution
*/
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* kernel,
dl_matrix2dq_t* bias,
int shift,
int offset,
int prenum);
/**
* @brief Fast implement of dilation layer as follows for multi channel input
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @param offset The offset to calculate input convq
* @param prenum The preload size, 0: do not use preload function
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* filter_kernel,
dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel,
dl_matrix2dq_t* gate_bias,
int filter_shift,
int gate_shift,
int offset,
int prenum);
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
void test_lstm_convq(int size, int in_dim, int lstm_cell);
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,257 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIX_H
#define DL_LIB_MATRIX_H
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef float fptp_t;
#if CONFIG_BT_SHARE_MEM_REUSE
extern multi_heap_handle_t gst_heap;
#endif
//Flags for matrices
#define DL_MF_FOREIGNDATA 1 /*< Matrix pointer and item data actually points to another matrix and should not be freed */
#define DL_MF_FOREIGNITEM 2 /*< Only item data actually points to another matrix and should not be freed */
//'Normal' float matrix
typedef struct {
int w; /*< Width */
int h; /*< Height */
int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */
int flags; /*< Flags. OR of DL_MF_* values */
fptp_t *item; /*< Pointer to item array */
} dl_matrix2d_t;
//Macro to quickly access the raw items in a matrix
#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride]
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
/**
* @brief Free a matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrix_free(dl_matrix2d_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrix_zero(dl_matrix2d_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to zero
*/
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief select a range of items from an existing matrix and flatten them into one dimension.
*
* @Warning The results are flattened in row-major order.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief Generate a matrix from existing floating-point data
*
* @param w Width of resulting matrix
* @param h Height of resulting matrix
* @param data Data to populate matrix with
* @return A newaly allocated matrix populated with the given input data, or NULL if out of memory.
*/
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
/**
* @brief Multiply a pair of matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two matrices : res=a.b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Add a pair of matrices item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Added data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Divide a pair of matrices item-by-item: res=a/b
*
* @param a First matrix
* @param b Second matrix
* @param res Divided data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Subtract a matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Subtracted data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Add a constant to every item of the matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
/**
* @brief Concatenate the rows of two matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated array with as avlues a|b
*/
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Print the contents of a matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrix(const dl_matrix2d_t *a);
/**
* @brief Return the average square error given a correct and a test matrix.
*
* ...Well, more or less. If anything, it gives an indication of the error between
* the two. Check the code for the exact implementation.
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return value indicating the relative difference between matrices
*/
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Get a specific item from the matrix
*
* Please use these for external matrix access instead of DL_ITM
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
return DL_ITM(m, x, y);
}
/**
* @brief Set a specific item in the matrix to the given value
*
* Please use these for external matrix access instead of DL_ITM
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
DL_ITM(m, x, y)=val;
}
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,387 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ_H
#define DL_LIB_MATRIXQ_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef int16_t qtp_t;
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
//for easy use as a multiplicand without stressing out the flash cache too much.
typedef struct {
int w;
int h;
int stride; //Normally equals h, not w!
int flags;
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
qtp_t *itemq;
} dl_matrix2dq_t;
#define DL_QTP_SHIFT 15
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
#define DL_ITMQ(m, x, y) m->itemq[(y)+(x)*m->stride]
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
#define DL_SHIFT_AUTO 32
/**
* @info About quantized matrices and shift values
*
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
* time. Quantized matrices can speed up your operations, but come with some quirks, and
* it's good to understand how they work before using them.
*
* The data in the quantized matrix type is stored similarily to floating-point types:
* when storing a real value, the value is stored as a mantissa (base number) and an
* exponent. The 'real' value that can be re-derived from those two numbers is something
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
* the standard floating point implementations like e.g. IEEE-754.
*
* The difference with respect to quantized matrices is that for a quantized matrix, it is
* assumed all values stored have more-or-less the same order of magnitude. This allows the
* matrix to only store all the mantissas, while the exponents are shared; there is only one
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
* integer arithmetic. It also nets us some memory savings - while normally a floating point
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
* memory requirements.
*
* While most of the details of handling the intricacies of the quantized matrixes are done
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
* specifically in places where addition/subtraction/division happens.
*
* The problem is that the routines do not know what the size of the resulting operation is. For
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
* assume the mantissas needs to be scaled back, we may lose precision.
*
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
* the argument is zero, the routine will be conservative, that is, increase the exponent of
* the result to such an extent it's mathematically impossible a value in the result will exceed
* the maximum value that can be stored. However, when this argument is larger than zero, the
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
* value possible. (Neural networks usually are okay with this happening for a limited amount
* of matrix indices).
*
* For deciding on these shift values, it is recommended to start with a shift value of one, then
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
* else.
*
* For neural networks and other noise-tolerant applications, note that even when
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
* to slightly improved precision. Feel free to experiment.
**/
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
/**
* @brief Convert a floating-point matrix to a quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
/**
* TODO: DESCRIBE THIS FUNCTION
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
/**
* @brief Convert a quantized matrix to a floating-point one.
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
**/
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrixq_free(dl_matrix2dq_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrixq_zero(dl_matrix2dq_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
*/
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a fixed-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
* much slower than dl_matrixq_dot .
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a floating-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
* much slower than dl_matrixq_dot_matrix_out.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
*
* @param a First multiplicand; float matrix
* @param b Second multiplicand; quantized matrix
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrixq(const dl_matrix2dq_t *a);
/**
* @brief Add a pair of quantizedmatrices item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Added data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @Warning In contrast to the floating point equivalent of this function, the fixed-point version
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
* garbage after modifying the data in one of the slices.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief select a range of items from an existing matrix and flatten them into one dimension.
*
* @Warning The results are flattened in row-major order.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Subtracted data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
*/
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
/**
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
*
* @param a First matrix
* @param b Second matrix
* @param res Divided data. Can be equal to a or b to overwrite that.
*/
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
/**
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
* rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Concatenate the rows of two quantized matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated quantized matrix with as values a|b
*/
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Add a constant to every item of the quantized matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
/**
* @brief Check the sanity of a quantized matrix
*
* Due to the nature of quantized matrices, depending on the calculations a quantized
* matrix is the result of and the shift values chosen in those calculations, a quantized
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
* most significant mantissa bits are unused, or because a fair amount of mantissas are
* clipped. This function checks if this is the case and will report a message to stdout
* if significant loss of precision is detected.
*
* @param m The quantized matrix to check
* @param name A string to be displayed in the message if the sanity check fails
* @return True if matrix is sane, false otherwise
**/
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
/**
* @brief re-adjust the exponent of the matrix to fit the mantissa better
*
* This function will shift up all the data in the mantissas so there are no
* most-significant bits that are unused in all mantissas. It will also adjust
* the exponent to keep the actua values in the matrix the same.
*
* Some operations done on a matrix, especially operations that re-use the
* result of earlier operations done in the same way, can lead to the loss of
* data because the exponent of the quantized matrix is never re-adjusted. You
* can do that implicitely by calling this function.
*
* @param m The matrix to re-adjust
**/
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
/**
* @brief Get the floating-point value of a specific item from the quantized matrix
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
/**
* @brief Set a specific item in the quantized matrix to the given
* floating-point value
*
* @warning If the given value is more than the exponent in the quantized matrix
* allows for, all mantissas in the matrix will be shifted down to make the value
* 'fit'. If, however, the exponent is such that the value would result in a
* quantized mantissa of 0, nothing is done.
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,80 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ8_H
#define DL_LIB_MATRIXQ8_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#include "dl_lib.h"
#include "dl_lib_matrixq.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef int8_t q8tp_t;
typedef struct {
int w;
int h;
int stride; //Normally equals h, not w!
int flags;
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
q8tp_t *itemq;
} dl_matrix2dq8_t;
#define DL_Q8TP_SHIFT 7
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
#define DL_ITMQ8(m, x, y) m->itemq[(y)+(x)*m->stride]
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrixq8_free(dl_matrix2dq8_t *m);
/**
* @brief Copy a quantized matrix
* Copy a quantized matrix from flash or iram/psram
*
* @param m Matrix to copy
*/
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
/**
* @brief Convert a floating-point matrix to a quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
*/
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,86 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
// please refer to its documentation for details.
typedef struct {
int winstep_ms; // The step between successive windows in ms. (10)
int winlen_ms; // The length of the analysis window in ms. (25)
int nch; // The number of input channel
int numcep; // The number of cepstrum to return
int nfilter; // The number of filters in the filterbank
int nfft; // The FFT size
int samp_freq; // The sample-rate of the signal.
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
char *win_type; // Analysis window type to apply to each frame "hanning","hamming","sine","rectangular","povey"
bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
float log_epsilon; // log epsilon. (e.g. 1e-7)
bool psram_first; // Alloc memory from PSRAM first
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free a mfcc runner
*
* Function to free a previously allocated mfcc runner.
*
* @param r Runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Initialize parameters for a mfcc runner.
*
* After creation, a mfcc runner needs to be initialized first; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process
* @return True if success, false on error.
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run a mfcc iteration on frame by frame
*
* This will take a set of samples and return a ceptrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000).
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
* to this function is done.
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
/**
* @brief Clean all state of mfcc handle
*
* @param r The mfcc runner
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on a mfcc runner
*/
typedef struct {
esp_mfcc_op_destroy_t destroy;
esp_mfcc_op_create_t create;
esp_mfcc_op_run_step_t run_step;
esp_mfcc_op_clean_t clean;
} esp_mfcc_iface_t;

View File

@ -0,0 +1,89 @@
#pragma once
#include "esp_speech_features.h"
#include <stdint.h>
/*
This describes an interface for a MFCC runner, that is, some kind of implementation that can be
fed sample chunks and returns the MFCC cepstrum of those samples. This is an abstracted interface so
multiple implementations can be used.
*/
typedef struct esp_mfcc_data_t esp_mfcc_data_t;
// Options for the mfcc algorithm itself. These more-or-less match the parameters of csf_mfcc (from c_speech_features),
// please refer to its documentation for details.
typedef struct {
int winstep_ms; // The step between successive windows in ms. (10)
int winlen_ms; // The length of the analysis window in ms. (25)
int nch; // The number of input channel
int numcep; // The number of cepstrum to return
int nfilter; // The number of filters in the filterbank
int nfft; // The FFT size
int samp_freq; // The sample-rate of the signal.
int low_freq; // The lowest band edge of mel filters, in hz. (e.g. 0)
int high_freq; // The highest band edge of mel filters, in hz. Must not be higher than samp_freq
float preemph; // Preemphasis filter coefficient. 0 is no filter. (e.g. 0.97)
char *win_type; // Analysis window type to apply to each frame "hanning","hamming","sine","rectangular","povey"
bool append_energy; //  If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
bool use_power; // If true, use power of fft spectrum, else use magnitude of fft spectrum
int use_log_fbank; // 0: return fbank, 1: return log(x+log_epsilon), 2: return log(max(x, log_epsilon))
float log_epsilon; // log epsilon. (e.g. 1e-7)
bool psram_first; // Alloc memory from PSRAM first
bool remove_dc_offset; // Whether to subtract mean of wave before FFT
} esp_mfcc_opts_t;
/**
* @brief Un-initialize and free a mfcc runner
*
* Function to free a previously allocated mfcc runner.
*
* @param r Runner object to destroy
*/
typedef void (*esp_mfcc_op_destroy_t)(esp_mfcc_data_t *r);
/**
* @brief Initialize parameters for a mfcc runner.
*
* After creation, a mfcc runner needs to be initialized first; this is usually done
* in the initialization routine of a speech recognition algorithm. This provides
* a pointer to do this for a specific mfcc runner.
*
* @param opt Options for the mfcc process
* @return True if success, false on error.
*/
typedef esp_mfcc_data_t *(*esp_mfcc_op_create_t)(const esp_mfcc_opts_t *opt);
/**
* @brief Run a mfcc iteration on frame by frame
*
* This will take a set of samples and return a ceptrum. Note that this may be pipelined:
* an initial call to this function may return NULL and subsequent calls may return the
* cepstrum of previous calls.
*
* @param r The mfcc runner
* @param samp An array of signed 16-bit samples. The amount of samples should be sampfreq/(winstep_ms/1000).
* @return A set of cepstral values, or NULL if no such values are available yet. Free using the free_cepbuf function
* when done with this buffer. Note that some implementations require the buffer to be freed before another call
* to this function is done.
*/
typedef float *(*esp_mfcc_op_run_step_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t nch);
typedef void (*esp_mfcc_op_run_step_s16_t)(esp_mfcc_data_t *r, int16_t *samp, int16_t *fbank);
/**
* @brief Clean all state of mfcc handle
*
* @param r The mfcc runner
*/
typedef void (*esp_mfcc_op_clean_t)(esp_mfcc_data_t *r);
/**
* @brief Operations possible on a mfcc runner
*/
typedef struct {
esp_mfcc_op_destroy_t destroy;
esp_mfcc_op_create_t create;
esp_mfcc_op_run_step_t run_step;
esp_mfcc_op_run_step_s16_t run_step_s16;
esp_mfcc_op_clean_t clean;
} esp_mfcc_iface_t;

View File

@ -0,0 +1,44 @@
#pragma once
#include "esp_mfcc_iface.h"
extern const esp_mfcc_iface_t esp_fbank_f32; // float32-fbank handle
extern const esp_mfcc_iface_t esp_fbank_s16; // int16-fbank handle
/**
* @brief Return basic opts used in wakenet9 & multinet5
**/
esp_mfcc_opts_t *get_mfcc_opts_wn9();
/**
* @brief Return basic opts used in wakenet9s
**/
esp_mfcc_opts_t *get_mfcc_opts_wn9s16();
/**
* @brief Return basic opts for default kaldifeat
*
opts->psram_first = true;
opts->use_power = true;
opts->use_log_fbank = 2; // log(max(x, log_epsilon))
opts->log_epsilon = 1.1920928955078125e-07f; // torch.finfo(torch.float32).eps
opts->win_type = "povey";
opts->low_freq = 20;
opts->high_freq = 7600;
opts->samp_freq = 16000;
opts->nch = 1;
opts->nfft = 512;
opts->nfilter = 80;
opts->numcep = 80;
opts->preemph = 0.97;
opts->append_energy = false;
opts->winlen_ms = 25;
opts->winstep_ms = 10;
opts->remove_dc_offset = true;
*
**/
esp_mfcc_opts_t *get_mfcc_opts_kaldi();
/**
* @brief Print mfcc opts
**/
void print_mfcc_opts(esp_mfcc_opts_t *opts);

View File

@ -0,0 +1,62 @@
#pragma once
#include "c_speech_features_config.h"
#include "stdlib.h"
#include <assert.h>
#include <stdbool.h>
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif
typedef struct {
float *coeff;
int *bank_pos;
int nfilter;
} esp_mel_filter_t;
float *esp_mfcc_malloc(size_t size, bool from_psram);
void esp_mfcc_free(void *ptr);
/**
* @brief Initialize FFT table
* @warning For ESP-PLATFORM, use esp-dsp fft
* For Other platform, use kiss fft
*
* @param nfft The input samples number
* @return fft-table
**/
void *esp_fft_init(int nfft);
/**
* @brief Free FFT table
* @warning For ESP-PLATFORM, use esp-dsp fft
* For Other platform, use kiss fft
*
* @param fft_table The fft table initialized by esp_fft_init
* @param nfft The input samples number
* @return fft-table
**/
void esp_fft_deinit(void *fft_table, int nfft);
/**
* @brief Initial window function
* Currently support hanning, hamming, sine, povey, rectangular,
* wn9(512-hanning to get wakenet9& multinet5 compatible)
**/
float *esp_win_func_init(char *win_type, float *window_data, int frame_length);
float *esp_fftr(float *x, int nfft, void *fft_table);
float *esp_spectrum_step(float *x, int nfft, bool use_power, void *fft_table);
void esp_audio_short_to_float(short *samples, float *x, int len, int remove_dc);
float *esp_preemphasis_step(float *x, unsigned int len, float coeff, float last);
esp_mel_filter_t *esp_mel_filter_init(
int nfft, int nfilter, int low_freq, int high_freq, int samp_freq, bool from_psram);
void esp_mel_filter_deinit(esp_mel_filter_t *mel_filter);
float *esp_mel_dotprod_step(float *x, float *out, esp_mel_filter_t *mel_filter, int use_log_fbank, float epsilon);

View File

@ -0,0 +1,215 @@
#pragma once
#include "stdint.h"
#include "dl_lib_convq_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
//Opaque model data container
typedef struct model_iface_data_t model_iface_data_t;
/**
* @brief The state of wakeup
*/
typedef enum
{
WAKENET_NO_DETECT = 0, // wake word is not detected
WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
WAKENET_DETECTED = 1 // wake word is detected
} wakenet_state_t;
//Set wake words recognition operating mode
//The probability of being wake words is increased with increasing mode,
//As a consequence also the false alarm rate goes up
typedef enum {
DET_MODE_90 = 0, // Normal
DET_MODE_95 = 1, // Aggressive
DET_MODE_2CH_90 = 2,
DET_MODE_2CH_95 = 3,
DET_MODE_3CH_90 = 4,
DET_MODE_3CH_95 = 5,
} det_mode_t;
typedef struct {
int wake_word_num; //The number of all wake words
char **wake_word_list; //The name list of wake words
} wake_word_info_t;
/**
* @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient
*
* @param model_name The specified wake word model coefficient
* @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
* @returns Handle to the model data
*/
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
/**
* @brief Get the amount of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Get the channel number of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
* @brief Get the start point of wake word when one wake word is detected.
*
* @Warning: This function should be called when the channel index is verified.
* The returned value is the number of samples from start point of wake word to detected point.
*
* @param model The model object to query
* @return The number of samples from start point to detected point (end point)
*/
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in hz
*/
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Get the number of wake words
*
* @param model The model object to query
* @returns the number of wake words
*/
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
/**
* @brief Get the name of wake word by index
*
* @Warning The index of wake word start with 1
* @param model The model object to query
* @param word_index The index of wake word
* @returns the detection threshold
*/
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
/**
* @brief Set the detection threshold to manually abjust the probability
*
* @param model The model object to query
* @param det_treshold The threshold to trigger wake words, the range of det_threshold is 0.5~0.9999
* @param word_index The index of wake word
* @return 0: setting failed, 1: setting success
*/
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
/**
* @brief Get the wake word detection threshold of different modes
*
* @param model The model object to query
* @param word_index The index of wake word
* @returns the detection threshold
*/
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
/**
* @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
*
* @Warning The index of wake word start with 1, 0 means no wake words is detected.
*
* @param model The model object to query
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
* get_samp_chunksize function.
* @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
*/
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Get the volume gain
*
* @param model The model object to query
* @param target_db The target dB to calculate volume gain
* @returns the volume gain
*/
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
/**
* @brief Get the triggered channel index. Channel index starts from zero
*
* @param model The model object to query
* @return The channel index
*/
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
* @brief Clean all states of model
*
* @param model The model object to query
*/
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
/**
* @brief Destroy a speech recognition model
*
* @param model Model object to destroy
*/
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* @brief Feed MFCC of an audio stream to the vad model and detect whether is
* voice.
*
* @param model The model object to query
* @param cq An array of 16-bit MFCC.
* @return The index of wake words, return 0 if no wake word is detected, else
* the index of the wake words.
*/
typedef wakenet_state_t (*esp_wn_iface_op_detect_mfcc_t)(model_iface_data_t *model, int16_t *samples, dl_convq_queue_t *cq);
/**
* @brief Get MFCC of an audio stream
*
* @param model The model object to query
* @return MFCC data
*/
typedef dl_convq_queue_t* (*esp_wn_iface_op_get_mfcc_data_t)(model_iface_data_t *model);
/**
* This structure contains the functions used to do operations on a wake word detection model.
*/
typedef struct {
esp_wn_iface_op_create_t create;
esp_wn_iface_op_get_start_point_t get_start_point;
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_wn_iface_op_get_channel_num_t get_channel_num;
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
esp_wn_iface_op_get_word_num_t get_word_num;
esp_wn_iface_op_get_word_name_t get_word_name;
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
esp_wn_iface_op_detect_t detect;
esp_wn_iface_op_detect_mfcc_t detect_mfcc;
esp_wn_iface_op_get_mfcc_data_t get_mfcc_data;
esp_wn_iface_op_clean_t clean;
esp_wn_iface_op_destroy_t destroy;
} esp_wn_iface_t;
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,52 @@
#pragma once
#include "esp_wn_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
// The prefix of wakenet model name is used to filter all wakenet from availabel models.
#define ESP_WN_PREFIX "wn"
/**
* @brief Get the wakenet handle from model name
*
* @param model_name The name of model
* @returns The handle of wakenet
*/
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
/**
* @brief Get the wake word name from model name
*
* @param model_name The name of model
* @returns The wake word name, like "alexa","hilexin","xiaoaitongxue"
*/
char* esp_wn_wakeword_from_name(const char *model_name);
#ifdef __cplusplus
}
#endif
/*
static const sr_model_iface_t *model = esp_wn_handle_from_name(model_name);
//Initialize wakeNet model data
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
//Set parameters of buffer
int audio_chunksize=model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
int r=model->detect(model_data, buffer);
if (r>0) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
model->destroy(model_data)
*/

Binary file not shown.

BIN
lib/esp32c5/libdl_lib.a Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
lib/esp32c5/libhufzip.a Normal file

Binary file not shown.

BIN
lib/esp32c5/libwakenet.a Normal file

Binary file not shown.

View File

@ -0,0 +1 @@
wakenet9s_v4h8_Alexa_3_0.640_0.650

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
wakenet9s_tts2h8_Hi,ESP_3_0.636_0.642

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
wakenet9s_tts2h8_Hi,乐鑫_3_0.635_0.640

Binary file not shown.

Binary file not shown.

View File

@ -4,9 +4,9 @@
#include "string.h"
#include <dirent.h>
#include <sys/stat.h>
#ifndef CONFIG_IDF_TARGET_ESP32P4
#include "esp_mn_models.h"
#endif
// #ifndef CONFIG_IDF_TARGET_ESP32P4
// #include "esp_mn_models.h"
// #endif
#ifdef ESP_PLATFORM
#include "esp_idf_version.h"

View File

@ -2,6 +2,7 @@
set(srcs
"app_main.cpp"
"test_aec.cpp"
"test_wakenet.cpp"
)
idf_component_register(SRCS ${srcs}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,137 @@
/* test_mean.c: Implementation of a testable component.
This example code is in the Public Domain (or CC0 licensed, at your option.)
Unless required by applicable law or agreed to in writing, this
software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied.
*/
#include "string.h"
#include <limits.h>
#include "unity.h"
#include "model_path.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "hilexin.h"
#include "hiesp.h"
#include "dl_lib_convq_queue.h"
#include <sys/time.h>
TEST_CASE("wakenet create/destroy API & memory leak", "[wn]")
{
vTaskDelay(500 / portTICK_PERIOD_MS);
int start_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
int start_internal_size = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
srmodel_list_t *models = esp_srmodel_init("model");
char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
esp_wn_iface_t *wakenet = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
// test model loading time
struct timeval tv_start, tv_end;
gettimeofday(&tv_start, NULL);
model_iface_data_t *model_data = wakenet->create(model_name, DET_MODE_3CH_95);
gettimeofday(&tv_end, NULL);
int tv_ms = (tv_end.tv_sec - tv_start.tv_sec) * 1000 + (tv_end.tv_usec - tv_start.tv_usec) / 1000;
printf("create latency:%d ms\n", tv_ms);
// test model memory concumption
int create_size = start_size - heap_caps_get_free_size(MALLOC_CAP_8BIT);
int create_internal_size = start_internal_size - heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
printf("Internal RAM: %d, PSRAM:%d\n", create_internal_size, create_size - create_internal_size);
wakenet->destroy(model_data);
esp_srmodel_deinit(models);
// test memory leak
int first_end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
int last_end_size = first_end_size;
int mem_leak = start_size - last_end_size;
printf("create&destroy times:%d, memory leak:%d\n", 1, mem_leak);
for (int i = 0; i < 6; i++) {
printf("init partition ...\n");
models = esp_srmodel_init("model");
model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
wakenet = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
// char *wake_words = esp_srmodel_get_wake_words(models, model_name);
printf("create ...\n");
// typedef enum {
// DET_MODE_90 = 0, // Normal
// DET_MODE_95 = 1, // Aggressive
// DET_MODE_2CH_90 = 2,
// DET_MODE_2CH_95 = 3,
// DET_MODE_3CH_90 = 4,
// DET_MODE_3CH_95 = 5,
// } det_mode_t;
model_data = wakenet->create(model_name, (det_mode_t)i);
printf("destroy ...\n");
wakenet->destroy(model_data);
// free(wake_words);
esp_srmodel_deinit(models);
last_end_size = heap_caps_get_free_size(MALLOC_CAP_8BIT);
mem_leak = start_size - last_end_size;
printf("create&destroy times:%d, memory leak:%d\n", i + 2, mem_leak);
}
TEST_ASSERT_EQUAL(true, (mem_leak) < 1000 && last_end_size == first_end_size);
}
TEST_CASE("wakenet detect API & cpu loading", "[wn]")
{
vTaskDelay(500 / portTICK_PERIOD_MS);
srmodel_list_t *models = esp_srmodel_init("model");
char *model_name = esp_srmodel_filter(models, ESP_WN_PREFIX, NULL);
esp_wn_iface_t *wakenet = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
model_iface_data_t *model_data = wakenet->create(model_name, DET_MODE_95);
int frequency = wakenet->get_samp_rate(model_data);
int audio_chunksize = wakenet->get_samp_chunksize(model_data) * sizeof(int16_t);
int16_t *buffer = (int16_t *) malloc(audio_chunksize);
int chunks = 0;
int detected = 0;
struct timeval tv_start, tv_end;
gettimeofday(&tv_start, NULL);
unsigned char* data = NULL;
size_t data_size = 0;
char *wake_words = NULL;
wake_words = esp_srmodel_get_wake_words(models, model_name);
if (strstr(model_name, "hiesp") != NULL) {
data = (unsigned char*)hiesp;
data_size = sizeof(hiesp);
printf("wake word: %s, size:%d\n", wake_words, data_size);
} else if(strstr(model_name, "hilexin") != NULL) {
data = (unsigned char*)hilexin;
data_size = sizeof(hilexin);
printf("wake word: %s, size:%d\n", wake_words, data_size);
}
while (1) {
if ((chunks + 1)*audio_chunksize <= data_size) {
memcpy(buffer, data + chunks * audio_chunksize, audio_chunksize);
} else {
memset(buffer, 0, audio_chunksize);
}
int res = wakenet->detect(model_data, buffer);
if (res > 0) {
detected = 1;
}
chunks++;
if (detected == 1) {
break;
}
}
gettimeofday(&tv_end, NULL);
int tv_ms = (tv_end.tv_sec - tv_start.tv_sec) * 1000 + (tv_end.tv_usec - tv_start.tv_usec) / 1000;
int run_ms = (chunks) * audio_chunksize / sizeof(int16_t) * 1000 / frequency;
float cpu_loading = tv_ms * 100.0 / run_ms;
printf("Done! Took %d ms to parse %d ms worth of samples in %d iterations. CPU loading(single core):%.1f%%\n",
tv_ms, run_ms, chunks, cpu_loading);
wakenet->destroy(model_data);
esp_srmodel_deinit(models);
TEST_ASSERT_EQUAL(true, (cpu_loading < 75 && detected == 1));
}

View File

@ -0,0 +1,5 @@
# Name, Type, SubType, Offset, Size, Flags
# Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild
factory, app, factory, 0x010000, 3000K,
model, data, spiffs, , 1000K,
1 # Name, Type, SubType, Offset, Size, Flags
2 # Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild
3 factory, app, factory, 0x010000, 3000K,
4 model, data, spiffs, , 1000K,

View File

@ -0,0 +1,6 @@
# This file was generated using idf.py save-defconfig. It can be edited manually.
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
#
CONFIG_IDF_TARGET="esp32c5"
CONFIG_ESP_MAIN_TASK_STACK_SIZE=148584
CONFIG_ESP_TASK_WDT_EN=n

View File

@ -1,6 +1,10 @@
# This file was generated using idf.py save-defconfig. It can be edited manually.
# Espressif IoT Development Framework (ESP-IDF) 5.5.0 Project Minimal Configuration
# Espressif IoT Development Framework (ESP-IDF) 5.4.1 Project Minimal Configuration
#
CONFIG_IDF_TARGET="esp32c5"
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y
CONFIG_PARTITION_TABLE_CUSTOM=y
CONFIG_SR_WN_WN9S_HIESP=y
CONFIG_ESP_MAIN_TASK_STACK_SIZE=148584
CONFIG_ESP_TASK_WDT_EN=n