Merge branch 'feature/wakeNet6' into 'master'

Feature/wakenet6

See merge request speech-recognition-internal/esp_sr_public!8
This commit is contained in:
Sun Xiang Yu 2019-09-25 11:33:26 +08:00
commit b8c8606819
16 changed files with 167 additions and 81 deletions

View File

@ -1,5 +1,9 @@
# Change log for esp-sr
## 0.3.0(dev)
add wakenet6
support cmake
## 0.2.0
add acoustic algorithms, including AEC, AGC, VAD, NS
add wakenet5X2 and wakenet5X3

View File

@ -54,11 +54,15 @@ config SR_WN5X2_NIHAOXIAOZHI
bool "nihaoxiaozhi (WakeNet5X2)"
depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT
config SR_WN5X3_NIHAOXIAOXIN
bool "nihaoxiaoxin (WakeNet5X3)"
depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT
config SR_WN5_CUSTOMIZED_WORD
bool "customized word (WakeNet5)"
depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT
config SR_WN6_HILEXIN
config SR_WN6_NIHAOXIAOXIN
bool "nihaoxiaoxin (WakeNet6)"
depends on SR_MODEL_WN6_QUANT
@ -68,32 +72,57 @@ config SR_WN6_CUSTOMIZED_WORD
endchoice
choice SR_MN_MODEL_SEL
prompt "speech commands recognition model to use"
default CONFIG_MN1_MODEL_FLOAT
help
Select the model to be used.
choice SR_RUN_WN6_CORE
config SR_MN1_MODEL_QUANT
bool "MultiNet 1 (quantized)"
depends on SR_MODEL_WN6_QUANT || SR_MODEL_WN6_FLOAT
prompt "ESP32 core to run WakeNet6"
default SR_RUN_WM6_CORE1
help
Select one ESP32 core to run WakeNet6.
config SR_RUN_WN6_CORE0
bool "core 0"
config SR_RUN_WN6_CORE1
bool "core 1"
endchoice
choice SR_MN_MODEL_SEL
prompt "speech commands recognition model to use"
default CONFIG_MN1_MODEL_FLOAT
help
Select the model to be used.
config SR_MN1_MODEL_FLOAT
bool "MultiNet 1 (float)"
config SR_MN1_MODEL_QUANT
bool "MultiNet 1 (quantized)"
endchoice
choice SR_LANGUAGE_SEL
prompt "langugae"
default SR_MN1_CHINESE
help
Select the language to be used.
prompt "langugae"
default SR_MN1_CHINESE
help
Select the language to be used.
config SR_MN1_CHINESE_FLOAT
bool "chinese (MultiNet1)"
depends on SR_MN1_MODEL_FLOAT
config SR_MN1_CHINESE_QUANT
bool "chinese (MultiNet1)"
depends on SR_MN1_MODEL_QUANT
bool "chinese (MultiNet1)"
depends on SR_MN1_MODEL_QUANT
endchoice
config SPEECH_COMMANDS_NUM
int "The number of speech commands"
default 0
default 20
help
The number of the speech commands.
@ -101,83 +130,83 @@ menu "Add speech commands"
config SPEECH_COMMAND_ID0
string "ID0"
default ""
default "da kai kong tiao"
config SPEECH_COMMAND_ID1
string "ID1"
default ""
default "guan bi kong tiao"
config SPEECH_COMMAND_ID2
string "ID2"
default ""
default "zeng da feng su"
config SPEECH_COMMAND_ID3
string "ID3"
default ""
default "jian xiao feng su"
config SPEECH_COMMAND_ID4
string "ID4"
default ""
default "sheng gao yi du"
config SPEECH_COMMAND_ID5
string "ID5"
default ""
default "jiang di yi du"
config SPEECH_COMMAND_ID6
string "ID6"
default ""
default "zhi re mo shi"
config SPEECH_COMMAND_ID7
string "ID7"
default ""
default "zhi leng mo shi"
config SPEECH_COMMAND_ID8
string "ID8"
default ""
default "song feng mo shi"
config SPEECH_COMMAND_ID9
string "ID9"
default ""
default "jie neng mo shi"
config SPEECH_COMMAND_ID10
string "ID10"
default ""
default "guan bi jie neng mo shi"
config SPEECH_COMMAND_ID11
string "ID11"
default ""
default "chu shi mo shi"
config SPEECH_COMMAND_ID12
string "ID12"
default ""
default "guan bi chu shi mo shi"
config SPEECH_COMMAND_ID13
string "ID13"
default ""
default "da kai lan ya"
config SPEECH_COMMAND_ID14
string "ID14"
default ""
default "guan bi lan ya"
config SPEECH_COMMAND_ID15
string "ID15"
default ""
default "bo fang ge qu"
config SPEECH_COMMAND_ID16
string "ID16"
default ""
default "zan ting bo fang"
config SPEECH_COMMAND_ID17
string "ID17"
default ""
default "ding shi yi xiao shi"
config SPEECH_COMMAND_ID18
string "ID18"
default ""
default "da kai dian deng"
config SPEECH_COMMAND_ID19
string "ID19"
default ""
default "guan bi dian deng"
config SPEECH_COMMAND_ID20
string "ID20"

View File

@ -16,6 +16,7 @@
#include "dl_lib_matrixq.h"
#include "dl_lib_conv_queue.h"
//fixed-point convolution FIFO queue.
typedef struct {
@ -135,6 +136,8 @@ fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
*/
qtp_t *dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift);
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset);
/**
* @brief Fast implement of dilation layer as follows
*
@ -166,4 +169,6 @@ dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
#endif

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -54,11 +54,15 @@ config SR_WN5X2_NIHAOXIAOZHI
bool "nihaoxiaozhi (WakeNet5X2)"
depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT
config SR_WN5X3_NIHAOXIAOXIN
bool "nihaoxiaoxin (WakeNet5X3)"
depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT
config SR_WN5_CUSTOMIZED_WORD
bool "customized word (WakeNet5)"
depends on SR_MODEL_WN5_QUANT || SR_MODEL_WN5_FLOAT
config SR_WN6_HILEXIN
config SR_WN6_NIHAOXIAOXIN
bool "nihaoxiaoxin (WakeNet6)"
depends on SR_MODEL_WN6_QUANT
@ -68,32 +72,57 @@ config SR_WN6_CUSTOMIZED_WORD
endchoice
choice SR_MN_MODEL_SEL
prompt "speech commands recognition model to use"
default CONFIG_MN1_MODEL_FLOAT
help
Select the model to be used.
choice SR_RUN_WN6_CORE
config SR_MN1_MODEL_QUANT
bool "MultiNet 1 (quantized)"
depends on SR_MODEL_WN6_QUANT || SR_MODEL_WN6_FLOAT
prompt "ESP32 core to run WakeNet6"
default SR_RUN_WM6_CORE1
help
Select one ESP32 core to run WakeNet6.
config SR_RUN_WN6_CORE0
bool "core 0"
config SR_RUN_WN6_CORE1
bool "core 1"
endchoice
choice SR_MN_MODEL_SEL
prompt "speech commands recognition model to use"
default CONFIG_MN1_MODEL_FLOAT
help
Select the model to be used.
config SR_MN1_MODEL_FLOAT
bool "MultiNet 1 (float)"
config SR_MN1_MODEL_QUANT
bool "MultiNet 1 (quantized)"
endchoice
choice SR_LANGUAGE_SEL
prompt "langugae"
default SR_MN1_CHINESE
help
Select the language to be used.
prompt "langugae"
default SR_MN1_CHINESE
help
Select the language to be used.
config SR_MN1_CHINESE_FLOAT
bool "chinese (MultiNet1)"
depends on SR_MN1_MODEL_FLOAT
config SR_MN1_CHINESE_QUANT
bool "chinese (MultiNet1)"
depends on SR_MN1_MODEL_QUANT
bool "chinese (MultiNet1)"
depends on SR_MN1_MODEL_QUANT
endchoice
config SPEECH_COMMANDS_NUM
int "The number of speech commands"
default 0
default 20
help
The number of the speech commands.
@ -101,83 +130,83 @@ menu "Add speech commands"
config SPEECH_COMMAND_ID0
string "ID0"
default ""
default "da kai kong tiao"
config SPEECH_COMMAND_ID1
string "ID1"
default ""
default "guan bi kong tiao"
config SPEECH_COMMAND_ID2
string "ID2"
default ""
default "zeng da feng su"
config SPEECH_COMMAND_ID3
string "ID3"
default ""
default "jian xiao feng su"
config SPEECH_COMMAND_ID4
string "ID4"
default ""
default "sheng gao yi du"
config SPEECH_COMMAND_ID5
string "ID5"
default ""
default "jiang di yi du"
config SPEECH_COMMAND_ID6
string "ID6"
default ""
default "zhi re mo shi"
config SPEECH_COMMAND_ID7
string "ID7"
default ""
default "zhi leng mo shi"
config SPEECH_COMMAND_ID8
string "ID8"
default ""
default "song feng mo shi"
config SPEECH_COMMAND_ID9
string "ID9"
default ""
default "jie neng mo shi"
config SPEECH_COMMAND_ID10
string "ID10"
default ""
default "guan bi jie neng mo shi"
config SPEECH_COMMAND_ID11
string "ID11"
default ""
default "chu shi mo shi"
config SPEECH_COMMAND_ID12
string "ID12"
default ""
default "guan bi chu shi mo shi"
config SPEECH_COMMAND_ID13
string "ID13"
default ""
default "da kai lan ya"
config SPEECH_COMMAND_ID14
string "ID14"
default ""
default "guan bi lan ya"
config SPEECH_COMMAND_ID15
string "ID15"
default ""
default "bo fang ge qu"
config SPEECH_COMMAND_ID16
string "ID16"
default ""
default "zan ting bo fang"
config SPEECH_COMMAND_ID17
string "ID17"
default ""
default "ding shi yi xiao shi"
config SPEECH_COMMAND_ID18
string "ID18"
default ""
default "da kai dian deng"
config SPEECH_COMMAND_ID19
string "ID19"
default ""
default "guan bi dian deng"
config SPEECH_COMMAND_ID20
string "ID20"

View File

@ -504,7 +504,6 @@ CONFIG_LWIP_MAX_RAW_PCBS=16
#
CONFIG_SR_MODEL_WN3_QUANT=
CONFIG_SR_MODEL_WN4_QUANT=
CONFIG_SR_MODEL_WN5_FLOAT=
CONFIG_SR_MODEL_WN5_QUANT=y
CONFIG_SR_MODEL_WN6_QUANT=
CONFIG_SR_WN5_HILEXIN=
@ -512,6 +511,7 @@ CONFIG_SR_WN5X2_HILEXIN=y
CONFIG_SR_WN5X3_HILEXIN=
CONFIG_SR_WN5_NIHAOXIAOZHI=
CONFIG_SR_WN5X2_NIHAOXIAOZHI=
CONFIG_SR_WN5X3_NIHAOXIAOXIN=
CONFIG_SR_WN5_CUSTOMIZED_WORD=
CONFIG_SR_MN1_MODEL_FLOAT=
CONFIG_SR_MN1_MODEL_QUANT=y

View File

@ -29,11 +29,11 @@ Please see the flow diagram of WakeNet below:
- How to select the WakeNet model
Go to `make menuconfig`, navigate to `Component config` >> `ESP Speech Recognition` >> `Wake word engine`. See below:
<center>
<img src="../img/model_sel.png" width = "500" />
</center>
1. Go to `make menuconfig`, navigate to `Component config` >> `ESP Speech Recognition` >> `Wake word engine`. See below:
<center> <img src="../img/model_sel.png" width = "500" /> </center>
2. WakeNet6 is divided into two tasks: task1 calculates the speech features, and task2 calculates the neural network model. The ESP32 core used to run task2 can be selected via `Component config` >> `ESP Speech Recognition` >> `ESP32 core to run WakeNet6`.
- How to select the wake words
@ -74,6 +74,9 @@ Please see the flow diagram of WakeNet below:
|Quantised WakeNet5|41 K|15 KB|5.5 ms|30 ms|
|Quantised WakeNet5X2|165 K|20 KB|10.5 ms|30 ms|
|Quantised WakeNet5X3|371 K|24 KB|18 ms|30 ms|
|Quantised WakeNet6|378 K|45 KB|4 ms (task1) + 25 ms (task2)|30 ms|
**Note**: Quantised WakeNet6 is split into two tasks: task1 calculates the speech features, and task2 calculates the neural network model.
### 2. Performance

View File

@ -19,7 +19,7 @@ WakeNet的流程图如下
- wakeNet3和wakeNet4基于[CRNN](https://arxiv.org/abs/1703.05390)结构。
- WakeNet5(WakeNet5X2,WakeNet5X3) 和 WakeNet6 基于 [Dilated Convolution](https://arxiv.org/pdf/1609.03499.pdf) 结构。
注意WakeNet5,WakeNet5X2 和 WakeNet5X3 的网络结构一致,但是 WakeNetX2 和 WakeNetX3 的参数比 WakeNet5 要多。请参考 [性能测试](#性能测试) 来获取更多细节。
注意WakeNet5,WakeNet5X2 和 WakeNet5X3 的网络结构一致,但是 WakeNet5X2 和 WakeNet5X3 的参数比 WakeNet5 要多。请参考 [性能测试](#性能测试) 来获取更多细节。
- keyword trigger method
对连续的音频流,为准确判断关键词的触发,我们通过计算若干帧内识别结果的平均值M来判断触发。当M大于指定阈值时,发出触发的命令。
@ -28,14 +28,16 @@ WakeNet的流程图如下
## API introduction
- WakeNet模型选择
使用make menuconfig选择Component config >> ESP Speech commands >> Keyword spotting model,如下图
1. 使用make menuconfig选择Component config >> ESP Speech Recognition >> Wake Word Engine,如下图
<center>
<img src="../img/model_sel.png" width = "500" />
</center>
</center>
2. 不同于WakeNet5,WakeNet6被拆分成两个task:task1计算speech features,task2计算neural network model。task2使用的ESP32核心可以通过Component config >> ESP Speech Recognition >> ESP32 core to run WakeNet6选择,默认使用core1。
- 唤醒词选择
使用make menuconfig选择Component config >> ESP Speech commands >> Wake word list进行选择如下图
使用make menuconfig选择Component config >> ESP Speech Recognition >> Wake word list进行选择如下图
<center>
<img src="../img/word_sel.png" width = "500" />
</center>
@ -70,7 +72,9 @@ WakeNet的流程图如下
|Quantised WakeNet5|41 K|15 KB|5.5 ms|30 ms|
|Quantised WakeNet5X2|165 K|20 KB|10.5 ms|30 ms|
|Quantised WakeNet5X3|371 K|24 KB|18 ms|30 ms|
|Quantised WakeNet6|378 K|45 KB|4ms(task1)+25ms(task2)|30 ms|
**注**:Quantised WakeNet6被拆分成两个task,其中task1用于计算speech features,另一个task2用于计算神经网络。
### 2.识别性能
|距离|安静环境|平稳噪声(SNR=0~10dB)|语音噪声(SNR=0~10dB)|AEC打断唤醒(-5~-15dB)|

View File

@ -62,7 +62,11 @@ extern const esp_wn_iface_t esp_sr_wakenet6_quantized;
#include "nihaoxiaozhi_wn5X2.h"
#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X2
#elif CONFIG_SR_WN6_HILEXIN
#elif CONFIG_SR_WN5X3_NIHAOXIAOXIN & CONFIG_SR_MODEL_WN5_QUANT
#include "nihaoxiaoxin_wn5X3.h"
#define WAKENET_COEFF get_coeff_nihaoxiaoxin_wn5X3
#elif CONFIG_SR_WN6_NIHAOXIAOXIN
#include "nihaoxiaoxin_wn6.h"
#define WAKENET_COEFF get_coeff_nihaoxiaoxin_wn6

View File

@ -0,0 +1,8 @@
//Generated by mkmodel
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
extern const model_coeff_getter_t get_coeff_nihaoxiaoxin_wn5X3;

Binary file not shown.