feat(mn6): Add MultiNet6 models

This commit is contained in:
sxy 2023-02-18 11:58:36 +08:00
parent 304ac3040e
commit cc3251b0d2
43 changed files with 2300 additions and 19 deletions

View File

@ -2,6 +2,7 @@
## Unreleased
- ESP-DSP dependency is now installed from the component registry
- Add English MultiNet6 models trained with RNN-T and CTC
## 1.1.0
- Support esp32c3 for Chinese TTS

View File

@ -70,6 +70,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3")
target_link_libraries(${COMPONENT_TARGET} "-Wl,--start-group"
hufzip
dl_lib
fst
c_speech_features
$<TARGET_FILE:${esp_dsp_lib}>
esp_audio_front_end

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,15 @@ typedef struct {
*/
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
/**
* @brief Allocate a convolution queue from psram
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c);
/**
* @brief Free a convolution queue
*

View File

@ -50,6 +50,16 @@ dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch);
/**
* @brief Free a fixed-point convolution queue
*
@ -64,6 +74,16 @@ void dl_convq8_queue_free(dl_convq8_queue_t *cq);
*/
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
/**
* @brief Move the front pointer of the queue forward, so that
the first (oldest) element becomes the last (newest) element.
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq);
q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,

View File

@ -93,8 +93,8 @@ void dl_convq_queue_bzero(dl_convq_queue_t *cq);
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
inline qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
inline qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
@ -125,7 +125,7 @@ dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
* @param last_num Offset from the front of the queue
* @return Pointer of the element
*/
inline qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
/**
* @brief Get the pointer of element in the queue by offset

View File

@ -17,6 +17,17 @@ typedef enum {
ESP_MN_STATE_TIMEOUT = 2, // time out
} esp_mn_state_t;
typedef enum {
ESP_MN_GREEDY_SEARCH = 0, // greedy search
ESP_MN_BEAM_SEARCH = 1, // beam search
ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model
} esp_mn_search_method_t;
typedef enum {
CHINESE_ID = 1, // Chinese language
ENGLISH_ID = 2, // English language
} language_id_t;
// Return all possible recognition results
typedef struct{
esp_mn_state_t state;
@ -24,8 +35,10 @@ typedef struct{
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
char string[256];
} esp_mn_results_t;
typedef struct{
int16_t num; // The number of error phrases, which can not added into model
int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM]; // The error phrase index in singly linked list

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
lib/esp32s3/libfst.a Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -28,12 +28,15 @@ if __name__ == '__main__':
with io.open(sdkconfig_path, "r") as f:
WN_STRING = ''
MN_STRING = ''
NSN_STRING = ''
for label in f:
label = label.strip("\n")
if 'CONFIG_SR_WN' in label and label[0] != '#':
WN_STRING += label
if 'CONFIG_SR_MN' in label and label[0] != '#':
MN_STRING += label
if 'CONFIG_SR_NSN' in label and label[0] != '#':
NSN_STRING += label
wakenet_model = []
if "CONFIG_SR_WN_WN7Q8_XIAOAITONGXUE" in WN_STRING:
@ -73,9 +76,18 @@ if "CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8" in MN_STRING and len(mu
multinet_model.append('mn5q8_en')
elif "CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION" in MN_STRING and len(multinet_model) < 2:
multinet_model.append('mn5_en')
elif "CONFIG_SR_MN_EN_MULTINET6_QUANT" in MN_STRING and len(multinet_model) < 2:
multinet_model.append('mn6_en')
elif "CONFIG_SR_MN_EN_MULTINET6_CTC_QUANT" in MN_STRING and len(multinet_model) < 2:
multinet_model.append('mn6_en_ctc')
nsnet_model = ''
if "CONFIG_SR_NSN_NSNET1" in NSN_STRING:
nsnet_model = 'nsnet1'
print(wakenet_model)
print(multinet_model)
print(nsnet_model)
target_model = args.project_path + '/target'
if os.path.exists(target_model):
@ -87,8 +99,11 @@ if len(wakenet_model) != 0:
if len(multinet_model) != 0:
for multinet_model_item in multinet_model:
shutil.copytree(model_path + '/multinet_model/' + multinet_model_item, target_model+'/'+multinet_model_item)
if nsnet_model != '':
shutil.copytree(model_path + '/nsnet_model/' + nsnet_model, target_model+'/'+nsnet_model)
# os.system("cp %s %s" % (wakenet_model+'/_MODEL_INFO_', target_model))
shutil.copytree(f'{model_path}/multinet_model/fst', target_model + '/fst')
total_size = calculate_total_size(target_model)
print("Recommended model partition size: ", str(int((total_size / 1024 + 900) / 4 ) * 4) + 'KB')

View File

@ -0,0 +1,49 @@
1 ▁TE LL ▁ME ▁A ▁JO KE
2 ▁S ING ▁A ▁SO NG
3 ▁PLAY ▁NEW S ▁CHA N N EL
4 ▁TURN ▁ON ▁MY ▁SO UND BO X
5 ▁TURN ▁OF F ▁MY ▁SO UND BO X
5 ▁TURN ▁OF ▁MY ▁SO UND BO X
6 ▁HIGH EST ▁ VO LU ME
7 ▁ LOW EST ▁ VO LU ME
8 ▁IN C RE A SE ▁THE ▁ VO LU ME
9 ▁DE C RE A SE ▁THE ▁ VO LU ME
10 ▁TURN ▁ON ▁THE ▁T V
11 ▁TURN ▁OF F ▁THE ▁T V
11 ▁TURN ▁OF ▁THE ▁T V
12 ▁MAKE ▁ME ▁A ▁TE A
13 ▁MAKE ▁ME ▁A ▁CO FF E E
14 ▁TURN ▁ON ▁THE ▁ L IGHT
15 ▁TURN ▁OF F ▁THE ▁ L IGHT
15 ▁TURN ▁OF ▁THE ▁ L IGHT
16 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁RE D
17 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁G RE EN
18 ▁TURN ▁ON ▁ALL ▁THE ▁ L IGHT S
19 ▁TURN ▁OF F ▁ALL ▁THE ▁ L IGHT S
19 ▁TURN ▁OF ▁ALL ▁THE ▁ L IGHT S
20 ▁TURN ▁ON ▁THE ▁A IR ▁CON D ITION ER
21 ▁TURN ▁OF F ▁THE ▁A IR ▁CON D ITION ER
21 ▁TURN ▁OF ▁THE ▁A IR ▁CON D ITION ER
22 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SIX TE EN ▁DE G RE ES
23 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SE VEN TE EN ▁DE G RE ES
24 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁E IGHT E EN ▁DE G RE ES
25 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁NI NE TE EN ▁DE G RE ES
26 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁DE G RE ES
27 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁ONE ▁DE G RE ES
28 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁TWO ▁DE G RE ES
29 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁THREE ▁DE G RE ES
30 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁FOUR ▁DE G RE ES
31 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁F IVE ▁DE G RE ES
32 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁SIX ▁DE G RE ES
33 ▁ LOW EST ▁FA N ▁SP E ED
34 ▁ME DI UM ▁FA N ▁SP E ED
35 ▁HIGH EST ▁FA N ▁SP E ED
36 ▁A UT O ▁A D J US T ▁THE ▁FA N ▁SP E ED
37 ▁DE C RE A SE ▁THE ▁FA N ▁SP E ED
38 ▁IN C RE A SE ▁THE ▁FA N ▁SP E ED
39 ▁IN C RE A SE ▁THE ▁TE MP ER A TURE
40 ▁DE C RE A SE ▁THE ▁TE MP ER A TURE
41 ▁CO OL ING ▁MO DE
42 ▁HE AT ING ▁MO DE
43 ▁ VEN T IL ATION ▁MO DE
44 ▁DE H UM ID I F Y ▁MO DE

View File

@ -0,0 +1,179 @@
0 1 ▁TE 1
0 2 ▁S 2
0 3 ▁PLAY 3
0 4 ▁TURN 0
0 5 ▁HIGH 0
0 6 ▁ 0
0 7 ▁IN 0
0 8 ▁DE 0
0 9 ▁MAKE 0
0 10 ▁CHA 0
0 11 ▁SE 0
0 12 ▁ME 34
0 13 ▁A 36
0 14 ▁CO 41
0 15 ▁HE 42
1 16 LL 0
2 17 ING 0
3 20 ▁NEW 0
4 21 ▁ON 0
4 22 ▁OF 0
5 23 EST 0
6 25 LOW 0
6 26 VEN 43
7 27 C 0
8 28 C 0
8 29 H 44
9 30 ▁ME 0
10 32 NG 0
11 33 T 0
12 37 DI 0
13 38 UT 0
14 19 OL 0
15 19 AT 0
16 31 ▁ME 0
17 39 ▁A 0
18 95 ▁MO 0
19 18 ING 0
20 44 S 0
21 45 ▁MY 4
21 46 ▁THE 0
21 47 ▁ALL 18
22 48 F 0
22 45 ▁MY 5
22 49 ▁THE 0
22 47 ▁ALL 19
23 50 ▁ 6
23 51 ▁FA 35
24 50 ▁ 7
24 51 ▁FA 33
25 24 EST 0
26 36 T 0
27 52 RE 0
28 53 RE 0
29 58 UM 0
30 40 ▁A 0
31 41 ▁A 0
32 60 E 0
33 67 ▁THE 0
34 59 ▁THE 0
35 34 T 0
36 84 IL 0
37 59 UM 0
38 43 O 0
39 74 ▁SO 0
40 92 ▁TE 12
40 93 ▁CO 13
41 96 ▁JO 0
42 109 D 0
43 42 ▁A 0
44 76 ▁CHA 0
45 75 ▁SO 0
46 77 ▁T 10
46 78 ▁ 14
46 79 ▁A 20
47 68 ▁THE 0
48 45 ▁MY 5
48 49 ▁THE 0
48 47 ▁ALL 19
49 77 ▁T 11
49 78 ▁ 15
49 79 ▁A 21
50 80 VO 0
51 81 N 0
52 85 A 0
53 86 A 0
54 97 EN 0
55 54 RE 0
56 97 ES 0
57 56 RE 0
58 91 ID 0
59 51 ▁FA 0
60 69 ▁THE 0
61 97 ED 0
62 61 E 0
63 97 E 0
64 63 E 0
65 130 EN 0
66 65 E 0
67 94 ▁TE 0
68 101 ▁ 0
69 105 ▁COL 0
70 50 ▁ 8
70 51 ▁FA 38
70 108 ▁TE 39
71 70 ▁THE 0
72 50 ▁ 9
72 51 ▁FA 37
72 108 ▁TE 40
73 72 ▁THE 0
74 97 NG 0
75 98 UND 0
76 82 N 0
77 97 V 0
78 99 L 0
79 102 IR 0
80 103 LU 0
81 62 ▁SP 0
82 83 N 0
83 97 EL 0
84 18 ATION 0
85 71 SE 0
86 73 SE 0
87 122 TURE 0
88 87 A 0
89 97 TURE 0
90 89 A 0
91 104 I 0
92 97 A 0
93 64 FF 0
94 106 MP 0
95 97 DE 0
96 97 KE 0
97
98 112 BO 0
99 97 IGHT 0
100 116 IGHT 0
101 100 L 0
102 111 ▁CON 0
103 97 ME 0
104 113 F 0
105 114 OR 0
106 88 ER 0
107 90 ER 0
108 107 MP 0
109 115 J 0
110 123 ITION 0
111 110 D 0
112 97 X 0
113 18 Y 0
114 120 ▁TO 0
115 35 US 0
116 97 S 0
117 66 IGHT 0
118 131 ▁DE 26
118 130 ▁ONE 27
118 130 ▁TWO 28
118 130 ▁THREE 29
118 130 ▁FOUR 30
118 132 ▁F 31
118 130 ▁SIX 32
119 118 Y 0
120 124 ▁RE 16
120 55 ▁G 17
121 125 ▁SIX 22
121 126 ▁SE 23
121 117 ▁E 24
121 127 ▁NI 25
121 128 ▁T 0
122 121 ▁TO 0
123 97 ER 0
124 97 D 0
125 65 TE 0
126 125 VEN 0
127 125 NE 0
128 129 W 0
129 119 ENT 0
130 131 ▁DE 0
131 57 G 0
132 130 IVE 0

View File

@ -0,0 +1,188 @@
0 1 KE 1
0 2 NG 2
0 3 EL 3
0 4 X 0
0 5 ME 0
0 6 V 0
0 7 A 12
0 8 E 13
0 9 IGHT 0
0 10 D 16
0 11 EN 17
0 12 S 0
0 13 ER 0
0 14 ES 0
0 15 ED 0
0 16 TURE 0
0 17 DE 0
1 18 ▁JO 0
2 19 ▁SO 0
3 22 N 0
4 26 BO 0
5 27 LU 0
6 28 ▁T 0
7 31 ▁TE 0
8 36 E 0
9 40 L 0
10 43 ▁RE 0
11 44 RE 0
12 42 IGHT 0
13 54 ITION 0
14 45 RE 0
15 37 E 0
16 55 A 0
17 61 ▁MO 0
18 62 ▁A 0
19 64 ▁A 0
20 107 ▁MY 0
21 20 ▁SO 0
22 23 N 0
23 94 ▁CHA 0
24 114 ▁FA 0
25 24 N 0
26 21 UND 0
27 69 VO 0
28 70 ▁THE 0
29 82 ▁TO 0
30 29 ▁T 0
31 63 ▁A 0
32 76 ▁THE 0
33 32 ▁TE 0
34 78 ▁THE 0
35 34 ▁TE 0
36 79 FF 0
37 25 ▁SP 0
38 132 NG 0
39 38 E 0
40 72 ▁ 0
41 74 ▁ 0
42 41 L 0
43 81 ▁TO 0
44 43 ▁G 0
45 85 G 0
46 135 C 0
47 46 RE 0
48 136 C 0
49 48 RE 0
50 137 C 0
51 50 RE 0
52 29 ▁E 0
53 52 IGHT 0
54 83 D 0
55 86 ER 0
56 87 ER 0
57 56 A 0
58 47 A 0
59 49 A 0
60 51 A 0
61 88 ING 0
61 89 ATION 43
61 90 Y 44
62 91 ▁ME 0
63 92 ▁ME 0
64 93 ING 0
65 77 ▁THE 0
66 65 ▁A 0
67 138 O 0
68 67 ▁A 0
69 80 ▁ 0
70 95 ▁ON 10
70 96 F 11
70 95 ▁OF 11
71 95 ▁ON 14
71 96 F 15
71 95 ▁OF 15
72 71 ▁THE 0
73 120 ▁ALL 0
74 73 ▁THE 0
75 39 ▁THE 0
76 60 SE 0
77 95 ▁ON 20
77 96 F 21
77 95 ▁OF 21
78 116 T 0
79 31 ▁CO 0
80 108 EST 0
80 109 ▁THE 0
81 97 OR 0
82 57 TURE 0
83 98 ▁CON 0
84 68 D 0
85 99 ▁DE 0
86 33 MP 0
87 35 MP 0
88 100 OL 41
88 101 AT 42
89 102 IL 0
90 103 F 0
91 104 LL 0
92 105 ▁MAKE 0
93 105 ▁S 0
94 106 S 0
95 105 ▁TURN 0
96 95 ▁OF 0
97 75 ▁COL 0
98 66 IR 0
99 110 EN 0
99 111 Y 26
99 112 ▁ONE 27
99 112 ▁TWO 28
99 112 ▁THREE 29
99 112 ▁FOUR 30
99 113 IVE 31
99 112 ▁SIX 32
100 105 ▁CO 0
101 105 ▁HE 0
102 115 T 0
103 117 I 0
104 105 ▁TE 0
105
106 118 ▁NEW 0
107 95 ▁ON 4
107 96 F 5
107 95 ▁OF 5
108 119 LOW 7
108 105 ▁HIGH 6
109 58 SE 0
110 121 TE 0
110 53 E 24
111 122 ENT 0
112 111 Y 0
113 112 ▁F 0
114 123 EST 0
114 124 UM 34
114 125 ▁THE 0
115 119 VEN 0
116 105 ▁SE 0
117 126 ID 0
118 105 ▁PLAY 0
119 105 ▁ 0
120 95 ▁ON 18
120 96 F 19
120 95 ▁OF 19
121 29 ▁SIX 22
121 127 VEN 23
121 128 NE 25
122 30 W 0
123 119 LOW 33
123 105 ▁HIGH 35
124 129 DI 0
125 130 T 36
125 59 SE 0
126 131 UM 0
127 29 ▁SE 0
128 29 ▁NI 0
129 105 ▁ME 0
130 133 US 0
131 134 H 0
132 105 ▁CHA 0
133 84 J 0
134 105 ▁DE 0
135 105 ▁IN 8
135 105 ▁DE 9
136 105 ▁DE 37
136 105 ▁IN 38
137 105 ▁IN 39
137 105 ▁DE 40
138 139 UT 0
139 105 ▁A 0

View File

@ -0,0 +1,99 @@
<blk> 0
S 3
▁THE 4
T 5
▁A 6
N 7
D 8
ED 9
E 10
▁OF 11
Y 12
▁S 14
▁IN 15
▁TO 17
▁ 18
A 19
ING 20
O 22
▁HE 24
ER 25
C 26
G 27
I 28
L 29
RE 31
F 37
▁RE 38
W 40
▁E 45
OR 50
▁F 51
ES 55
LL 56
ENT 65
H 66
▁DE 68
▁G 71
EN 72
▁ON 73
SE 74
▁T 75
▁ME 78
IL 81
NE 86
TE 87
▁SO 89
ATION 90
NG 92
ME 93
▁CON 95
EL 103
IR 115
▁MO 117
▁CO 119
▁SE 122
▁FA 136
V 138
US 146
▁ALL 151
X 152
IVE 156
▁ONE 157
KE 159
▁TE 175
AT 178
LU 180
MP 182
▁SP 186
▁MY 188
DE 193
IGHT 196
UT 198
EST 204
UND 209
FF 216
J 220
▁CHA 226
OL 227
▁TWO 237
ID 251
UM 256
VO 259
DI 266
LOW 281
TURE 286
▁NEW 304
ITION 310
BO 312
VEN 326
▁PLAY 338
▁JO 356
▁THREE 367
▁COL 375
▁HIGH 381
▁FOUR 424
▁MAKE 430
▁NI 446
▁TURN 457
▁SIX 483

View File

@ -0,0 +1,2 @@
# (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_...
MN6_v1_english_8_0.9_0.90

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
# (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_...
MN6_v11_english_8_0.9_0.90

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,49 @@
1 ▁TE LL ▁ME ▁A ▁JO KE
2 ▁S ING ▁A ▁SO NG
3 ▁PLAY ▁NEW S ▁CHA N N EL
4 ▁TURN ▁ON ▁MY ▁SO UND BO X
5 ▁TURN ▁OF F ▁MY ▁SO UND BO X
5 ▁TURN ▁OF ▁MY ▁SO UND BO X
6 ▁HIGH EST ▁ VO LU ME
7 ▁ LOW EST ▁ VO LU ME
8 ▁IN C RE A SE ▁THE ▁ VO LU ME
9 ▁DE C RE A SE ▁THE ▁ VO LU ME
10 ▁TURN ▁ON ▁THE ▁T V
11 ▁TURN ▁OF F ▁THE ▁T V
11 ▁TURN ▁OF ▁THE ▁T V
12 ▁MAKE ▁ME ▁A ▁TE A
13 ▁MAKE ▁ME ▁A ▁CO FF E E
14 ▁TURN ▁ON ▁THE ▁ L IGHT
15 ▁TURN ▁OF F ▁THE ▁ L IGHT
15 ▁TURN ▁OF ▁THE ▁ L IGHT
16 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁RE D
17 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁G RE EN
18 ▁TURN ▁ON ▁ALL ▁THE ▁ L IGHT S
19 ▁TURN ▁OF F ▁ALL ▁THE ▁ L IGHT S
19 ▁TURN ▁OF ▁ALL ▁THE ▁ L IGHT S
20 ▁TURN ▁ON ▁THE ▁A IR ▁CON D ITION ER
21 ▁TURN ▁OF F ▁THE ▁A IR ▁CON D ITION ER
21 ▁TURN ▁OF ▁THE ▁A IR ▁CON D ITION ER
22 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SIX TE EN ▁DE G RE ES
23 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SE VEN TE EN ▁DE G RE ES
24 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁E IGHT E EN ▁DE G RE ES
25 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁NI NE TE EN ▁DE G RE ES
26 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁DE G RE ES
27 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁ONE ▁DE G RE ES
28 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁TWO ▁DE G RE ES
29 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁THREE ▁DE G RE ES
30 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁FOUR ▁DE G RE ES
31 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁F IVE ▁DE G RE ES
32 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁SIX ▁DE G RE ES
33 ▁ LOW EST ▁FA N ▁SP E ED
34 ▁ME DI UM ▁FA N ▁SP E ED
35 ▁HIGH EST ▁FA N ▁SP E ED
36 ▁A UT O ▁A D J US T ▁THE ▁FA N ▁SP E ED
37 ▁DE C RE A SE ▁THE ▁FA N ▁SP E ED
38 ▁IN C RE A SE ▁THE ▁FA N ▁SP E ED
39 ▁IN C RE A SE ▁THE ▁TE MP ER A TURE
40 ▁DE C RE A SE ▁THE ▁TE MP ER A TURE
41 ▁CO OL ING ▁MO DE
42 ▁HE AT ING ▁MO DE
43 ▁ VEN T IL ATION ▁MO DE
44 ▁DE H UM ID I F Y ▁MO DE

179
model/target/fst/fst.txt Normal file
View File

@ -0,0 +1,179 @@
0 1 ▁TE 1
0 2 ▁S 2
0 3 ▁PLAY 3
0 4 ▁TURN 0
0 5 ▁HIGH 0
0 6 ▁ 0
0 7 ▁IN 0
0 8 ▁DE 0
0 9 ▁MAKE 0
0 10 ▁CHA 0
0 11 ▁SE 0
0 12 ▁ME 34
0 13 ▁A 36
0 14 ▁CO 41
0 15 ▁HE 42
1 16 LL 0
2 17 ING 0
3 20 ▁NEW 0
4 21 ▁ON 0
4 22 ▁OF 0
5 23 EST 0
6 25 LOW 0
6 26 VEN 43
7 27 C 0
8 28 C 0
8 29 H 44
9 30 ▁ME 0
10 32 NG 0
11 33 T 0
12 37 DI 0
13 38 UT 0
14 19 OL 0
15 19 AT 0
16 31 ▁ME 0
17 39 ▁A 0
18 95 ▁MO 0
19 18 ING 0
20 44 S 0
21 45 ▁MY 4
21 46 ▁THE 0
21 47 ▁ALL 18
22 48 F 0
22 45 ▁MY 5
22 49 ▁THE 0
22 47 ▁ALL 19
23 50 ▁ 6
23 51 ▁FA 35
24 50 ▁ 7
24 51 ▁FA 33
25 24 EST 0
26 36 T 0
27 52 RE 0
28 53 RE 0
29 58 UM 0
30 40 ▁A 0
31 41 ▁A 0
32 60 E 0
33 67 ▁THE 0
34 59 ▁THE 0
35 34 T 0
36 84 IL 0
37 59 UM 0
38 43 O 0
39 74 ▁SO 0
40 92 ▁TE 12
40 93 ▁CO 13
41 96 ▁JO 0
42 109 D 0
43 42 ▁A 0
44 76 ▁CHA 0
45 75 ▁SO 0
46 77 ▁T 10
46 78 ▁ 14
46 79 ▁A 20
47 68 ▁THE 0
48 45 ▁MY 5
48 49 ▁THE 0
48 47 ▁ALL 19
49 77 ▁T 11
49 78 ▁ 15
49 79 ▁A 21
50 80 VO 0
51 81 N 0
52 85 A 0
53 86 A 0
54 97 EN 0
55 54 RE 0
56 97 ES 0
57 56 RE 0
58 91 ID 0
59 51 ▁FA 0
60 69 ▁THE 0
61 97 ED 0
62 61 E 0
63 97 E 0
64 63 E 0
65 130 EN 0
66 65 E 0
67 94 ▁TE 0
68 101 ▁ 0
69 105 ▁COL 0
70 50 ▁ 8
70 51 ▁FA 38
70 108 ▁TE 39
71 70 ▁THE 0
72 50 ▁ 9
72 51 ▁FA 37
72 108 ▁TE 40
73 72 ▁THE 0
74 97 NG 0
75 98 UND 0
76 82 N 0
77 97 V 0
78 99 L 0
79 102 IR 0
80 103 LU 0
81 62 ▁SP 0
82 83 N 0
83 97 EL 0
84 18 ATION 0
85 71 SE 0
86 73 SE 0
87 122 TURE 0
88 87 A 0
89 97 TURE 0
90 89 A 0
91 104 I 0
92 97 A 0
93 64 FF 0
94 106 MP 0
95 97 DE 0
96 97 KE 0
97
98 112 BO 0
99 97 IGHT 0
100 116 IGHT 0
101 100 L 0
102 111 ▁CON 0
103 97 ME 0
104 113 F 0
105 114 OR 0
106 88 ER 0
107 90 ER 0
108 107 MP 0
109 115 J 0
110 123 ITION 0
111 110 D 0
112 97 X 0
113 18 Y 0
114 120 ▁TO 0
115 35 US 0
116 97 S 0
117 66 IGHT 0
118 131 ▁DE 26
118 130 ▁ONE 27
118 130 ▁TWO 28
118 130 ▁THREE 29
118 130 ▁FOUR 30
118 132 ▁F 31
118 130 ▁SIX 32
119 118 Y 0
120 124 ▁RE 16
120 55 ▁G 17
121 125 ▁SIX 22
121 126 ▁SE 23
121 117 ▁E 24
121 127 ▁NI 25
121 128 ▁T 0
122 121 ▁TO 0
123 97 ER 0
124 97 D 0
125 65 TE 0
126 125 VEN 0
127 125 NE 0
128 129 W 0
129 119 ENT 0
130 131 ▁DE 0
131 57 G 0
132 130 IVE 0

View File

@ -0,0 +1,188 @@
0 1 KE 1
0 2 NG 2
0 3 EL 3
0 4 X 0
0 5 ME 0
0 6 V 0
0 7 A 12
0 8 E 13
0 9 IGHT 0
0 10 D 16
0 11 EN 17
0 12 S 0
0 13 ER 0
0 14 ES 0
0 15 ED 0
0 16 TURE 0
0 17 DE 0
1 18 ▁JO 0
2 19 ▁SO 0
3 22 N 0
4 26 BO 0
5 27 LU 0
6 28 ▁T 0
7 31 ▁TE 0
8 36 E 0
9 40 L 0
10 43 ▁RE 0
11 44 RE 0
12 42 IGHT 0
13 54 ITION 0
14 45 RE 0
15 37 E 0
16 55 A 0
17 61 ▁MO 0
18 62 ▁A 0
19 64 ▁A 0
20 107 ▁MY 0
21 20 ▁SO 0
22 23 N 0
23 94 ▁CHA 0
24 114 ▁FA 0
25 24 N 0
26 21 UND 0
27 69 VO 0
28 70 ▁THE 0
29 82 ▁TO 0
30 29 ▁T 0
31 63 ▁A 0
32 76 ▁THE 0
33 32 ▁TE 0
34 78 ▁THE 0
35 34 ▁TE 0
36 79 FF 0
37 25 ▁SP 0
38 132 NG 0
39 38 E 0
40 72 ▁ 0
41 74 ▁ 0
42 41 L 0
43 81 ▁TO 0
44 43 ▁G 0
45 85 G 0
46 135 C 0
47 46 RE 0
48 136 C 0
49 48 RE 0
50 137 C 0
51 50 RE 0
52 29 ▁E 0
53 52 IGHT 0
54 83 D 0
55 86 ER 0
56 87 ER 0
57 56 A 0
58 47 A 0
59 49 A 0
60 51 A 0
61 88 ING 0
61 89 ATION 43
61 90 Y 44
62 91 ▁ME 0
63 92 ▁ME 0
64 93 ING 0
65 77 ▁THE 0
66 65 ▁A 0
67 138 O 0
68 67 ▁A 0
69 80 ▁ 0
70 95 ▁ON 10
70 96 F 11
70 95 ▁OF 11
71 95 ▁ON 14
71 96 F 15
71 95 ▁OF 15
72 71 ▁THE 0
73 120 ▁ALL 0
74 73 ▁THE 0
75 39 ▁THE 0
76 60 SE 0
77 95 ▁ON 20
77 96 F 21
77 95 ▁OF 21
78 116 T 0
79 31 ▁CO 0
80 108 EST 0
80 109 ▁THE 0
81 97 OR 0
82 57 TURE 0
83 98 ▁CON 0
84 68 D 0
85 99 ▁DE 0
86 33 MP 0
87 35 MP 0
88 100 OL 41
88 101 AT 42
89 102 IL 0
90 103 F 0
91 104 LL 0
92 105 ▁MAKE 0
93 105 ▁S 0
94 106 S 0
95 105 ▁TURN 0
96 95 ▁OF 0
97 75 ▁COL 0
98 66 IR 0
99 110 EN 0
99 111 Y 26
99 112 ▁ONE 27
99 112 ▁TWO 28
99 112 ▁THREE 29
99 112 ▁FOUR 30
99 113 IVE 31
99 112 ▁SIX 32
100 105 ▁CO 0
101 105 ▁HE 0
102 115 T 0
103 117 I 0
104 105 ▁TE 0
105
106 118 ▁NEW 0
107 95 ▁ON 4
107 96 F 5
107 95 ▁OF 5
108 119 LOW 7
108 105 ▁HIGH 6
109 58 SE 0
110 121 TE 0
110 53 E 24
111 122 ENT 0
112 111 Y 0
113 112 ▁F 0
114 123 EST 0
114 124 UM 34
114 125 ▁THE 0
115 119 VEN 0
116 105 ▁SE 0
117 126 ID 0
118 105 ▁PLAY 0
119 105 ▁ 0
120 95 ▁ON 18
120 96 F 19
120 95 ▁OF 19
121 29 ▁SIX 22
121 127 VEN 23
121 128 NE 25
122 30 W 0
123 119 LOW 33
123 105 ▁HIGH 35
124 129 DI 0
125 130 T 36
125 59 SE 0
126 131 UM 0
127 29 ▁SE 0
128 29 ▁NI 0
129 105 ▁ME 0
130 133 US 0
131 134 H 0
132 105 ▁CHA 0
133 84 J 0
134 105 ▁DE 0
135 105 ▁IN 8
135 105 ▁DE 9
136 105 ▁DE 37
136 105 ▁IN 38
137 105 ▁IN 39
137 105 ▁DE 40
138 139 UT 0
139 105 ▁A 0

View File

@ -0,0 +1,99 @@
<blk> 0
S 3
▁THE 4
T 5
▁A 6
N 7
D 8
ED 9
E 10
▁OF 11
Y 12
▁S 14
▁IN 15
▁TO 17
▁ 18
A 19
ING 20
O 22
▁HE 24
ER 25
C 26
G 27
I 28
L 29
RE 31
F 37
▁RE 38
W 40
▁E 45
OR 50
▁F 51
ES 55
LL 56
ENT 65
H 66
▁DE 68
▁G 71
EN 72
▁ON 73
SE 74
▁T 75
▁ME 78
IL 81
NE 86
TE 87
▁SO 89
ATION 90
NG 92
ME 93
▁CON 95
EL 103
IR 115
▁MO 117
▁CO 119
▁SE 122
▁FA 136
V 138
US 146
▁ALL 151
X 152
IVE 156
▁ONE 157
KE 159
▁TE 175
AT 178
LU 180
MP 182
▁SP 186
▁MY 188
DE 193
IGHT 196
UT 198
EST 204
UND 209
FF 216
J 220
▁CHA 226
OL 227
▁TWO 237
ID 251
UM 256
VO 259
DI 266
LOW 281
TURE 286
▁NEW 304
ITION 310
BO 312
VEN 326
▁PLAY 338
▁JO 356
▁THREE 367
▁COL 375
▁HIGH 381
▁FOUR 424
▁MAKE 430
▁NI 446
▁TURN 457
▁SIX 483

View File

@ -0,0 +1,2 @@
# (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_...
MN6_v11_english_8_0.9_0.90

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
wakeNet9_v1h24_hiesp_3_0.63_0.635

Binary file not shown.

Binary file not shown.

View File

@ -12,42 +12,42 @@ void check_chip_config(void)
{
#ifdef CONFIG_IDF_TARGET_ESP32S3
#ifndef CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240
ESP_LOGE(TAG, "CPU freq should be 240MHz");
ESP_LOGW(TAG, "CPU freq should be 240MHz");
#endif
#ifndef CONFIG_ESPTOOLPY_FLASHFREQ_80M
ESP_LOGE(TAG, "Flash freq should be 80MHz");
#if (! defined CONFIG_ESPTOOLPY_FLASHFREQ_80M) && (! defined CONFIG_ESPTOOLPY_FLASHFREQ_120M)
ESP_LOGW(TAG, "Flash freq should be not less than 80MHz");
#endif
#ifndef CONFIG_SPIRAM_SPEED_80M
ESP_LOGE(TAG, "PSRAM freq should be 80MHz");
#if (! defined CONFIG_SPIRAM_SPEED_80M) && (! defined CONFIG_SPIRAM_SPEED_120M)
ESP_LOGW(TAG, "PSRAM freq should be not less than 80MHz");
#endif
#ifndef CONFIG_ESP32S3_DATA_CACHE_64KB
ESP_LOGE(TAG, "Data cache should be 64KB");
ESP_LOGW(TAG, "Data cache recommends 64KB");
#endif
#ifndef CONFIG_ESP32S3_DATA_CACHE_LINE_64B
ESP_LOGE(TAG, "Data cache line should be 64B");
ESP_LOGW(TAG, "Data cache line recommends 64B");
#endif
#elif CONFIG_IDF_TARGET_ESP32
#ifndef CONFIG_ESP32_DEFAULT_CPU_FREQ_240
ESP_LOGE(TAG, "CPU freq should be 240MHz");
ESP_LOGW(TAG, "CPU freq should be 240MHz");
#endif
#ifndef CONFIG_SPIRAM_SPEED_80M
ESP_LOGE(TAG, "PSRAM freq should be 80MHz");
ESP_LOGW(TAG, "PSRAM freq should be 80MHz");
#endif
#ifndef CONFIG_ESPTOOLPY_FLASHFREQ_80M
ESP_LOGE(TAG, "Flash freq should be 80MHz");
ESP_LOGW(TAG, "Flash freq should be 80MHz");
#endif
#ifndef CONFIG_ESPTOOLPY_FLASHMODE_QIO
ESP_LOGE(TAG, "Flash mode should be QIO");
ESP_LOGW(TAG, "Flash mode should be QIO");
#endif
#else
ESP_LOGE(TAG, "ESP-SR-AFE only support ESP32/ESP32S3");
ESP_LOGW(TAG, "ESP-SR-AFE only support ESP32/ESP32S3");
#endif
}

View File

@ -1,7 +1,4 @@
**multinet_g2p.py** is used to convert English phrases into phonemes that MultiNet can recognize.
## MultiNet5
#### 1. Install g2p_en, please refer to https://pypi.org/project/g2p-en/
```
@ -45,4 +42,45 @@ multinet->reset(model_data, new_commands_str, err_id);
// turn off the light -> command id=2
```
## MultiNet6
The FST (Finite State Transducer) is used to save a list of commands.
#### Step 1. Data preparation
Requirements:
- python>3.8
- sentencepiece
To create a FST from a list of commands, two files are needed:
- commands.txt: maps a command id to subwords
- tokens.txt: maps subword tokens to their indices in the BPE model
Assume you have a list of commands written in a text file `commands_list.txt` of the following format:
```
# command_id command_sentence
1 TELL ME A JOKE
2 MAKE A COFFEE
```
**Note**: Command IDs start from 1; 0 is reserved in the FST.
Run the following commands to create the required files. Do not change the filenames `commands.txt` and `tokens.txt`.
```sh
pip install -r requirements.txt
python fst/prepare_for_fst.py \
--infile commands_list.txt \
--bpe-model fst/bpe.model \
--out-command-list commands.txt \
--out-token-symbols tokens.txt
```
#### Step 2. Move created files
1. Remove `/model/multinet_model/fst/fst.txt` and `/model/multinet_model/fst/fst_reversed.txt` if those files exist.
2. Move the following files to `/model/multinet_model/fst/`
- commands.txt
- tokens.txt

BIN
tool/fst/bpe.model Normal file

Binary file not shown.

View File

@ -0,0 +1,49 @@
1 TELL ME A JOKE
2 SING A SONG
3 PLAY NEWS CHANNEL
4 TURN ON MY SOUNDBOX
5 TURN OFF MY SOUNDBOX
5 TURN OF MY SOUNDBOX
6 HIGHEST VOLUME
7 LOWEST VOLUME
8 INCREASE THE VOLUME
9 DECREASE THE VOLUME
10 TURN ON THE TV
11 TURN OFF THE TV
11 TURN OF THE TV
12 MAKE ME A TEA
13 MAKE ME A COFFEE
14 TURN ON THE LIGHT
15 TURN OFF THE LIGHT
15 TURN OF THE LIGHT
16 CHANGE THE COLOR TO RED
17 CHANGE THE COLOR TO GREEN
18 TURN ON ALL THE LIGHTS
19 TURN OFF ALL THE LIGHTS
19 TURN OF ALL THE LIGHTS
20 TURN ON THE AIR CONDITIONER
21 TURN OFF THE AIR CONDITIONER
21 TURN OF THE AIR CONDITIONER
22 SET THE TEMPERATURE TO SIXTEEN DEGREES
23 SET THE TEMPERATURE TO SEVENTEEN DEGREES
24 SET THE TEMPERATURE TO EIGHTEEN DEGREES
25 SET THE TEMPERATURE TO NINETEEN DEGREES
26 SET THE TEMPERATURE TO TWENTY DEGREES
27 SET THE TEMPERATURE TO TWENTY ONE DEGREES
28 SET THE TEMPERATURE TO TWENTY TWO DEGREES
29 SET THE TEMPERATURE TO TWENTY THREE DEGREES
30 SET THE TEMPERATURE TO TWENTY FOUR DEGREES
31 SET THE TEMPERATURE TO TWENTY FIVE DEGREES
32 SET THE TEMPERATURE TO TWENTY SIX DEGREES
33 LOWEST FAN SPEED
34 MEDIUM FAN SPEED
35 HIGHEST FAN SPEED
36 AUTO ADJUST THE FAN SPEED
37 DECREASE THE FAN SPEED
38 INCREASE THE FAN SPEED
39 INCREASE THE TEMPERATURE
40 DECREASE THE TEMPERATURE
41 COOLING MODE
42 HEATING MODE
43 VENTILATION MODE
44 DEHUMIDIFY MODE

View File

@ -0,0 +1,64 @@
import argparse
from pathlib import Path
from typing import List, Set, Tuple
import sentencepiece as spm
def process_commands(infile: List[str], sp: spm.SentencePieceProcessor
                     ) -> Tuple[List[str], Set[str]]:
    """Tokenize every command line into subword pieces.

    Each usable input line has the form ``<command_id> <command sentence>``.
    Blank lines and lines whose first field starts with ``#`` are skipped,
    so a trailing empty line or a header comment in the command list neither
    crashes the tool nor produces a bogus command entry.

    Args:
        infile: Lines of the command list file.
        sp: Object whose ``encode(text, out_type=str)`` returns the list of
            subword pieces for ``text``.

    Returns:
        A pair ``(out_commands, tokens)``. ``out_commands`` holds one
        tab-separated string per command (the command id followed by its
        subword pieces, in input order); ``tokens`` is the set of every
        subword piece that was produced.
    """
    out_commands = []
    tokens = set()
    for line in infile:
        fields = line.split()
        # Nothing to tokenize on an empty line; '#' marks a comment line.
        if not fields or fields[0].startswith('#'):
            continue
        command_id = fields[0]
        command = ' '.join(fields[1:])
        command_tokens = sp.encode(command, out_type=str)
        tokens.update(command_tokens)
        out_commands.append('\t'.join([command_id] + command_tokens))
    return out_commands, tokens
if __name__ == '__main__':
    # Command-line entry point: read a command list, tokenize it with the
    # given BPE model and write the tokenized commands plus the token table.
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', type=str, required=True,
                        help='the text file of commands id and commands.')
    parser.add_argument('--bpe-model', type=str, default='bpe.model',
                        help='subword bpe model file.')
    parser.add_argument('--out-command-list', type=str,
                        default='commands_tokens.txt',
                        help='the output subword commands text filename.')
    parser.add_argument('--out-token-symbols', type=str,
                        default='tokens.txt',
                        help='the output token to subword id mapping.')
    args = parser.parse_args()

    # Fail early with a clear error if either input file is missing.
    if not Path(args.infile).is_file():
        raise FileNotFoundError(args.infile)
    if not Path(args.bpe_model).is_file():
        raise FileNotFoundError(args.bpe_model)

    # Explicit UTF-8: do not depend on the platform's locale encoding.
    with open(args.infile, encoding='utf-8') as f:
        infile = [x.strip() for x in f.readlines()]

    sp = spm.SentencePieceProcessor()
    sp.load(args.bpe_model)
    out_commands, tokens = process_commands(infile, sp)

    # Keep only the pieces that occur in the commands, plus vocabulary id 0,
    # which is always emitted.
    token_symbols = []
    for i in range(sp.vocab_size()):
        piece = sp.id_to_piece(i)
        if piece in tokens or i == 0:
            token_symbols.append(f'{piece}\t{i}')

    # The subword pieces contain U+2581, which many non-UTF-8 locale
    # encodings cannot represent, so the outputs are written as UTF-8.
    with open(args.out_command_list, 'wt', encoding='utf-8') as f:
        f.write('\n'.join(out_commands))
        f.write('\n')
    with open(args.out_token_symbols, 'wt', encoding='utf-8') as f:
        f.write('\n'.join(token_symbols))
        f.write('\n')

View File

@ -0,0 +1 @@
sentencepiece==0.1.97

View File

@ -1 +1,2 @@
g2p-en
sentencepiece==0.1.97