feat(multinet): add API to modify speech commands online

This commit is contained in:
sxy 2021-09-24 21:29:56 +08:00
parent cfebc5f31a
commit c3d120ac9d
8 changed files with 131 additions and 92 deletions

View File

@ -148,13 +148,13 @@ config EN_MULTINET3_SINGLE_RECOGNITION
bool "english single recognition (MultiNet3)"
depends on SR_ENGLISH && IDF_TARGET_ESP32
config EN_MULTINET5_SINGLE_RECOGNITION
bool "english single recognition (MultiNet5)"
depends on SR_ENGLISH && IDF_TARGET_ESP32S3
config EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
bool "english single recognition (MultiNet5 quantized with 8-bit)"
depends on SR_ENGLISH && IDF_TARGET_ESP32S3
config EN_MULTINET5_SINGLE_RECOGNITION
bool "english single recognition (MultiNet5)"
depends on SR_ENGLISH && IDF_TARGET_ESP32S3
config CN_MULTINET2_SINGLE_RECOGNITION
bool "chinese single recognition (MultiNet2)"
@ -685,162 +685,162 @@ config CN_SPEECH_COMMAND_ID99
config EN_SPEECH_COMMAND_ID0
string "ID0"
depends on SR_ENGLISH
default "T,f,L, ,M,m, ,c, ,q,b,K"
default "TfL Mm c qbK"
config EN_SPEECH_COMMAND_ID1
string "ID1"
depends on SR_ENGLISH
default "S,g,l, ,c, ,S,e,l"
default "Sgl c Sel"
config EN_SPEECH_COMMAND_ID2
string "ID2"
depends on SR_ENGLISH
default "P,L,d, ,N,o,Z, ,p,a,N,c,L"
default "PLd NoZ paNcL"
config EN_SPEECH_COMMAND_ID3
string "ID3"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,M,i, ,S,t,N,D,B,n,K,S"
default "TkN nN Mi StNDBnKS"
config EN_SPEECH_COMMAND_ID4
string "ID4"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,M,i, ,S,t,N,D,B,n,K,S"
default "TkN eF Mi StNDBnKS"
config EN_SPEECH_COMMAND_ID5
string "ID5"
depends on SR_ENGLISH
default "h,i,c,S,T, ,V,n,L,Y,o,M"
default "hicST VnLYoM"
config EN_SPEECH_COMMAND_ID6
string "ID6"
depends on SR_ENGLISH
default "L,b,c,S,T, ,V,n,L,Y,o,M"
default "LbcST VnLYoM"
config EN_SPEECH_COMMAND_ID7
string "ID7"
depends on SR_ENGLISH
default "g,N,K,R,m,S, ,j,c, ,V,n,L,Y,o,M"
default "gNKRmS jc VnLYoM"
config EN_SPEECH_COMMAND_ID8
string "ID8"
depends on SR_ENGLISH
default "D,g,K,R,m,S, ,j,c, ,V,n,L,Y,o,M"
default "DgKRmS jc VnLYoM"
config EN_SPEECH_COMMAND_ID9
string "ID9"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,j,c, ,T,m,V,m"
default "TkN nN jc TmVm"
config EN_SPEECH_COMMAND_ID10
string "ID10"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,j,c, ,T,m,V,m"
default "TkN eF jc TmVm"
config EN_SPEECH_COMMAND_ID11
string "ID11"
depends on SR_ENGLISH
default "M,d,K, ,M,m, ,c, ,T,m"
default "MdK Mm c Tm"
config EN_SPEECH_COMMAND_ID12
string "ID12"
depends on SR_ENGLISH
default "M,d,K, ,M,m, ,c, ,K,n,F,m"
default "MdK Mm c KnFm"
config EN_SPEECH_COMMAND_ID13
string "ID13"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,j,c, ,L,i,T"
default "TkN nN jc LiT"
config EN_SPEECH_COMMAND_ID14
string "ID14"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,j,c, ,L,i,T"
default "TkN eF jc LiT"
config EN_SPEECH_COMMAND_ID15
string "ID15"
depends on SR_ENGLISH
default "p,d,N,q, ,j,c, ,K,c,L,k, ,T,o, ,R,f,D"
default "pdNq jc KcLk To RfD"
config EN_SPEECH_COMMAND_ID16
string "ID16"
depends on SR_ENGLISH
default "p,d,N,q, ,j,c, ,K,c,L,k, ,T,o, ,G,R,m,N"
default "pdNq jc KcLk To GRmN"
config EN_SPEECH_COMMAND_ID17
string "ID17"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,e,L, ,j,c, ,L,i,T,S"
default "TkN nN eL jc LiTS"
config EN_SPEECH_COMMAND_ID18
string "ID18"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,e,L, ,j,c, ,L,i,T,S"
default "TkN eF eL jc LiTS"
config EN_SPEECH_COMMAND_ID19
string "ID19"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,j,c, ,f,R, ,K,c,N,D,g,s,c,N,k"
default "TkN nN jc fR KcNDgscNk"
config EN_SPEECH_COMMAND_ID20
string "ID20"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,j,c, ,f,R, ,K,c,N,D,g,s,c,N,k"
default "TkN eF jc fR KcNDgscNk"
config EN_SPEECH_COMMAND_ID21
string "ID21"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,S,g,K,S,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To SgKSTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID22
string "ID22"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,S,f,V,c,N,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To SfVcNTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID23
string "ID23"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,d,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To dTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID24
string "ID24"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,N,i,N,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To NiNTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID25
string "ID25"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm DgGRmZ"
config EN_SPEECH_COMMAND_ID26
string "ID26"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,W,c,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm WcN DgGRmZ"
config EN_SPEECH_COMMAND_ID27
string "ID27"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,T,o, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm To DgGRmZ"
config EN_SPEECH_COMMAND_ID28
string "ID28"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,v,R,m, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm vRm DgGRmZ"
config EN_SPEECH_COMMAND_ID29
string "ID29"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,F,e,R, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm FeR DgGRmZ"
config EN_SPEECH_COMMAND_ID30
string "ID30"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,F,i,V, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm FiV DgGRmZ"
config EN_SPEECH_COMMAND_ID31
string "ID31"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,S,g,K,S, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm SgKS DgGRmZ"
config EN_SPEECH_COMMAND_ID32
string "ID32"

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -148,13 +148,13 @@ config EN_MULTINET3_SINGLE_RECOGNITION
bool "english single recognition (MultiNet3)"
depends on SR_ENGLISH && IDF_TARGET_ESP32
config EN_MULTINET5_SINGLE_RECOGNITION
bool "english single recognition (MultiNet5)"
depends on SR_ENGLISH && IDF_TARGET_ESP32S3
config EN_MULTINET5_SINGLE_RECOGNITION_QUANT8
bool "english single recognition (MultiNet5 quantized with 8-bit)"
depends on SR_ENGLISH && IDF_TARGET_ESP32S3
config EN_MULTINET5_SINGLE_RECOGNITION
bool "english single recognition (MultiNet5)"
depends on SR_ENGLISH && IDF_TARGET_ESP32S3
config CN_MULTINET2_SINGLE_RECOGNITION
bool "chinese single recognition (MultiNet2)"
@ -685,162 +685,162 @@ config CN_SPEECH_COMMAND_ID99
config EN_SPEECH_COMMAND_ID0
string "ID0"
depends on SR_ENGLISH
default "T,f,L, ,M,m, ,c, ,q,b,K"
default "TfL Mm c qbK"
config EN_SPEECH_COMMAND_ID1
string "ID1"
depends on SR_ENGLISH
default "S,g,l, ,c, ,S,e,l"
default "Sgl c Sel"
config EN_SPEECH_COMMAND_ID2
string "ID2"
depends on SR_ENGLISH
default "P,L,d, ,N,o,Z, ,p,a,N,c,L"
default "PLd NoZ paNcL"
config EN_SPEECH_COMMAND_ID3
string "ID3"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,M,i, ,S,t,N,D,B,n,K,S"
default "TkN nN Mi StNDBnKS"
config EN_SPEECH_COMMAND_ID4
string "ID4"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,M,i, ,S,t,N,D,B,n,K,S"
default "TkN eF Mi StNDBnKS"
config EN_SPEECH_COMMAND_ID5
string "ID5"
depends on SR_ENGLISH
default "h,i,c,S,T, ,V,n,L,Y,o,M"
default "hicST VnLYoM"
config EN_SPEECH_COMMAND_ID6
string "ID6"
depends on SR_ENGLISH
default "L,b,c,S,T, ,V,n,L,Y,o,M"
default "LbcST VnLYoM"
config EN_SPEECH_COMMAND_ID7
string "ID7"
depends on SR_ENGLISH
default "g,N,K,R,m,S, ,j,c, ,V,n,L,Y,o,M"
default "gNKRmS jc VnLYoM"
config EN_SPEECH_COMMAND_ID8
string "ID8"
depends on SR_ENGLISH
default "D,g,K,R,m,S, ,j,c, ,V,n,L,Y,o,M"
default "DgKRmS jc VnLYoM"
config EN_SPEECH_COMMAND_ID9
string "ID9"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,j,c, ,T,m,V,m"
default "TkN nN jc TmVm"
config EN_SPEECH_COMMAND_ID10
string "ID10"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,j,c, ,T,m,V,m"
default "TkN eF jc TmVm"
config EN_SPEECH_COMMAND_ID11
string "ID11"
depends on SR_ENGLISH
default "M,d,K, ,M,m, ,c, ,T,m"
default "MdK Mm c Tm"
config EN_SPEECH_COMMAND_ID12
string "ID12"
depends on SR_ENGLISH
default "M,d,K, ,M,m, ,c, ,K,n,F,m"
default "MdK Mm c KnFm"
config EN_SPEECH_COMMAND_ID13
string "ID13"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,j,c, ,L,i,T"
default "TkN nN jc LiT"
config EN_SPEECH_COMMAND_ID14
string "ID14"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,j,c, ,L,i,T"
default "TkN eF jc LiT"
config EN_SPEECH_COMMAND_ID15
string "ID15"
depends on SR_ENGLISH
default "p,d,N,q, ,j,c, ,K,c,L,k, ,T,o, ,R,f,D"
default "pdNq jc KcLk To RfD"
config EN_SPEECH_COMMAND_ID16
string "ID16"
depends on SR_ENGLISH
default "p,d,N,q, ,j,c, ,K,c,L,k, ,T,o, ,G,R,m,N"
default "pdNq jc KcLk To GRmN"
config EN_SPEECH_COMMAND_ID17
string "ID17"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,e,L, ,j,c, ,L,i,T,S"
default "TkN nN eL jc LiTS"
config EN_SPEECH_COMMAND_ID18
string "ID18"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,e,L, ,j,c, ,L,i,T,S"
default "TkN eF eL jc LiTS"
config EN_SPEECH_COMMAND_ID19
string "ID19"
depends on SR_ENGLISH
default "T,k,N, ,n,N, ,j,c, ,f,R, ,K,c,N,D,g,s,c,N,k"
default "TkN nN jc fR KcNDgscNk"
config EN_SPEECH_COMMAND_ID20
string "ID20"
depends on SR_ENGLISH
default "T,k,N, ,e,F, ,j,c, ,f,R, ,K,c,N,D,g,s,c,N,k"
default "TkN eF jc fR KcNDgscNk"
config EN_SPEECH_COMMAND_ID21
string "ID21"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,S,g,K,S,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To SgKSTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID22
string "ID22"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,S,f,V,c,N,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To SfVcNTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID23
string "ID23"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,d,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To dTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID24
string "ID24"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,N,i,N,T,m,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To NiNTmN DgGRmZ"
config EN_SPEECH_COMMAND_ID25
string "ID25"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm DgGRmZ"
config EN_SPEECH_COMMAND_ID26
string "ID26"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,W,c,N, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm WcN DgGRmZ"
config EN_SPEECH_COMMAND_ID27
string "ID27"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,T,o, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm To DgGRmZ"
config EN_SPEECH_COMMAND_ID28
string "ID28"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,v,R,m, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm vRm DgGRmZ"
config EN_SPEECH_COMMAND_ID29
string "ID29"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,F,e,R, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm FeR DgGRmZ"
config EN_SPEECH_COMMAND_ID30
string "ID30"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,F,i,V, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm FiV DgGRmZ"
config EN_SPEECH_COMMAND_ID31
string "ID31"
depends on SR_ENGLISH
default "S,f,T, ,j,c, ,T,f,M,P,R,c,p,k, ,T,o, ,T,W,f,N,T,m, ,S,g,K,S, ,D,g,G,R,m,Z"
default "SfT jc TfMPRcpk To TWfNTm SgKS DgGRmZ"
config EN_SPEECH_COMMAND_ID32
string "ID32"

View File

@ -1,18 +1,48 @@
multinet_g2p.py 脚本可以把文本转为可以被multinet识别的音素代号
1. 使用之前需要先安装en_g2p
```
pip install en_g2p
```
**multinet_g2p.py** is used to convert English phrases into phonemes that can be recognized by MultiNet.
2. 运行脚本,将文本转化为所需音素代号
```
python multinet_g2p.py -t "hello word"
----
hello word -> ['h', 'c', 'L', 'b', ' ', 'W', 'k', 'D']
#### 1. Install g2p_en, please refer to https://pypi.org/project/g2p-en/
```
pip install g2p_en
```
3. 将对应的短语的音素代号添加到menuconfig即可
#### 2. Run multinet_g2p.py
```
python multinet_g2p.py -t "hello world,hi ESP;turn on the light;turn off the light"
------
in: hello world,hi ESP;turn on the light;turn off the light
out: hcLb WkLD,hi fST;TkN nN jc LiT;TkN eF jc LiT;
```
#### 3. Add speech commands
##### 3.1 add speech commands by menuconfig
```
idf.py menuconfig
ESP Speech Recognition -> Add speech commands
```
##### 3.2 add speech commands by reset function
```
// Function definition
// typedef void (*esp_mn_iface_op_reset_t)(model_iface_data_t *model_data, char *command_str, char *err_phrase_id);
// "," separates different phrases that share the same command id
// ";" separates different command ids
char *new_commands_str="hcLb WkLD,hi fST;TkN nN jc LiT;TkN eF jc LiT;";
char err_id[256];
multinet->reset(model_data, new_commands_str, err_id);
// hello world,hi ESP -> command id=0
// turn on the light -> command id=1
// turn off the light -> command id=2
```

View File

@ -5,19 +5,28 @@ import pandas
def english_g2p(text, alphabet=None):
    """Convert English speech-command text into MultiNet phoneme codes.

    ``text`` holds one or more commands separated by ";". Inside a command,
    alternative phrases that share the same command id are separated by ",".
    Every phrase is passed through the ``G2p`` model and each symbol it
    returns is translated through ``alphabet``.

    Args:
        text: Command string, e.g.
            "hello world,hi ESP;turn on the light;turn off the light".
        alphabet: Optional mapping from a G2p output symbol to its code.
            When ``None`` the built-in table below is used.

    Returns:
        The converted string. Phrases of one command are joined with ","
        and every command is terminated by ";", so the result always ends
        with ";" (e.g. "hcLb WkLD,hi fST;TkN nN jc LiT;TkN eF jc LiT;").
    """
    g2p = G2p()
    if alphabet is None:
        alphabet = {
            "AE1": "a", "N": "N", " ": " ", "OW1": "b", "V": "V", "AH0": "c", "L": "L", "F": "F",
            "EY1": "d", "S": "S", "B": "B", "R": "R", "AO1": "e", "D": "D", "AH1": "c", "EH1": "f",
            "OW0": "b", "IH0": "g", "G": "G", "HH": "h", "K": "K", "IH1": "g", "W": "W", "AY1": "i",
            "T": "T", "M": "M", "Z": "Z", "DH": "j", "ER0": "k", "P": "P", "NG": "l", "IY1": "m",
            "AA1": "n", "Y": "Y", "UW1": "o", "IY0": "m", "EH2": "f", "CH": "p", "AE0": "a", "JH": "q",
            "ZH": "r", "AA2": "n", "SH": "s", "AW1": "t", "OY1": "u", "AW2": "t", "IH2": "g", "AE2": "a",
            "EY2": "d", "ER1": "k", "TH": "v", "UH1": "w", "UW2": "o", "OW2": "b", "AY2": "i", "UW0": "o",
            "AH2": "c", "EH0": "f", "AW0": "t", "AO2": "e", "AO0": "e", "UH0": "w", "UH2": "w", "AA0": "n",
            "AY0": "i", "IY2": "m", "EY0": "d", "ER2": "k", "OY2": "u", "OY0": "u",
        }

    commands = []
    for command in text.split(";"):
        phrases = []
        for phrase in command.split(","):
            codes = []
            for symbol in g2p(phrase):
                if symbol not in alphabet:
                    # Report which symbol was dropped; the old message
                    # printed a literal "%s" without substituting it.
                    print("skip %s, not found in alphabet" % (symbol,))
                    continue
                codes.append(alphabet[symbol])
            phrases.append("".join(codes))
        # Join by position rather than comparing each phrase with the last
        # one by value: a repeated phrase (e.g. "hi,hello,hi") previously
        # lost its separator and merged with the following phrase.
        commands.append(",".join(phrases) + ";")

    out = "".join(commands)
    print("in:", text)
    print("out:", out)
    return out