diff --git a/CHANGELOG.md b/CHANGELOG.md index 53a9e64..c58415f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased - ESP-DSP dependency is now installed from the component registry +- Add English MultiNet6 models trained with RNNT and CTC ## 1.1.0 - Support esp32c3 for Chinese TTS diff --git a/CMakeLists.txt b/CMakeLists.txt index 50b8db1..b14acd1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ elseif(${IDF_TARGET} STREQUAL "esp32s3") target_link_libraries(${COMPONENT_TARGET} "-Wl,--start-group" hufzip dl_lib + fst c_speech_features $ esp_audio_front_end diff --git a/Kconfig.projbuild b/Kconfig.projbuild index d02b078..67d79f8 100644 --- a/Kconfig.projbuild +++ b/Kconfig.projbuild @@ -160,6 +160,14 @@ choice ENGLISH_SR_MN_MODEL_SEL config SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8 bool "english recognition (mn5q8_en)" depends on IDF_TARGET_ESP32S3 + + config SR_MN_EN_MULTINET6_QUANT + bool "english recognition (mn6_en)" + depends on IDF_TARGET_ESP32S3 + + config SR_MN_EN_MULTINET6_CTC_QUANT + bool "english recognition(RNNT+CTC) (mn6_en_ctc)" + depends on IDF_TARGET_ESP32S3 endchoice @@ -2171,4 +2179,1028 @@ config EN_SPEECH_COMMAND_ID199 endmenu + +menu "Add English speech commands" +depends on SR_MN_EN_MULTINET6_QUANT +config EN_SPEECH_COMMAND_ID0 + string "ID0" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TE LL _ME _A _JO KE,TE LL _ME _A _JO KE" + +config EN_SPEECH_COMMAND_ID1 + string "ID1" + depends on SR_MN_EN_MULTINET6_QUANT + default "_S ING _A _SO NG,S ING _A _SO NG" + +config EN_SPEECH_COMMAND_ID2 + string "ID2" + depends on SR_MN_EN_MULTINET6_QUANT + default "_PLAY _NEW S _CHA N N EL,P LA Y _NEW S _CHA N N EL" + +config EN_SPEECH_COMMAND_ID3 + string "ID3" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _ON _MY _SO UND BO X,_TURN _ON _MY _S O U ND B O X,_TURN _ON _MY _ S O UN D BO X" + +config EN_SPEECH_COMMAND_ID4 + string "ID4" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _OF 
F _MY _SO UND BO X,_TURN _OF _MY _S O U ND B O X,_TURN _OF _MY _ S O UN D BO X" + +config EN_SPEECH_COMMAND_ID5 + string "ID5" + depends on SR_MN_EN_MULTINET6_QUANT + default "_HIGH EST _ VO LU ME,H IG H EST _ VO LU ME" + +config EN_SPEECH_COMMAND_ID6 + string "ID6" + depends on SR_MN_EN_MULTINET6_QUANT + default "LOW EST _ VO LU ME" + +config EN_SPEECH_COMMAND_ID7 + string "ID7" + depends on SR_MN_EN_MULTINET6_QUANT + default "_IN C RE A SE _THE _ VO LU ME" + +config EN_SPEECH_COMMAND_ID8 + string "ID8" + depends on SR_MN_EN_MULTINET6_QUANT + default "_DE C RE A SE _THE _ VO LU ME" + +config EN_SPEECH_COMMAND_ID9 + string "ID9" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _ON _THE _T V,T UR N _ON _THE _T V" + +config EN_SPEECH_COMMAND_ID10 + string "ID10" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _OF F _THE _T V,T UR N _OF F _THE _T V" + +config EN_SPEECH_COMMAND_ID11 + string "ID11" + depends on SR_MN_EN_MULTINET6_QUANT + default "_MAKE _ME _A _TE A,MA KE _ME _A _TE A" + +config EN_SPEECH_COMMAND_ID12 + string "ID12" + depends on SR_MN_EN_MULTINET6_QUANT + default "_MAKE _ME _A _CO FF E E,MA KE _ME _A _CO FF E E" + +config EN_SPEECH_COMMAND_ID13 + string "ID13" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _ON _THE _ L IGHT,T UR N _ON _THE _ L IGHT" + +config EN_SPEECH_COMMAND_ID14 + string "ID14" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _OF F _THE _ L IGHT,T UR N _OF F _THE _ L IGHT,_TURN _OF _THE _ L IGHT" + +config EN_SPEECH_COMMAND_ID15 + string "ID15" + depends on SR_MN_EN_MULTINET6_QUANT + default "_CHA NG E _THE _COL OR _TO _RE D,CH AN GE _THE _COL OR _TO _RE D" + +config EN_SPEECH_COMMAND_ID16 + string "ID15" + depends on SR_MN_EN_MULTINET6_QUANT + default "_CHA NG E _THE _COL OR _TO _G RE EN,CH AN GE _THE _COL OR _TO _G RE EN" + +config EN_SPEECH_COMMAND_ID17 + string "ID15" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _ON _ALL _THE _ L IGHT S,T UR N _ON _ALL _THE _ L IGHT S" + +config 
EN_SPEECH_COMMAND_ID18 + string "ID15" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _OF F _ALL _THE _ L IGHT S,_TURN _OF _ALL _THE _ L IGHT S" + +config EN_SPEECH_COMMAND_ID19 + string "ID15" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _ON _THE _A IR _CON D ITION ER" + +config EN_SPEECH_COMMAND_ID20 + string "ID16" + depends on SR_MN_EN_MULTINET6_QUANT + default "_TURN _OF F _THE _A IR _CON D ITION ER,_TURN _OF _THE _A IR _CON D ITION ER" + +config EN_SPEECH_COMMAND_ID21 + string "ID17" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _SIX TE EN _DE G RE ES" + +config EN_SPEECH_COMMAND_ID22 + string "ID18" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _SE VEN TE EN _DE G RE ES" + +config EN_SPEECH_COMMAND_ID23 + string "ID19" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _E IGHT E EN _DE G RE ES" + +config EN_SPEECH_COMMAND_ID24 + string "ID20" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _NI NE TE EN _DE G RE ES" + +config EN_SPEECH_COMMAND_ID25 + string "ID21" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _T W ENT Y _DE G RE ES" + +config EN_SPEECH_COMMAND_ID26 + string "ID22" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _T W ENT Y _ONE _DE G RE ES" + +config EN_SPEECH_COMMAND_ID27 + string "ID23" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _T W ENT Y _TWO _DE G RE ES" + +config EN_SPEECH_COMMAND_ID28 + string "ID24" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _T W ENT Y _THREE _DE G RE ES" + +config EN_SPEECH_COMMAND_ID29 + string "ID25" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _T W ENT Y _FOUR _DE G RE ES" + +config EN_SPEECH_COMMAND_ID30 + string "ID26" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER 
A TURE _TO _T W ENT Y _F IVE _DE G RE ES" + +config EN_SPEECH_COMMAND_ID31 + string "ID27" + depends on SR_MN_EN_MULTINET6_QUANT + default "_SE T _THE _TE MP ER A TURE _TO _T W ENT Y _SIX _DE G RE ES" + +config EN_SPEECH_COMMAND_ID28 + string "ID28" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID29 + string "ID29" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID30 + string "ID30" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID31 + string "ID31" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID32 + string "ID32" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID33 + string "ID33" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID34 + string "ID34" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID35 + string "ID35" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID36 + string "ID36" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID37 + string "ID37" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID38 + string "ID38" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID39 + string "ID39" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID40 + string "ID40" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID41 + string "ID41" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID42 + string "ID42" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID43 + string "ID43" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID44 + string "ID44" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID45 + string "ID45" + depends on SR_MN_EN_MULTINET6_QUANT + default 
"" + +config EN_SPEECH_COMMAND_ID46 + string "ID46" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID47 + string "ID47" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID48 + string "ID48" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID49 + string "ID49" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID50 + string "ID50" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID51 + string "ID51" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID52 + string "ID52" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID53 + string "ID53" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID54 + string "ID54" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID55 + string "ID55" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID56 + string "ID56" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID57 + string "ID57" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID58 + string "ID58" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID59 + string "ID59" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID60 + string "ID60" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID61 + string "ID61" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID62 + string "ID62" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID63 + string "ID63" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID64 + string "ID64" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID65 + string "ID65" + depends on SR_MN_EN_MULTINET6_QUANT + default 
"" + +config EN_SPEECH_COMMAND_ID66 + string "ID66" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID67 + string "ID67" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID68 + string "ID68" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID69 + string "ID69" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID70 + string "ID70" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID71 + string "ID71" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID72 + string "ID72" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID73 + string "ID73" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID74 + string "ID74" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID75 + string "ID75" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID76 + string "ID76" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID77 + string "ID77" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID78 + string "ID78" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID79 + string "ID79" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID80 + string "ID80" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID81 + string "ID81" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID82 + string "ID82" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID83 + string "ID83" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID84 + string "ID84" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID85 + string "ID85" + depends on SR_MN_EN_MULTINET6_QUANT + default 
"" + +config EN_SPEECH_COMMAND_ID86 + string "ID86" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID87 + string "ID87" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID88 + string "ID88" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID89 + string "ID89" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID90 + string "ID90" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID91 + string "ID91" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID92 + string "ID92" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID93 + string "ID93" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID94 + string "ID94" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID95 + string "ID95" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID96 + string "ID96" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID97 + string "ID97" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID98 + string "ID98" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID99 + string "ID99" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID100 + string "ID100" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID101 + string "ID101" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID102 + string "ID102" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID103 + string "ID103" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID104 + string "ID104" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID105 + string "ID105" + depends on 
SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID106 + string "ID106" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID107 + string "ID107" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID108 + string "ID108" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID109 + string "ID109" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID110 + string "ID110" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID111 + string "ID111" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID112 + string "ID112" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID113 + string "ID113" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID114 + string "ID114" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID115 + string "ID115" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID116 + string "ID116" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID117 + string "ID117" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID118 + string "ID118" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID119 + string "ID119" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID120 + string "ID120" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID121 + string "ID121" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID122 + string "ID122" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID123 + string "ID123" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID124 + string "ID124" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config 
EN_SPEECH_COMMAND_ID125 + string "ID125" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID126 + string "ID126" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID127 + string "ID127" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID128 + string "ID128" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID129 + string "ID129" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID130 + string "ID130" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID131 + string "ID131" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID132 + string "ID132" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID133 + string "ID133" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID134 + string "ID134" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID135 + string "ID135" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID136 + string "ID136" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID137 + string "ID137" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID138 + string "ID138" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID139 + string "ID139" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID140 + string "ID140" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID141 + string "ID141" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID142 + string "ID142" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID143 + string "ID143" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID144 + string "ID144" + depends on 
SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID145 + string "ID145" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID146 + string "ID146" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID147 + string "ID147" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID148 + string "ID148" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID149 + string "ID149" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID150 + string "ID150" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID151 + string "ID151" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID152 + string "ID152" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID153 + string "ID153" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID154 + string "ID154" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID155 + string "ID155" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID156 + string "ID156" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID157 + string "ID157" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID158 + string "ID158" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID159 + string "ID159" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID160 + string "ID160" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID161 + string "ID161" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID162 + string "ID162" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID163 + string "ID163" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config 
EN_SPEECH_COMMAND_ID164 + string "ID164" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID165 + string "ID165" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID166 + string "ID166" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID167 + string "ID167" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID168 + string "ID168" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID169 + string "ID169" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID170 + string "ID170" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID171 + string "ID171" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID172 + string "ID172" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID173 + string "ID173" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID174 + string "ID174" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID175 + string "ID175" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID176 + string "ID176" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID177 + string "ID177" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID178 + string "ID178" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID179 + string "ID179" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID180 + string "ID180" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID181 + string "ID181" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID182 + string "ID182" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID183 + string "ID183" + depends on 
SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID184 + string "ID184" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID185 + string "ID185" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID186 + string "ID186" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID187 + string "ID187" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID188 + string "ID188" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID189 + string "ID189" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID190 + string "ID190" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID191 + string "ID191" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID192 + string "ID192" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID193 + string "ID193" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID194 + string "ID194" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID195 + string "ID195" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID196 + string "ID196" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID197 + string "ID197" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID198 + string "ID198" + depends on SR_MN_EN_MULTINET6_QUANT + default "" + +config EN_SPEECH_COMMAND_ID199 + string "ID199" + depends on SR_MN_EN_MULTINET6_QUANT + default "" +endmenu + endmenu diff --git a/include/esp32s3/dl_lib_conv_queue.h b/include/esp32s3/dl_lib_conv_queue.h index 890689d..280d21d 100644 --- a/include/esp32s3/dl_lib_conv_queue.h +++ b/include/esp32s3/dl_lib_conv_queue.h @@ -40,6 +40,15 @@ typedef struct { */ dl_conv_queue_t *dl_conv_queue_alloc(int n, int c); +/** + * @brief 
Allocate a convolution queue from PSRAM + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc_from_psram(int n, int c); + /** * @brief Free a convolution queue * diff --git a/include/esp32s3/dl_lib_convq8_queue.h b/include/esp32s3/dl_lib_convq8_queue.h index dadb5ca..c37803c 100644 --- a/include/esp32s3/dl_lib_convq8_queue.h +++ b/include/esp32s3/dl_lib_convq8_queue.h @@ -50,6 +50,16 @@ dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c); */ dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch); +/** + * @brief Allocate a fixed-point convolution queue from PSRAM + * + * @param n The length of queue + * @param c The number of elements in the queue + * @param nch The channel of queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq8_queue_t *dl_convq8_queue_alloc_mc_from_psram(int n, int c, int nch); + /** * @brief Free a fixed-point convolution queue * @@ -64,6 +74,16 @@ void dl_convq8_queue_free(dl_convq8_queue_t *cq); */ void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm); +/** + * @brief Move the front pointer of queue forward, + * the first (oldest) element becomes the last (newest) element. + * + * @param cq Input fixed-point convolution queue + * @return Pointer to the oldest element + */ +q8tp_t *dl_convq8_queue_pop(dl_convq8_queue_t *cq); +q8tp_t *dl_convq8_queue_popn(dl_convq8_queue_t *cq, int n); + /** * @brief Insert the float-point element at the end of queue. 
* The precision of fixed-point numbers is described by the Qm.f notation, diff --git a/include/esp32s3/dl_lib_convq_queue.h b/include/esp32s3/dl_lib_convq_queue.h index 8069371..0e8ab47 100644 --- a/include/esp32s3/dl_lib_convq_queue.h +++ b/include/esp32s3/dl_lib_convq_queue.h @@ -93,8 +93,8 @@ void dl_convq_queue_bzero(dl_convq_queue_t *cq); * @param cq Input fixed-point convolution queue * @return Pointer of oldest element */ -inline qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq); -inline qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n); +qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq); +qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n); /** * @brief Remove the oldest element, then insert the input element at the end of queue * @@ -125,7 +125,7 @@ dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1); * @param last_num Offset from the front of the queue * @return Pointer of the element */ -inline qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num); +qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num); /** * @brief Get the pointer of element in the queue by offset diff --git a/include/esp32s3/esp_mn_iface.h b/include/esp32s3/esp_mn_iface.h index f43f326..e2f697b 100644 --- a/include/esp32s3/esp_mn_iface.h +++ b/include/esp32s3/esp_mn_iface.h @@ -17,6 +17,17 @@ typedef enum { ESP_MN_STATE_TIMEOUT = 2, // time out } esp_mn_state_t; +typedef enum { + ESP_MN_GREEDY_SEARCH = 0, // greedy search + ESP_MN_BEAM_SEARCH = 1, // beam search + ESP_MN_BEAM_SEARCH_WITH_FST = 2, // beam search with trie language model +} esp_mn_search_method_t; + +typedef enum { + CHINESE_ID = 1, // Chinese language + ENGLISH_ID = 2, // English language +} language_id_t; + // Return all possible recognition results typedef struct{ esp_mn_state_t state; @@ -24,8 +35,10 @@ typedef struct{ int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id. int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id. 
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability. + char string[256]; } esp_mn_results_t; + typedef struct{ int16_t num; // The number of error phrases, which can not added into model int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM]; // The error phrase index in singly linked list. diff --git a/lib/esp32s3/libc_speech_features.a b/lib/esp32s3/libc_speech_features.a index a5bffe2..a53b6e4 100644 Binary files a/lib/esp32s3/libc_speech_features.a and b/lib/esp32s3/libc_speech_features.a differ diff --git a/lib/esp32s3/libdl_lib.a b/lib/esp32s3/libdl_lib.a index 2f66130..b570195 100644 Binary files a/lib/esp32s3/libdl_lib.a and b/lib/esp32s3/libdl_lib.a differ diff --git a/lib/esp32s3/libesp_audio_front_end.a b/lib/esp32s3/libesp_audio_front_end.a index b78f84f..699b1b2 100644 Binary files a/lib/esp32s3/libesp_audio_front_end.a and b/lib/esp32s3/libesp_audio_front_end.a differ diff --git a/lib/esp32s3/libesp_audio_processor.a b/lib/esp32s3/libesp_audio_processor.a index 7f2271f..e60ccc8 100644 Binary files a/lib/esp32s3/libesp_audio_processor.a and b/lib/esp32s3/libesp_audio_processor.a differ diff --git a/lib/esp32s3/libfst.a b/lib/esp32s3/libfst.a new file mode 100644 index 0000000..215cfe2 Binary files /dev/null and b/lib/esp32s3/libfst.a differ diff --git a/lib/esp32s3/libhufzip.a b/lib/esp32s3/libhufzip.a index 38dfe12..9f6a22e 100644 Binary files a/lib/esp32s3/libhufzip.a and b/lib/esp32s3/libhufzip.a differ diff --git a/lib/esp32s3/libmultinet.a b/lib/esp32s3/libmultinet.a index 5568327..fc19ceb 100644 Binary files a/lib/esp32s3/libmultinet.a and b/lib/esp32s3/libmultinet.a differ diff --git a/lib/esp32s3/libwakenet.a b/lib/esp32s3/libwakenet.a index a0c8c54..afeb02b 100644 Binary files a/lib/esp32s3/libwakenet.a and b/lib/esp32s3/libwakenet.a differ diff --git a/model/movemodel.py b/model/movemodel.py index f57baea..78fbd2d 100644 --- a/model/movemodel.py +++ b/model/movemodel.py @@ -28,12 +28,15 @@ if __name__ == '__main__': with io.open(sdkconfig_path, 
"r") as f: WN_STRING = '' MN_STRING = '' + NSN_STRING = '' for label in f: label = label.strip("\n") if 'CONFIG_SR_WN' in label and label[0] != '#': WN_STRING += label if 'CONFIG_SR_MN' in label and label[0] != '#': MN_STRING += label + if 'CONFIG_SR_NSN' in label and label[0] != '#': + NSN_STRING += label wakenet_model = [] if "CONFIG_SR_WN_WN7Q8_XIAOAITONGXUE" in WN_STRING: @@ -73,9 +76,18 @@ if "CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION_QUANT8" in MN_STRING and len(mu multinet_model.append('mn5q8_en') elif "CONFIG_SR_MN_EN_MULTINET5_SINGLE_RECOGNITION" in MN_STRING and len(multinet_model) < 2: multinet_model.append('mn5_en') +elif "CONFIG_SR_MN_EN_MULTINET6_QUANT" in MN_STRING and len(multinet_model) < 2: + multinet_model.append('mn6_en') +elif "CONFIG_SR_MN_EN_MULTINET6_CTC_QUANT" in MN_STRING and len(multinet_model) < 2: + multinet_model.append('mn6_en_ctc') + +nsnet_model = '' +if "CONFIG_SR_NSN_NSNET1" in NSN_STRING: + nsnet_model = 'nsnet1' print(wakenet_model) print(multinet_model) +print(nsnet_model) target_model = args.project_path + '/target' if os.path.exists(target_model): @@ -87,8 +99,11 @@ if len(wakenet_model) != 0: if len(multinet_model) != 0: for multinet_model_item in multinet_model: shutil.copytree(model_path + '/multinet_model/' + multinet_model_item, target_model+'/'+multinet_model_item) +if nsnet_model != '': + shutil.copytree(model_path + '/nsnet_model/' + nsnet_model, target_model+'/'+nsnet_model) # os.system("cp %s %s" % (wakenet_model+'/_MODEL_INFO_', target_model)) +shutil.copytree(f'{model_path}/multinet_model/fst', target_model + '/fst') total_size = calculate_total_size(target_model) print("Recommended model partition size: ", str(int((total_size / 1024 + 900) / 4 ) * 4) + 'KB') diff --git a/model/multinet_model/fst/commands.txt b/model/multinet_model/fst/commands.txt new file mode 100644 index 0000000..ac0d192 --- /dev/null +++ b/model/multinet_model/fst/commands.txt @@ -0,0 +1,49 @@ +1 ▁TE LL ▁ME ▁A ▁JO KE +2 ▁S ING ▁A ▁SO NG +3 
▁PLAY ▁NEW S ▁CHA N N EL +4 ▁TURN ▁ON ▁MY ▁SO UND BO X +5 ▁TURN ▁OF F ▁MY ▁SO UND BO X +5 ▁TURN ▁OF ▁MY ▁SO UND BO X +6 ▁HIGH EST ▁ VO LU ME +7 ▁ LOW EST ▁ VO LU ME +8 ▁IN C RE A SE ▁THE ▁ VO LU ME +9 ▁DE C RE A SE ▁THE ▁ VO LU ME +10 ▁TURN ▁ON ▁THE ▁T V +11 ▁TURN ▁OF F ▁THE ▁T V +11 ▁TURN ▁OF ▁THE ▁T V +12 ▁MAKE ▁ME ▁A ▁TE A +13 ▁MAKE ▁ME ▁A ▁CO FF E E +14 ▁TURN ▁ON ▁THE ▁ L IGHT +15 ▁TURN ▁OF F ▁THE ▁ L IGHT +15 ▁TURN ▁OF ▁THE ▁ L IGHT +16 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁RE D +17 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁G RE EN +18 ▁TURN ▁ON ▁ALL ▁THE ▁ L IGHT S +19 ▁TURN ▁OF F ▁ALL ▁THE ▁ L IGHT S +19 ▁TURN ▁OF ▁ALL ▁THE ▁ L IGHT S +20 ▁TURN ▁ON ▁THE ▁A IR ▁CON D ITION ER +21 ▁TURN ▁OF F ▁THE ▁A IR ▁CON D ITION ER +21 ▁TURN ▁OF ▁THE ▁A IR ▁CON D ITION ER +22 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SIX TE EN ▁DE G RE ES +23 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SE VEN TE EN ▁DE G RE ES +24 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁E IGHT E EN ▁DE G RE ES +25 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁NI NE TE EN ▁DE G RE ES +26 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁DE G RE ES +27 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁ONE ▁DE G RE ES +28 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁TWO ▁DE G RE ES +29 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁THREE ▁DE G RE ES +30 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁FOUR ▁DE G RE ES +31 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁F IVE ▁DE G RE ES +32 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁SIX ▁DE G RE ES +33 ▁ LOW EST ▁FA N ▁SP E ED +34 ▁ME DI UM ▁FA N ▁SP E ED +35 ▁HIGH EST ▁FA N ▁SP E ED +36 ▁A UT O ▁A D J US T ▁THE ▁FA N ▁SP E ED +37 ▁DE C RE A SE ▁THE ▁FA N ▁SP E ED +38 ▁IN C RE A SE ▁THE ▁FA N ▁SP E ED +39 ▁IN C RE A SE ▁THE ▁TE MP ER A TURE +40 ▁DE C RE A SE ▁THE ▁TE MP ER A TURE +41 ▁CO OL ING ▁MO DE +42 ▁HE AT ING ▁MO DE +43 ▁ VEN T IL ATION ▁MO DE +44 ▁DE H UM ID I F Y ▁MO DE diff --git a/model/multinet_model/fst/fst.txt b/model/multinet_model/fst/fst.txt new file mode 100644 index 0000000..f7c3e59 --- /dev/null +++ 
b/model/multinet_model/fst/fst.txt @@ -0,0 +1,179 @@ +0 1 ▁TE 1 +0 2 ▁S 2 +0 3 ▁PLAY 3 +0 4 ▁TURN 0 +0 5 ▁HIGH 0 +0 6 ▁ 0 +0 7 ▁IN 0 +0 8 ▁DE 0 +0 9 ▁MAKE 0 +0 10 ▁CHA 0 +0 11 ▁SE 0 +0 12 ▁ME 34 +0 13 ▁A 36 +0 14 ▁CO 41 +0 15 ▁HE 42 +1 16 LL 0 +2 17 ING 0 +3 20 ▁NEW 0 +4 21 ▁ON 0 +4 22 ▁OF 0 +5 23 EST 0 +6 25 LOW 0 +6 26 VEN 43 +7 27 C 0 +8 28 C 0 +8 29 H 44 +9 30 ▁ME 0 +10 32 NG 0 +11 33 T 0 +12 37 DI 0 +13 38 UT 0 +14 19 OL 0 +15 19 AT 0 +16 31 ▁ME 0 +17 39 ▁A 0 +18 95 ▁MO 0 +19 18 ING 0 +20 44 S 0 +21 45 ▁MY 4 +21 46 ▁THE 0 +21 47 ▁ALL 18 +22 48 F 0 +22 45 ▁MY 5 +22 49 ▁THE 0 +22 47 ▁ALL 19 +23 50 ▁ 6 +23 51 ▁FA 35 +24 50 ▁ 7 +24 51 ▁FA 33 +25 24 EST 0 +26 36 T 0 +27 52 RE 0 +28 53 RE 0 +29 58 UM 0 +30 40 ▁A 0 +31 41 ▁A 0 +32 60 E 0 +33 67 ▁THE 0 +34 59 ▁THE 0 +35 34 T 0 +36 84 IL 0 +37 59 UM 0 +38 43 O 0 +39 74 ▁SO 0 +40 92 ▁TE 12 +40 93 ▁CO 13 +41 96 ▁JO 0 +42 109 D 0 +43 42 ▁A 0 +44 76 ▁CHA 0 +45 75 ▁SO 0 +46 77 ▁T 10 +46 78 ▁ 14 +46 79 ▁A 20 +47 68 ▁THE 0 +48 45 ▁MY 5 +48 49 ▁THE 0 +48 47 ▁ALL 19 +49 77 ▁T 11 +49 78 ▁ 15 +49 79 ▁A 21 +50 80 VO 0 +51 81 N 0 +52 85 A 0 +53 86 A 0 +54 97 EN 0 +55 54 RE 0 +56 97 ES 0 +57 56 RE 0 +58 91 ID 0 +59 51 ▁FA 0 +60 69 ▁THE 0 +61 97 ED 0 +62 61 E 0 +63 97 E 0 +64 63 E 0 +65 130 EN 0 +66 65 E 0 +67 94 ▁TE 0 +68 101 ▁ 0 +69 105 ▁COL 0 +70 50 ▁ 8 +70 51 ▁FA 38 +70 108 ▁TE 39 +71 70 ▁THE 0 +72 50 ▁ 9 +72 51 ▁FA 37 +72 108 ▁TE 40 +73 72 ▁THE 0 +74 97 NG 0 +75 98 UND 0 +76 82 N 0 +77 97 V 0 +78 99 L 0 +79 102 IR 0 +80 103 LU 0 +81 62 ▁SP 0 +82 83 N 0 +83 97 EL 0 +84 18 ATION 0 +85 71 SE 0 +86 73 SE 0 +87 122 TURE 0 +88 87 A 0 +89 97 TURE 0 +90 89 A 0 +91 104 I 0 +92 97 A 0 +93 64 FF 0 +94 106 MP 0 +95 97 DE 0 +96 97 KE 0 +97 +98 112 BO 0 +99 97 IGHT 0 +100 116 IGHT 0 +101 100 L 0 +102 111 ▁CON 0 +103 97 ME 0 +104 113 F 0 +105 114 OR 0 +106 88 ER 0 +107 90 ER 0 +108 107 MP 0 +109 115 J 0 +110 123 ITION 0 +111 110 D 0 +112 97 X 0 +113 18 Y 0 +114 120 ▁TO 0 +115 35 US 0 +116 97 S 0 +117 66 IGHT 0 +118 131 ▁DE 26 +118 130 ▁ONE 27 
+118 130 ▁TWO 28 +118 130 ▁THREE 29 +118 130 ▁FOUR 30 +118 132 ▁F 31 +118 130 ▁SIX 32 +119 118 Y 0 +120 124 ▁RE 16 +120 55 ▁G 17 +121 125 ▁SIX 22 +121 126 ▁SE 23 +121 117 ▁E 24 +121 127 ▁NI 25 +121 128 ▁T 0 +122 121 ▁TO 0 +123 97 ER 0 +124 97 D 0 +125 65 TE 0 +126 125 VEN 0 +127 125 NE 0 +128 129 W 0 +129 119 ENT 0 +130 131 ▁DE 0 +131 57 G 0 +132 130 IVE 0 diff --git a/model/multinet_model/fst/fst_reversed.txt b/model/multinet_model/fst/fst_reversed.txt new file mode 100644 index 0000000..a8ca67e --- /dev/null +++ b/model/multinet_model/fst/fst_reversed.txt @@ -0,0 +1,188 @@ +0 1 KE 1 +0 2 NG 2 +0 3 EL 3 +0 4 X 0 +0 5 ME 0 +0 6 V 0 +0 7 A 12 +0 8 E 13 +0 9 IGHT 0 +0 10 D 16 +0 11 EN 17 +0 12 S 0 +0 13 ER 0 +0 14 ES 0 +0 15 ED 0 +0 16 TURE 0 +0 17 DE 0 +1 18 ▁JO 0 +2 19 ▁SO 0 +3 22 N 0 +4 26 BO 0 +5 27 LU 0 +6 28 ▁T 0 +7 31 ▁TE 0 +8 36 E 0 +9 40 L 0 +10 43 ▁RE 0 +11 44 RE 0 +12 42 IGHT 0 +13 54 ITION 0 +14 45 RE 0 +15 37 E 0 +16 55 A 0 +17 61 ▁MO 0 +18 62 ▁A 0 +19 64 ▁A 0 +20 107 ▁MY 0 +21 20 ▁SO 0 +22 23 N 0 +23 94 ▁CHA 0 +24 114 ▁FA 0 +25 24 N 0 +26 21 UND 0 +27 69 VO 0 +28 70 ▁THE 0 +29 82 ▁TO 0 +30 29 ▁T 0 +31 63 ▁A 0 +32 76 ▁THE 0 +33 32 ▁TE 0 +34 78 ▁THE 0 +35 34 ▁TE 0 +36 79 FF 0 +37 25 ▁SP 0 +38 132 NG 0 +39 38 E 0 +40 72 ▁ 0 +41 74 ▁ 0 +42 41 L 0 +43 81 ▁TO 0 +44 43 ▁G 0 +45 85 G 0 +46 135 C 0 +47 46 RE 0 +48 136 C 0 +49 48 RE 0 +50 137 C 0 +51 50 RE 0 +52 29 ▁E 0 +53 52 IGHT 0 +54 83 D 0 +55 86 ER 0 +56 87 ER 0 +57 56 A 0 +58 47 A 0 +59 49 A 0 +60 51 A 0 +61 88 ING 0 +61 89 ATION 43 +61 90 Y 44 +62 91 ▁ME 0 +63 92 ▁ME 0 +64 93 ING 0 +65 77 ▁THE 0 +66 65 ▁A 0 +67 138 O 0 +68 67 ▁A 0 +69 80 ▁ 0 +70 95 ▁ON 10 +70 96 F 11 +70 95 ▁OF 11 +71 95 ▁ON 14 +71 96 F 15 +71 95 ▁OF 15 +72 71 ▁THE 0 +73 120 ▁ALL 0 +74 73 ▁THE 0 +75 39 ▁THE 0 +76 60 SE 0 +77 95 ▁ON 20 +77 96 F 21 +77 95 ▁OF 21 +78 116 T 0 +79 31 ▁CO 0 +80 108 EST 0 +80 109 ▁THE 0 +81 97 OR 0 +82 57 TURE 0 +83 98 ▁CON 0 +84 68 D 0 +85 99 ▁DE 0 +86 33 MP 0 +87 35 MP 0 +88 100 OL 41 +88 101 AT 42 +89 102 IL 
0 +90 103 F 0 +91 104 LL 0 +92 105 ▁MAKE 0 +93 105 ▁S 0 +94 106 S 0 +95 105 ▁TURN 0 +96 95 ▁OF 0 +97 75 ▁COL 0 +98 66 IR 0 +99 110 EN 0 +99 111 Y 26 +99 112 ▁ONE 27 +99 112 ▁TWO 28 +99 112 ▁THREE 29 +99 112 ▁FOUR 30 +99 113 IVE 31 +99 112 ▁SIX 32 +100 105 ▁CO 0 +101 105 ▁HE 0 +102 115 T 0 +103 117 I 0 +104 105 ▁TE 0 +105 +106 118 ▁NEW 0 +107 95 ▁ON 4 +107 96 F 5 +107 95 ▁OF 5 +108 119 LOW 7 +108 105 ▁HIGH 6 +109 58 SE 0 +110 121 TE 0 +110 53 E 24 +111 122 ENT 0 +112 111 Y 0 +113 112 ▁F 0 +114 123 EST 0 +114 124 UM 34 +114 125 ▁THE 0 +115 119 VEN 0 +116 105 ▁SE 0 +117 126 ID 0 +118 105 ▁PLAY 0 +119 105 ▁ 0 +120 95 ▁ON 18 +120 96 F 19 +120 95 ▁OF 19 +121 29 ▁SIX 22 +121 127 VEN 23 +121 128 NE 25 +122 30 W 0 +123 119 LOW 33 +123 105 ▁HIGH 35 +124 129 DI 0 +125 130 T 36 +125 59 SE 0 +126 131 UM 0 +127 29 ▁SE 0 +128 29 ▁NI 0 +129 105 ▁ME 0 +130 133 US 0 +131 134 H 0 +132 105 ▁CHA 0 +133 84 J 0 +134 105 ▁DE 0 +135 105 ▁IN 8 +135 105 ▁DE 9 +136 105 ▁DE 37 +136 105 ▁IN 38 +137 105 ▁IN 39 +137 105 ▁DE 40 +138 139 UT 0 +139 105 ▁A 0 diff --git a/model/multinet_model/fst/tokens.txt b/model/multinet_model/fst/tokens.txt new file mode 100644 index 0000000..198542c --- /dev/null +++ b/model/multinet_model/fst/tokens.txt @@ -0,0 +1,99 @@ + 0 +S 3 +▁THE 4 +T 5 +▁A 6 +N 7 +D 8 +ED 9 +E 10 +▁OF 11 +Y 12 +▁S 14 +▁IN 15 +▁TO 17 +▁ 18 +A 19 +ING 20 +O 22 +▁HE 24 +ER 25 +C 26 +G 27 +I 28 +L 29 +RE 31 +F 37 +▁RE 38 +W 40 +▁E 45 +OR 50 +▁F 51 +ES 55 +LL 56 +ENT 65 +H 66 +▁DE 68 +▁G 71 +EN 72 +▁ON 73 +SE 74 +▁T 75 +▁ME 78 +IL 81 +NE 86 +TE 87 +▁SO 89 +ATION 90 +NG 92 +ME 93 +▁CON 95 +EL 103 +IR 115 +▁MO 117 +▁CO 119 +▁SE 122 +▁FA 136 +V 138 +US 146 +▁ALL 151 +X 152 +IVE 156 +▁ONE 157 +KE 159 +▁TE 175 +AT 178 +LU 180 +MP 182 +▁SP 186 +▁MY 188 +DE 193 +IGHT 196 +UT 198 +EST 204 +UND 209 +FF 216 +J 220 +▁CHA 226 +OL 227 +▁TWO 237 +ID 251 +UM 256 +VO 259 +DI 266 +LOW 281 +TURE 286 +▁NEW 304 +ITION 310 +BO 312 +VEN 326 +▁PLAY 338 +▁JO 356 +▁THREE 367 +▁COL 375 +▁HIGH 381 +▁FOUR 424 +▁MAKE 430 
+▁NI 446 +▁TURN 457 +▁SIX 483 diff --git a/model/multinet_model/mn6_en/_MODEL_INFO_ b/model/multinet_model/mn6_en/_MODEL_INFO_ new file mode 100644 index 0000000..4e2769f --- /dev/null +++ b/model/multinet_model/mn6_en/_MODEL_INFO_ @@ -0,0 +1,2 @@ +# (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_... +MN6_v1_english_8_0.9_0.90 diff --git a/model/multinet_model/mn6_en/mn6_data b/model/multinet_model/mn6_en/mn6_data new file mode 100644 index 0000000..4850190 Binary files /dev/null and b/model/multinet_model/mn6_en/mn6_data differ diff --git a/model/multinet_model/mn6_en/mn6_index b/model/multinet_model/mn6_en/mn6_index new file mode 100644 index 0000000..43a3e31 Binary files /dev/null and b/model/multinet_model/mn6_en/mn6_index differ diff --git a/model/multinet_model/mn6_en_ctc/_MODEL_INFO_ b/model/multinet_model/mn6_en_ctc/_MODEL_INFO_ new file mode 100644 index 0000000..dc2c373 --- /dev/null +++ b/model/multinet_model/mn6_en_ctc/_MODEL_INFO_ @@ -0,0 +1,2 @@ +# (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_... 
+MN6_v11_english_8_0.9_0.90 diff --git a/model/multinet_model/mn6_en_ctc/mn6_data b/model/multinet_model/mn6_en_ctc/mn6_data new file mode 100644 index 0000000..313d20b Binary files /dev/null and b/model/multinet_model/mn6_en_ctc/mn6_data differ diff --git a/model/multinet_model/mn6_en_ctc/mn6_index b/model/multinet_model/mn6_en_ctc/mn6_index new file mode 100644 index 0000000..5db24af Binary files /dev/null and b/model/multinet_model/mn6_en_ctc/mn6_index differ diff --git a/model/target/fst/commands.txt b/model/target/fst/commands.txt new file mode 100644 index 0000000..ac0d192 --- /dev/null +++ b/model/target/fst/commands.txt @@ -0,0 +1,49 @@ +1 ▁TE LL ▁ME ▁A ▁JO KE +2 ▁S ING ▁A ▁SO NG +3 ▁PLAY ▁NEW S ▁CHA N N EL +4 ▁TURN ▁ON ▁MY ▁SO UND BO X +5 ▁TURN ▁OF F ▁MY ▁SO UND BO X +5 ▁TURN ▁OF ▁MY ▁SO UND BO X +6 ▁HIGH EST ▁ VO LU ME +7 ▁ LOW EST ▁ VO LU ME +8 ▁IN C RE A SE ▁THE ▁ VO LU ME +9 ▁DE C RE A SE ▁THE ▁ VO LU ME +10 ▁TURN ▁ON ▁THE ▁T V +11 ▁TURN ▁OF F ▁THE ▁T V +11 ▁TURN ▁OF ▁THE ▁T V +12 ▁MAKE ▁ME ▁A ▁TE A +13 ▁MAKE ▁ME ▁A ▁CO FF E E +14 ▁TURN ▁ON ▁THE ▁ L IGHT +15 ▁TURN ▁OF F ▁THE ▁ L IGHT +15 ▁TURN ▁OF ▁THE ▁ L IGHT +16 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁RE D +17 ▁CHA NG E ▁THE ▁COL OR ▁TO ▁G RE EN +18 ▁TURN ▁ON ▁ALL ▁THE ▁ L IGHT S +19 ▁TURN ▁OF F ▁ALL ▁THE ▁ L IGHT S +19 ▁TURN ▁OF ▁ALL ▁THE ▁ L IGHT S +20 ▁TURN ▁ON ▁THE ▁A IR ▁CON D ITION ER +21 ▁TURN ▁OF F ▁THE ▁A IR ▁CON D ITION ER +21 ▁TURN ▁OF ▁THE ▁A IR ▁CON D ITION ER +22 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SIX TE EN ▁DE G RE ES +23 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁SE VEN TE EN ▁DE G RE ES +24 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁E IGHT E EN ▁DE G RE ES +25 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁NI NE TE EN ▁DE G RE ES +26 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁DE G RE ES +27 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁ONE ▁DE G RE ES +28 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁TWO ▁DE G RE ES +29 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁THREE ▁DE G RE ES +30 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁FOUR 
▁DE G RE ES +31 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁F IVE ▁DE G RE ES +32 ▁SE T ▁THE ▁TE MP ER A TURE ▁TO ▁T W ENT Y ▁SIX ▁DE G RE ES +33 ▁ LOW EST ▁FA N ▁SP E ED +34 ▁ME DI UM ▁FA N ▁SP E ED +35 ▁HIGH EST ▁FA N ▁SP E ED +36 ▁A UT O ▁A D J US T ▁THE ▁FA N ▁SP E ED +37 ▁DE C RE A SE ▁THE ▁FA N ▁SP E ED +38 ▁IN C RE A SE ▁THE ▁FA N ▁SP E ED +39 ▁IN C RE A SE ▁THE ▁TE MP ER A TURE +40 ▁DE C RE A SE ▁THE ▁TE MP ER A TURE +41 ▁CO OL ING ▁MO DE +42 ▁HE AT ING ▁MO DE +43 ▁ VEN T IL ATION ▁MO DE +44 ▁DE H UM ID I F Y ▁MO DE diff --git a/model/target/fst/fst.txt b/model/target/fst/fst.txt new file mode 100644 index 0000000..f7c3e59 --- /dev/null +++ b/model/target/fst/fst.txt @@ -0,0 +1,179 @@ +0 1 ▁TE 1 +0 2 ▁S 2 +0 3 ▁PLAY 3 +0 4 ▁TURN 0 +0 5 ▁HIGH 0 +0 6 ▁ 0 +0 7 ▁IN 0 +0 8 ▁DE 0 +0 9 ▁MAKE 0 +0 10 ▁CHA 0 +0 11 ▁SE 0 +0 12 ▁ME 34 +0 13 ▁A 36 +0 14 ▁CO 41 +0 15 ▁HE 42 +1 16 LL 0 +2 17 ING 0 +3 20 ▁NEW 0 +4 21 ▁ON 0 +4 22 ▁OF 0 +5 23 EST 0 +6 25 LOW 0 +6 26 VEN 43 +7 27 C 0 +8 28 C 0 +8 29 H 44 +9 30 ▁ME 0 +10 32 NG 0 +11 33 T 0 +12 37 DI 0 +13 38 UT 0 +14 19 OL 0 +15 19 AT 0 +16 31 ▁ME 0 +17 39 ▁A 0 +18 95 ▁MO 0 +19 18 ING 0 +20 44 S 0 +21 45 ▁MY 4 +21 46 ▁THE 0 +21 47 ▁ALL 18 +22 48 F 0 +22 45 ▁MY 5 +22 49 ▁THE 0 +22 47 ▁ALL 19 +23 50 ▁ 6 +23 51 ▁FA 35 +24 50 ▁ 7 +24 51 ▁FA 33 +25 24 EST 0 +26 36 T 0 +27 52 RE 0 +28 53 RE 0 +29 58 UM 0 +30 40 ▁A 0 +31 41 ▁A 0 +32 60 E 0 +33 67 ▁THE 0 +34 59 ▁THE 0 +35 34 T 0 +36 84 IL 0 +37 59 UM 0 +38 43 O 0 +39 74 ▁SO 0 +40 92 ▁TE 12 +40 93 ▁CO 13 +41 96 ▁JO 0 +42 109 D 0 +43 42 ▁A 0 +44 76 ▁CHA 0 +45 75 ▁SO 0 +46 77 ▁T 10 +46 78 ▁ 14 +46 79 ▁A 20 +47 68 ▁THE 0 +48 45 ▁MY 5 +48 49 ▁THE 0 +48 47 ▁ALL 19 +49 77 ▁T 11 +49 78 ▁ 15 +49 79 ▁A 21 +50 80 VO 0 +51 81 N 0 +52 85 A 0 +53 86 A 0 +54 97 EN 0 +55 54 RE 0 +56 97 ES 0 +57 56 RE 0 +58 91 ID 0 +59 51 ▁FA 0 +60 69 ▁THE 0 +61 97 ED 0 +62 61 E 0 +63 97 E 0 +64 63 E 0 +65 130 EN 0 +66 65 E 0 +67 94 ▁TE 0 +68 101 ▁ 0 +69 105 ▁COL 0 +70 50 ▁ 8 +70 51 ▁FA 38 +70 108 ▁TE 39 +71 70 ▁THE 
0 +72 50 ▁ 9 +72 51 ▁FA 37 +72 108 ▁TE 40 +73 72 ▁THE 0 +74 97 NG 0 +75 98 UND 0 +76 82 N 0 +77 97 V 0 +78 99 L 0 +79 102 IR 0 +80 103 LU 0 +81 62 ▁SP 0 +82 83 N 0 +83 97 EL 0 +84 18 ATION 0 +85 71 SE 0 +86 73 SE 0 +87 122 TURE 0 +88 87 A 0 +89 97 TURE 0 +90 89 A 0 +91 104 I 0 +92 97 A 0 +93 64 FF 0 +94 106 MP 0 +95 97 DE 0 +96 97 KE 0 +97 +98 112 BO 0 +99 97 IGHT 0 +100 116 IGHT 0 +101 100 L 0 +102 111 ▁CON 0 +103 97 ME 0 +104 113 F 0 +105 114 OR 0 +106 88 ER 0 +107 90 ER 0 +108 107 MP 0 +109 115 J 0 +110 123 ITION 0 +111 110 D 0 +112 97 X 0 +113 18 Y 0 +114 120 ▁TO 0 +115 35 US 0 +116 97 S 0 +117 66 IGHT 0 +118 131 ▁DE 26 +118 130 ▁ONE 27 +118 130 ▁TWO 28 +118 130 ▁THREE 29 +118 130 ▁FOUR 30 +118 132 ▁F 31 +118 130 ▁SIX 32 +119 118 Y 0 +120 124 ▁RE 16 +120 55 ▁G 17 +121 125 ▁SIX 22 +121 126 ▁SE 23 +121 117 ▁E 24 +121 127 ▁NI 25 +121 128 ▁T 0 +122 121 ▁TO 0 +123 97 ER 0 +124 97 D 0 +125 65 TE 0 +126 125 VEN 0 +127 125 NE 0 +128 129 W 0 +129 119 ENT 0 +130 131 ▁DE 0 +131 57 G 0 +132 130 IVE 0 diff --git a/model/target/fst/fst_reversed.txt b/model/target/fst/fst_reversed.txt new file mode 100644 index 0000000..a8ca67e --- /dev/null +++ b/model/target/fst/fst_reversed.txt @@ -0,0 +1,188 @@ +0 1 KE 1 +0 2 NG 2 +0 3 EL 3 +0 4 X 0 +0 5 ME 0 +0 6 V 0 +0 7 A 12 +0 8 E 13 +0 9 IGHT 0 +0 10 D 16 +0 11 EN 17 +0 12 S 0 +0 13 ER 0 +0 14 ES 0 +0 15 ED 0 +0 16 TURE 0 +0 17 DE 0 +1 18 ▁JO 0 +2 19 ▁SO 0 +3 22 N 0 +4 26 BO 0 +5 27 LU 0 +6 28 ▁T 0 +7 31 ▁TE 0 +8 36 E 0 +9 40 L 0 +10 43 ▁RE 0 +11 44 RE 0 +12 42 IGHT 0 +13 54 ITION 0 +14 45 RE 0 +15 37 E 0 +16 55 A 0 +17 61 ▁MO 0 +18 62 ▁A 0 +19 64 ▁A 0 +20 107 ▁MY 0 +21 20 ▁SO 0 +22 23 N 0 +23 94 ▁CHA 0 +24 114 ▁FA 0 +25 24 N 0 +26 21 UND 0 +27 69 VO 0 +28 70 ▁THE 0 +29 82 ▁TO 0 +30 29 ▁T 0 +31 63 ▁A 0 +32 76 ▁THE 0 +33 32 ▁TE 0 +34 78 ▁THE 0 +35 34 ▁TE 0 +36 79 FF 0 +37 25 ▁SP 0 +38 132 NG 0 +39 38 E 0 +40 72 ▁ 0 +41 74 ▁ 0 +42 41 L 0 +43 81 ▁TO 0 +44 43 ▁G 0 +45 85 G 0 +46 135 C 0 +47 46 RE 0 +48 136 C 0 +49 48 RE 0 +50 137 C 0 +51 
50 RE 0 +52 29 ▁E 0 +53 52 IGHT 0 +54 83 D 0 +55 86 ER 0 +56 87 ER 0 +57 56 A 0 +58 47 A 0 +59 49 A 0 +60 51 A 0 +61 88 ING 0 +61 89 ATION 43 +61 90 Y 44 +62 91 ▁ME 0 +63 92 ▁ME 0 +64 93 ING 0 +65 77 ▁THE 0 +66 65 ▁A 0 +67 138 O 0 +68 67 ▁A 0 +69 80 ▁ 0 +70 95 ▁ON 10 +70 96 F 11 +70 95 ▁OF 11 +71 95 ▁ON 14 +71 96 F 15 +71 95 ▁OF 15 +72 71 ▁THE 0 +73 120 ▁ALL 0 +74 73 ▁THE 0 +75 39 ▁THE 0 +76 60 SE 0 +77 95 ▁ON 20 +77 96 F 21 +77 95 ▁OF 21 +78 116 T 0 +79 31 ▁CO 0 +80 108 EST 0 +80 109 ▁THE 0 +81 97 OR 0 +82 57 TURE 0 +83 98 ▁CON 0 +84 68 D 0 +85 99 ▁DE 0 +86 33 MP 0 +87 35 MP 0 +88 100 OL 41 +88 101 AT 42 +89 102 IL 0 +90 103 F 0 +91 104 LL 0 +92 105 ▁MAKE 0 +93 105 ▁S 0 +94 106 S 0 +95 105 ▁TURN 0 +96 95 ▁OF 0 +97 75 ▁COL 0 +98 66 IR 0 +99 110 EN 0 +99 111 Y 26 +99 112 ▁ONE 27 +99 112 ▁TWO 28 +99 112 ▁THREE 29 +99 112 ▁FOUR 30 +99 113 IVE 31 +99 112 ▁SIX 32 +100 105 ▁CO 0 +101 105 ▁HE 0 +102 115 T 0 +103 117 I 0 +104 105 ▁TE 0 +105 +106 118 ▁NEW 0 +107 95 ▁ON 4 +107 96 F 5 +107 95 ▁OF 5 +108 119 LOW 7 +108 105 ▁HIGH 6 +109 58 SE 0 +110 121 TE 0 +110 53 E 24 +111 122 ENT 0 +112 111 Y 0 +113 112 ▁F 0 +114 123 EST 0 +114 124 UM 34 +114 125 ▁THE 0 +115 119 VEN 0 +116 105 ▁SE 0 +117 126 ID 0 +118 105 ▁PLAY 0 +119 105 ▁ 0 +120 95 ▁ON 18 +120 96 F 19 +120 95 ▁OF 19 +121 29 ▁SIX 22 +121 127 VEN 23 +121 128 NE 25 +122 30 W 0 +123 119 LOW 33 +123 105 ▁HIGH 35 +124 129 DI 0 +125 130 T 36 +125 59 SE 0 +126 131 UM 0 +127 29 ▁SE 0 +128 29 ▁NI 0 +129 105 ▁ME 0 +130 133 US 0 +131 134 H 0 +132 105 ▁CHA 0 +133 84 J 0 +134 105 ▁DE 0 +135 105 ▁IN 8 +135 105 ▁DE 9 +136 105 ▁DE 37 +136 105 ▁IN 38 +137 105 ▁IN 39 +137 105 ▁DE 40 +138 139 UT 0 +139 105 ▁A 0 diff --git a/model/target/fst/tokens.txt b/model/target/fst/tokens.txt new file mode 100644 index 0000000..198542c --- /dev/null +++ b/model/target/fst/tokens.txt @@ -0,0 +1,99 @@ + 0 +S 3 +▁THE 4 +T 5 +▁A 6 +N 7 +D 8 +ED 9 +E 10 +▁OF 11 +Y 12 +▁S 14 +▁IN 15 +▁TO 17 +▁ 18 +A 19 +ING 20 +O 22 +▁HE 24 +ER 25 +C 26 +G 27 +I 28 +L 29 +RE 
31 +F 37 +▁RE 38 +W 40 +▁E 45 +OR 50 +▁F 51 +ES 55 +LL 56 +ENT 65 +H 66 +▁DE 68 +▁G 71 +EN 72 +▁ON 73 +SE 74 +▁T 75 +▁ME 78 +IL 81 +NE 86 +TE 87 +▁SO 89 +ATION 90 +NG 92 +ME 93 +▁CON 95 +EL 103 +IR 115 +▁MO 117 +▁CO 119 +▁SE 122 +▁FA 136 +V 138 +US 146 +▁ALL 151 +X 152 +IVE 156 +▁ONE 157 +KE 159 +▁TE 175 +AT 178 +LU 180 +MP 182 +▁SP 186 +▁MY 188 +DE 193 +IGHT 196 +UT 198 +EST 204 +UND 209 +FF 216 +J 220 +▁CHA 226 +OL 227 +▁TWO 237 +ID 251 +UM 256 +VO 259 +DI 266 +LOW 281 +TURE 286 +▁NEW 304 +ITION 310 +BO 312 +VEN 326 +▁PLAY 338 +▁JO 356 +▁THREE 367 +▁COL 375 +▁HIGH 381 +▁FOUR 424 +▁MAKE 430 +▁NI 446 +▁TURN 457 +▁SIX 483 diff --git a/model/target/mn6_en_ctc/_MODEL_INFO_ b/model/target/mn6_en_ctc/_MODEL_INFO_ new file mode 100644 index 0000000..dc2c373 --- /dev/null +++ b/model/target/mn6_en_ctc/_MODEL_INFO_ @@ -0,0 +1,2 @@ +# (neural network type)_(model data version)_(lable1_detection windown length_threshold for 90%_threshold for 95%)_(lable2 ...)_... +MN6_v11_english_8_0.9_0.90 diff --git a/model/target/mn6_en_ctc/mn6_data b/model/target/mn6_en_ctc/mn6_data new file mode 100644 index 0000000..313d20b Binary files /dev/null and b/model/target/mn6_en_ctc/mn6_data differ diff --git a/model/target/mn6_en_ctc/mn6_index b/model/target/mn6_en_ctc/mn6_index new file mode 100644 index 0000000..5db24af Binary files /dev/null and b/model/target/mn6_en_ctc/mn6_index differ diff --git a/model/target/wn9_hiesp/_MODEL_INFO_ b/model/target/wn9_hiesp/_MODEL_INFO_ new file mode 100644 index 0000000..b80513b --- /dev/null +++ b/model/target/wn9_hiesp/_MODEL_INFO_ @@ -0,0 +1 @@ +wakeNet9_v1h24_hiesp_3_0.63_0.635 \ No newline at end of file diff --git a/model/target/wn9_hiesp/wn9_data b/model/target/wn9_hiesp/wn9_data new file mode 100644 index 0000000..7d99255 Binary files /dev/null and b/model/target/wn9_hiesp/wn9_data differ diff --git a/model/target/wn9_hiesp/wn9_index b/model/target/wn9_hiesp/wn9_index new file mode 100644 index 0000000..3845b37 Binary files /dev/null and 
b/model/target/wn9_hiesp/wn9_index differ diff --git a/src/esp_process_sdkconfig.c b/src/esp_process_sdkconfig.c index 9711add..06ae9f5 100644 --- a/src/esp_process_sdkconfig.c +++ b/src/esp_process_sdkconfig.c @@ -12,42 +12,42 @@ void check_chip_config(void) { #ifdef CONFIG_IDF_TARGET_ESP32S3 #ifndef CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240 - ESP_LOGE(TAG, "CPU freq should be 240MHz"); + ESP_LOGW(TAG, "CPU freq should be 240MHz"); #endif -#ifndef CONFIG_ESPTOOLPY_FLASHFREQ_80M - ESP_LOGE(TAG, "Flash freq should be 80MHz"); +#if (! defined CONFIG_ESPTOOLPY_FLASHFREQ_80M) && (! defined CONFIG_ESPTOOLPY_FLASHFREQ_120M) + ESP_LOGW(TAG, "Flash freq should be not less than 80MHz"); #endif -#ifndef CONFIG_SPIRAM_SPEED_80M - ESP_LOGE(TAG, "PSRAM freq should be 80MHz"); +#if (! defined CONFIG_SPIRAM_SPEED_80M) && (! defined CONFIG_SPIRAM_SPEED_120M) + ESP_LOGW(TAG, "PSRAM freq should be not less than 80MHz"); #endif #ifndef CONFIG_ESP32S3_DATA_CACHE_64KB - ESP_LOGE(TAG, "Data cache should be 64KB"); + ESP_LOGW(TAG, "Data cache recommends 64KB"); #endif #ifndef CONFIG_ESP32S3_DATA_CACHE_LINE_64B - ESP_LOGE(TAG, "Data cache line should be 64B"); + ESP_LOGW(TAG, "Data cache line recommends 64B"); #endif #elif CONFIG_IDF_TARGET_ESP32 #ifndef CONFIG_ESP32_DEFAULT_CPU_FREQ_240 - ESP_LOGE(TAG, "CPU freq should be 240MHz"); + ESP_LOGW(TAG, "CPU freq should be 240MHz"); #endif #ifndef CONFIG_SPIRAM_SPEED_80M - ESP_LOGE(TAG, "PSRAM freq should be 80MHz"); + ESP_LOGW(TAG, "PSRAM freq should be 80MHz"); #endif #ifndef CONFIG_ESPTOOLPY_FLASHFREQ_80M - ESP_LOGE(TAG, "Flash freq should be 80MHz"); + ESP_LOGW(TAG, "Flash freq should be 80MHz"); #endif #ifndef CONFIG_ESPTOOLPY_FLASHMODE_QIO - ESP_LOGE(TAG, "Flash mode should be QIO"); + ESP_LOGW(TAG, "Flash mode should be QIO"); #endif #else - ESP_LOGE(TAG, "ESP-SR-AFE only support ESP32/ESP32S3"); + ESP_LOGW(TAG, "ESP-SR-AFE only support ESP32/ESP32S3"); #endif } diff --git a/tool/README.md b/tool/README.md index 63af62a..375f2fa 100644 --- 
a/tool/README.md +++ b/tool/README.md @@ -1,7 +1,4 @@ -**multinet_g2p.py** is used to convert English phrase into phonemes which can be recognized by multinet. - - - +## MultiNet5 #### 1. Install g2p_en, please refer to https://pypi.org/project/g2p-en/ ``` @@ -45,4 +42,45 @@ multinet->reset(model_data, new_commands_str, err_id); // turn off the light -> commond id=2 ``` +## MultiNet6 + +The FST (Finite State Transducer) is used to save a list of commands. + +#### Step 1. Data preparation + +Requirements: +- python>3.8 +- sentencepiece + +To create an FST from a list of commands, two files are needed: +- commands.txt: maps a command id to subwords +- tokens.txt: maps subword tokens to their indices in the BPE model + +Assume you have a list of commands written in a text file `commands_list.txt` of the following format: + +``` +# command_id command_sentence +1 TELL ME A JOKE +2 MAKE A COFFEE +``` +**Note**: command ids start from 1; 0 is reserved in the FST. + +Run the following command to create the required files. Do not change the filenames `commands.txt` and `tokens.txt`. + +```sh +pip install -r fst/requirements.txt + +python fst/prepare_for_fst.py \ + --infile commands_list.txt \ + --bpe-model fst/bpe.model \ + --out-command-list commands.txt \ + --out-token-symbols tokens.txt +``` + +#### Step 2. Move created files + +1. Remove `/model/multinet_model/fst/fst.txt` and `/model/multinet_model/fst/fst_reversed.txt` if those files exist. +2. 
Move the following files to `/model/multinet_model/fst/` +- commands.txt +- tokens.txt \ No newline at end of file diff --git a/tool/fst/bpe.model b/tool/fst/bpe.model new file mode 100644 index 0000000..3e33be8 Binary files /dev/null and b/tool/fst/bpe.model differ diff --git a/tool/fst/commands_list.txt b/tool/fst/commands_list.txt new file mode 100644 index 0000000..431815f --- /dev/null +++ b/tool/fst/commands_list.txt @@ -0,0 +1,49 @@ +1 TELL ME A JOKE +2 SING A SONG +3 PLAY NEWS CHANNEL +4 TURN ON MY SOUNDBOX +5 TURN OFF MY SOUNDBOX +5 TURN OF MY SOUNDBOX +6 HIGHEST VOLUME +7 LOWEST VOLUME +8 INCREASE THE VOLUME +9 DECREASE THE VOLUME +10 TURN ON THE TV +11 TURN OFF THE TV +11 TURN OF THE TV +12 MAKE ME A TEA +13 MAKE ME A COFFEE +14 TURN ON THE LIGHT +15 TURN OFF THE LIGHT +15 TURN OF THE LIGHT +16 CHANGE THE COLOR TO RED +17 CHANGE THE COLOR TO GREEN +18 TURN ON ALL THE LIGHTS +19 TURN OFF ALL THE LIGHTS +19 TURN OF ALL THE LIGHTS +20 TURN ON THE AIR CONDITIONER +21 TURN OFF THE AIR CONDITIONER +21 TURN OF THE AIR CONDITIONER +22 SET THE TEMPERATURE TO SIXTEEN DEGREES +23 SET THE TEMPERATURE TO SEVENTEEN DEGREES +24 SET THE TEMPERATURE TO EIGHTEEN DEGREES +25 SET THE TEMPERATURE TO NINETEEN DEGREES +26 SET THE TEMPERATURE TO TWENTY DEGREES +27 SET THE TEMPERATURE TO TWENTY ONE DEGREES +28 SET THE TEMPERATURE TO TWENTY TWO DEGREES +29 SET THE TEMPERATURE TO TWENTY THREE DEGREES +30 SET THE TEMPERATURE TO TWENTY FOUR DEGREES +31 SET THE TEMPERATURE TO TWENTY FIVE DEGREES +32 SET THE TEMPERATURE TO TWENTY SIX DEGREES +33 LOWEST FAN SPEED +34 MEDIUM FAN SPEED +35 HIGHEST FAN SPEED +36 AUTO ADJUST THE FAN SPEED +37 DECREASE THE FAN SPEED +38 INCREASE THE FAN SPEED +39 INCREASE THE TEMPERATURE +40 DECREASE THE TEMPERATURE +41 COOLING MODE +42 HEATING MODE +43 VENTILATION MODE +44 DEHUMIDIFY MODE \ No newline at end of file diff --git a/tool/fst/prepare_for_fst.py b/tool/fst/prepare_for_fst.py new file mode 100644 index 0000000..ce0009f --- /dev/null +++ 
b/tool/fst/prepare_for_fst.py @@ -0,0 +1,87 @@
+"""Prepare the MultiNet6 FST input files from a plain-text command list.
+
+Each input line is ``<command_id> <command sentence>``. Every sentence is split
+into subword pieces with a SentencePiece model; the script then writes the
+tokenized command list and the table of used pieces with their model ids.
+"""
+import argparse
+from pathlib import Path
+from typing import List, Set, Tuple
+
+import sentencepiece as spm
+
+
+def process_commands(infile: List[str], sp: spm.SentencePieceProcessor
+                     ) -> Tuple[List[str], Set[str]]:
+    """Tokenize every command line into subword pieces.
+
+    Args:
+        infile: Lines of the form ``<command_id> <command sentence>``. Blank
+            lines and lines starting with ``#`` are skipped.
+        sp: A loaded SentencePiece processor.
+
+    Returns:
+        ``(out_commands, tokens)``: one tab-separated string per command
+        (the command id followed by its pieces) and the set of all pieces.
+    """
+    out_commands = []
+    tokens = set()
+
+    for line in infile:
+        fields = line.split()
+        # Skip blank lines and '#' comment lines (such as the header shown in
+        # tool/README.md) instead of raising IndexError or emitting a bogus
+        # command whose id is '#'.
+        if not fields or fields[0].startswith('#'):
+            continue
+        command_id, *words = fields
+        command_tokens = sp.encode(' '.join(words), out_type=str)
+        tokens.update(command_tokens)
+        out_commands.append('\t'.join([command_id] + command_tokens))
+    return out_commands, tokens
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--infile', type=str, required=True,
+                        help='the text file of commands id and commands.')
+    parser.add_argument('--bpe-model', type=str, default='bpe.model',
+                        help='subword bpe model file.')
+    parser.add_argument('--out-command-list', type=str,
+                        default='commands_tokens.txt',
+                        help='the output subword commands text filename.')
+    parser.add_argument('--out-token-symbols', type=str,
+                        default='tokens.txt',
+                        help='the output token to subword id mapping.')
+    args = parser.parse_args()
+
+    if not Path(args.infile).is_file():
+        raise FileNotFoundError(args.infile)
+
+    if not Path(args.bpe_model).is_file():
+        raise FileNotFoundError(args.bpe_model)
+
+    # The subword pieces contain non-ASCII characters (U+2581), so use UTF-8
+    # explicitly rather than the platform's default encoding.
+    with open(args.infile, encoding='utf-8') as f:
+        infile = [x.strip() for x in f]
+
+    sp = spm.SentencePieceProcessor()
+    sp.load(args.bpe_model)
+
+    out_commands, tokens = process_commands(infile, sp)
+
+    # Emit the used pieces in model-id order; id 0 is always included.
+    token_symbols = []
+    for i in range(sp.vocab_size()):
+        piece = sp.id_to_piece(i)
+        if piece in tokens or i == 0:
+            token_symbols.append(f'{piece}\t{i}')
+
+    with open(args.out_command_list, 'wt', encoding='utf-8') as f:
+        f.write('\n'.join(out_commands))
+        f.write('\n')
+
+    with open(args.out_token_symbols, 'wt', encoding='utf-8') as f:
+        f.write('\n'.join(token_symbols))
+        f.write('\n')
diff --git a/tool/fst/requirements.txt
b/tool/fst/requirements.txt new file mode 100644 index 0000000..800b1c9 --- /dev/null +++ b/tool/fst/requirements.txt @@ -0,0 +1 @@ +sentencepiece==0.1.97 \ No newline at end of file diff --git a/tool/requirements b/tool/requirements index a6ae40a..310c9c8 100644 --- a/tool/requirements +++ b/tool/requirements @@ -1 +1,2 @@ g2p-en +sentencepiece==0.1.97 \ No newline at end of file