mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
Merge branch 'feat/add_himfive' into 'master'
Feat/add himfive See merge request speech-recognition-framework/esp-sr!57
This commit is contained in:
commit
f6e7ffcf9b
@ -4,6 +4,13 @@
|
||||
- Available storage is less than the remaining flash space on IDF v5.0.
|
||||
If you cannot map the model partition successfully, please check the remaining free storage with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later.
|
||||
|
||||
## Unreleased
|
||||
- Add esp32c6 tts lib
|
||||
- Return the volume of wake word audio when one wake word is detected
|
||||
- Reduce MultiNet6 SRAM size from 48KB to 32 KB
|
||||
- Add "Hi M Five" wake word model from M5Stack
|
||||
- Remove all MultiNet4 models
|
||||
|
||||
## 1.4.2
|
||||
- Reset timeout trigger of multinet6 when a new speech command is detected
|
||||
- Allocate all beams from PSRAM
|
||||
|
||||
@ -80,6 +80,10 @@ choice SR_WN_MODEL_LOAD
|
||||
config SR_WN_WN9_HIESP
|
||||
bool "Hi,ESP (wn9_hiesp)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_WN_WN9_HIMFIVE
|
||||
bool "Hi,M Five (wn9_himfive)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_WN_WN9_NIHAOXIAOZHI
|
||||
bool "nihaoxiaozhi (wn9_nihaoxiaozhi)"
|
||||
@ -122,7 +126,7 @@ config USE_MULTINET
|
||||
|
||||
choice CHINESE_SR_MN_MODEL_SEL
|
||||
prompt "Chinese Speech Commands Model"
|
||||
default SR_MN_CN_MULTINET4_5_SINGLE_RECOGNITION
|
||||
default SR_MN_CN_MULTINET6_QUANT
|
||||
depends on USE_MULTINET
|
||||
help
|
||||
Select the Chinese speech commands model to be used.
|
||||
@ -134,14 +138,6 @@ choice CHINESE_SR_MN_MODEL_SEL
|
||||
bool "chinese single recognition (mn2_cn)"
|
||||
depends on IDF_TARGET_ESP32
|
||||
|
||||
config SR_MN_CN_MULTINET4_5_SINGLE_RECOGNITION
|
||||
bool "chinese recognition (mn4_cn)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_MN_CN_MULTINET4_5_SINGLE_RECOGNITION_QUANT8
|
||||
bool "chinese recognition (mn4q8_cn)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
config SR_MN_CN_MULTINET5_RECOGNITION_QUANT8
|
||||
bool "chinese recognition (mn5q8_cn)"
|
||||
depends on IDF_TARGET_ESP32S3
|
||||
|
||||
10
README.md
10
README.md
@ -13,7 +13,7 @@ ESP-SR framework includes the following modules:
|
||||
* [Audio Front-end AFE](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/audio_front_end/README.html)
|
||||
* [Wake Word Engine WakeNet](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/wake_word_engine/README.html)
|
||||
* [Speech Command Word Recognition MultiNet](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_command_recognition/README.html)
|
||||
* Speech Synthesis (only supports Chinese language)
|
||||
* [Speech Synthesis](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/speech_synthesis/readme.html)
|
||||
|
||||
These algorithms are provided in the form of a component, so they can be integrated into your projects with minimum effort.
|
||||
|
||||
@ -23,15 +23,15 @@ The new algorithms will no longer support ESP32 chips.
|
||||
|
||||
## Wake Word Engine
|
||||
|
||||
Espressif wake word engine **WakeNet** is specially designed to provide a high performance and low memory footprint wake word detection algorithm for users, which enables devices always listen to wake words, such as “Alexa”, “Hi,lexin” and “Hi,ESP”. You can refer to **Model loading method** to build your project.
|
||||
Espressif's wake word engine **WakeNet** is specially designed to provide a high-performance, low-memory-footprint wake word detection algorithm, which enables devices to always listen for wake words such as “Alexa”, “Hi,lexin” and “Hi,ESP”.
|
||||
|
||||
Currently, Espressif has not only provided an official wake word "Hi,Lexin","Hi,ESP" to the public for free, but also allows customized wake words. For details on how to customize your own wake words, please see **Espressif Speech Wake Words Customization Process**.
|
||||
Currently, Espressif not only provides the official wake words "Hi,Lexin" and "Hi,ESP" to the public for free, but also allows customized wake words. For details on how to customize your own wake words, please see [Espressif Speech Wake Words Customization Process](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/wake_word_engine/ESP_Wake_Words_Customization.html).
|
||||
|
||||
## Speech Command Recognition
|
||||
|
||||
Espressif's speech command recognition model **MultiNet** is specially designed to provide a flexible off-line speech command recognition model. With this model, you can easily add your own speech commands, eliminating the need to train model again. You can refer to **Model loading method** to build your project.
|
||||
Espressif's speech command recognition model **MultiNet** is specially designed to provide a flexible offline speech command recognition model. With this model, you can easily add your own speech commands without having to retrain the model.
|
||||
|
||||
Currently, Espressif **MultiNet** supports up to 200 Chinese or English speech commands, such as “打开空调” (Turn on the air conditioner) and “打开卧室灯” (Turn on the bedroom light).
|
||||
Currently, Espressif **MultiNet** supports up to 300 Chinese or English speech commands, such as “打开空调” (Turn on the air conditioner) and “打开卧室灯” (Turn on the bedroom light).
|
||||
|
||||
## Audio Front End
|
||||
|
||||
|
||||
@ -8,10 +8,4 @@ files:
|
||||
exclude:
|
||||
- ".github"
|
||||
- "docs/**/*"
|
||||
- "test_apps/**/*"
|
||||
targets:
|
||||
- esp32
|
||||
- esp32s2
|
||||
- esp32s3
|
||||
- esp32c3
|
||||
- esp32c6
|
||||
- "test_apps/**/*"
|
||||
@ -22,8 +22,8 @@ typedef enum {
|
||||
} afe_memory_alloc_mode_t;
|
||||
|
||||
typedef enum {
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB
|
||||
AFE_MN_PEAK_AGC_MODE_1 = -9, // The peak amplitude of audio fed to multinet is -9dB
|
||||
AFE_MN_PEAK_AGC_MODE_2 = -6, // The peak amplitude of audio fed to multinet is -6dB
|
||||
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB
|
||||
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
|
||||
} afe_mn_peak_agc_mode_t;
|
||||
@ -72,7 +72,9 @@ typedef struct {
|
||||
int afe_perferred_priority;
|
||||
int afe_ringbuf_size;
|
||||
afe_memory_alloc_mode_t memory_alloc_mode;
|
||||
afe_mn_peak_agc_mode_t agc_mode; // The agc mode for ASR
|
||||
float afe_linear_gain; // The linear gain for SR output (note: invalid for VC); the value should be in [0.1, 10.0].
|
||||
// This value acts directly on the output amplitude: out_linear_gain * amplitude.
|
||||
afe_mn_peak_agc_mode_t agc_mode; // The AGC mode for ASR. The gain generated by AGC is applied to the audio after the AFE linear gain.
|
||||
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
|
||||
bool debug_init;
|
||||
afe_debug_hook_t debug_hook[AFE_DEBUG_HOOK_MAX];
|
||||
@ -97,6 +99,7 @@ typedef struct {
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
|
||||
.afe_linear_gain = 1.0, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 2, \
|
||||
.pcm_config.mic_num = 1, \
|
||||
@ -123,6 +126,7 @@ typedef struct {
|
||||
.afe_perferred_priority = 5, \
|
||||
.afe_ringbuf_size = 50, \
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
|
||||
.afe_linear_gain = 1.0, \
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
|
||||
.pcm_config.total_ch_num = 3, \
|
||||
.pcm_config.mic_num = 2, \
|
||||
|
||||
@ -25,6 +25,8 @@ typedef struct afe_fetch_result_t
|
||||
{
|
||||
int16_t *data; // the data of audio.
|
||||
int data_size; // the size of data. The unit is byte.
|
||||
float data_volume; // the volume of input audio, the unit is decibel(dB). This value is calculated before agc. (note: invalid in vc).
|
||||
// if enable wakenet, the window length is the receptive fields of wakenet(about 1.5s), otherwise is the frame length.
|
||||
wakenet_state_t wakeup_state; // the value is wakenet_state_t
|
||||
int wake_word_index; // if a wake word is detected, this stores the wake word index, which starts from 1.
|
||||
int wakenet_model_index; // if there are multiple wakenets, this value identifies which model was woken up. The index starts from 1.
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -47,6 +47,28 @@ def copy_wakenet_from_sdkconfig(model_path, sdkconfig_path, target_path):
|
||||
models.append('wn9_alexa')
|
||||
if "CONFIG_SR_WN_WN9_HIESP" in models_string:
|
||||
models.append('wn9_hiesp')
|
||||
if "CONFIG_SR_WN_WN9_HIMFIVE" in models_string:
|
||||
models.append('wn9_himfive')
|
||||
if "CONFIG_SR_WN_WN9_NIHAOXIAOZHI" in models_string:
|
||||
models.append('wn9_nihaoxiaozhi')
|
||||
if "CONFIG_SR_WN_WN9_CUSTOMWORD" in models_string:
|
||||
models.append('wn9_customword')
|
||||
|
||||
for item in models:
|
||||
shutil.copytree(model_path + '/wakenet_model/' + item, target_path+'/'+item)
|
||||
|
||||
def copy_multinet_from_sdkconfig(model_path, sdkconfig_path, target_path):
|
||||
"""
|
||||
Copy multinet model from model_path to target_path based on sdkconfig
|
||||
"""
|
||||
with io.open(sdkconfig_path, "r") as f:
|
||||
models_string = ''
|
||||
for label in f:
|
||||
label = label.strip("\n")
|
||||
if 'CONFIG_SR_MN' in label and label[0] != '#':
|
||||
models_string += label
|
||||
|
||||
models = []
|
||||
if "CONFIG_SR_WN_WN9_NIHAOXIAOZHI" in models_string:
|
||||
models.append('wn9_nihaoxiaozhi')
|
||||
if "CONFIG_SR_WN_WN9_CUSTOMWORD" in models_string:
|
||||
|
||||
2
model/wakenet_model/wn9_himfive/_MODEL_INFO_
Normal file
2
model/wakenet_model/wn9_himfive/_MODEL_INFO_
Normal file
@ -0,0 +1,2 @@
|
||||
# (neural network type)_(model data version)_(label1_detection window length_threshold for 90%_threshold for 95%)_(label2 ...)_...
|
||||
wakenet9l_v2h8_himfive_3_0.640_0.645
|
||||
BIN
model/wakenet_model/wn9_himfive/wn9_data
Normal file
BIN
model/wakenet_model/wn9_himfive/wn9_data
Normal file
Binary file not shown.
BIN
model/wakenet_model/wn9_himfive/wn9_index
Normal file
BIN
model/wakenet_model/wn9_himfive/wn9_index
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user