mirror of
https://github.com/espressif/esp-sr.git
synced 2025-09-15 15:28:44 +08:00
Merge branch 'master' into 'doc/mn7_en_doc'
# Conflicts: # docs/en/benchmark/README.rst
This commit is contained in:
commit
e80a8cb690
@ -4,6 +4,12 @@
|
||||
- Available storage is less than the remaining flash space on IDF v5.0.
|
||||
If you cannot map the model partition successfully, check the remaining free storage with `spi_flash_mmap_get_free_pages(ESP_PARTITION_MMAP_DATA)` or update IDF to v5.1 or later.
|
||||
|
||||
## 1.5.1
|
||||
- Reduce Internal RAM of multinet7
|
||||
- Update benchmark
|
||||
- Add ci build test for esp32
|
||||
- Fix some bugs
|
||||
|
||||
## 1.5.0
|
||||
- Add esp32c6 tts lib
|
||||
- Return the volume of wake word audio when one wake word is detected
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
version: "1.5.0"
|
||||
version: "1.5.1"
|
||||
description: esp_sr provides basic algorithms for Speech Recognition applications
|
||||
url: https://github.com/espressif/esp-sr
|
||||
dependencies:
|
||||
|
||||
@ -21,7 +21,7 @@ typedef enum {
|
||||
} esp_mn_state_t;
|
||||
|
||||
//Set multinet loading mode
|
||||
//The memory consumption is decreased with increasing mode,
|
||||
//The memory consumption is decreased with increasing mode,
|
||||
//As a consequence, the CPU loading rate also goes up
|
||||
typedef enum {
|
||||
ESP_MN_LOAD_FROM_PSRAM = 0, // Load all weights from PSRAM. Fastest computation with Maximum memory consumption
|
||||
@ -52,6 +52,7 @@ typedef struct{
|
||||
|
||||
typedef struct {
|
||||
char *string; // command string
|
||||
char *phonemes; // command phonemes, if applicable
|
||||
int16_t command_id; // the command id
|
||||
float threshold; // trigger threshold, default: 0
|
||||
int16_t *wave; // prompt wave data of the phrase
|
||||
@ -79,7 +80,7 @@ typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name,
|
||||
|
||||
/**
|
||||
* @brief Switch multinet mode to change memory consumption and CPU loading
|
||||
*
|
||||
*
|
||||
* @warning Only supported by multinet6 or later versions
|
||||
*
|
||||
* @param model The model object to query
|
||||
@ -109,7 +110,7 @@ typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
|
||||
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
* @brief Set the detection threshold to manually adjust the probability
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param det_threshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
|
||||
@ -127,7 +128,7 @@ typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
|
||||
/**
|
||||
* @brief Get the language of model
|
||||
*
|
||||
* @param model The model object to query
|
||||
* @param model The model object to query
|
||||
* @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
|
||||
*/
|
||||
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
|
||||
@ -136,7 +137,7 @@ typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
|
||||
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
|
||||
*
|
||||
* @param model The model object to query.
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
|
||||
* get_samp_chunksize function.
|
||||
* @return The state of multinet
|
||||
*/
|
||||
@ -150,10 +151,10 @@ typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, in
|
||||
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
|
||||
|
||||
/**
|
||||
* @brief Get recognition results
|
||||
* @brief Get recognition results
|
||||
*
|
||||
* @param model The Model object to query
|
||||
*
|
||||
*
|
||||
* @return The current results.
|
||||
*/
|
||||
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
|
||||
@ -186,14 +187,14 @@ typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_
|
||||
|
||||
/**
|
||||
* @brief Print out the current commands in the FST. Note that commands that were "added" but not "updated" will not be shown here
|
||||
*
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
*/
|
||||
typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
|
||||
|
||||
/**
|
||||
* @brief Check if input string can be tokenized
|
||||
*
|
||||
*
|
||||
* @param model_data The model object to query
|
||||
* @param str The input string
|
||||
*/
|
||||
@ -206,7 +207,7 @@ typedef struct {
|
||||
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
|
||||
esp_mn_iface_op_set_det_threshold_t set_det_threshold;
|
||||
esp_mn_iface_op_get_language_t get_language;
|
||||
esp_mn_iface_op_detect_t detect;
|
||||
esp_mn_iface_op_detect_t detect;
|
||||
esp_mn_iface_op_destroy_t destroy;
|
||||
esp_mn_iface_op_get_results_t get_results;
|
||||
esp_mn_iface_op_open_log_t open_log;
|
||||
|
||||
20
include/esp32/flite_g2p.h
Normal file
20
include/esp32/flite_g2p.h
Normal file
@ -0,0 +1,20 @@
|
||||
#ifndef __FLITE_G2P_H__
|
||||
#define __FLITE_G2P_H__
|
||||
|
||||
typedef struct {
|
||||
int num_phonemes;
|
||||
int phoneme_size;
|
||||
char **phonemes;
|
||||
} flite_g2p_result;
|
||||
|
||||
void flite_g2p_result_free(flite_g2p_result *result);
|
||||
|
||||
flite_g2p_result *flite_g2p_get_result(char *grapheme);
|
||||
|
||||
void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);
|
||||
|
||||
char *flite_g2p(char *graphemes, int map_phonemes);
|
||||
|
||||
#endif
|
||||
Binary file not shown.
Binary file not shown.
BIN
lib/esp32/libflite_g2p.a
Normal file
BIN
lib/esp32/libflite_g2p.a
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -12,6 +12,14 @@ static esp_mn_node_t *esp_mn_root = NULL;
|
||||
const static esp_mn_iface_t *esp_mn_model_handle = NULL;
|
||||
static model_iface_data_t *esp_mn_model_data = NULL;
|
||||
|
||||
void *_esp_mn_calloc_(int n, int size)
|
||||
{
|
||||
#ifdef ESP_PLATFORM
|
||||
return heap_caps_calloc(n, size, MALLOC_CAP_SPIRAM);
|
||||
#else
|
||||
return calloc(n, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define ESP_RETURN_ON_FALSE(a, err_code, log_tag, format, ...) do { \
|
||||
if (!(a)) { \
|
||||
@ -130,7 +138,11 @@ esp_err_t esp_mn_commands_add(int command_id, char *string)
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
#ifdef CONFIG_SR_MN_EN_MULTINET7_QUANT
|
||||
phrase->phonemes = phonemes;
|
||||
int phoneme_len = strlen(phonemes);
|
||||
phrase->phonemes = _esp_mn_calloc_(phoneme_len+1, sizeof(char));
|
||||
memcpy(phrase->phonemes, phonemes, phoneme_len);
|
||||
phrase->phonemes[phoneme_len] = '\0';
|
||||
free(phonemes);
|
||||
#endif
|
||||
esp_mn_node_t *new_node = esp_mn_node_alloc(phrase);
|
||||
while (temp->next != NULL) {
|
||||
@ -168,7 +180,11 @@ esp_err_t esp_mn_commands_modify(char *old_string, char *new_string)
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
#ifdef CONFIG_SR_MN_EN_MULTINET7_QUANT
|
||||
phrase->phonemes = phonemes;
|
||||
int phoneme_len = strlen(phonemes);
|
||||
phrase->phonemes = _esp_mn_calloc_(phoneme_len+1, sizeof(char));
|
||||
memcpy(phrase->phonemes, phonemes, phoneme_len);
|
||||
phrase->phonemes[phoneme_len] = '\0';
|
||||
free(phonemes);
|
||||
#endif
|
||||
esp_mn_phrase_free(temp->phrase);
|
||||
temp->phrase = phrase;
|
||||
@ -297,15 +313,6 @@ void esp_mn_active_commands_print(void)
|
||||
ESP_LOGI(TAG, "---------------------------------------------------------\n");
|
||||
}
|
||||
|
||||
void *_esp_mn_calloc_(int n, int size)
|
||||
{
|
||||
#ifdef ESP_PLATFORM
|
||||
return heap_caps_calloc(n, size, MALLOC_CAP_SPIRAM);
|
||||
#else
|
||||
return calloc(n, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *string)
|
||||
{
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
test_apps/esp-sr:
|
||||
enable:
|
||||
- if: IDF_TARGET in ["esp32s3"]
|
||||
- if: IDF_TARGET in ["esp32s3", "esp32"]
|
||||
temporary: false
|
||||
|
||||
test_apps/esp-tts:
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
if(IDF_TARGET STREQUAL "esp32s3")
|
||||
|
||||
set(srcs
|
||||
"test_app_main.c"
|
||||
@ -13,4 +12,3 @@ idf_component_register(SRCS ${srcs}
|
||||
WHOLE_ARCHIVE)
|
||||
|
||||
target_compile_options(${COMPONENT_LIB} PRIVATE "-Wno-format")
|
||||
endif()
|
||||
|
||||
3
test_apps/esp-sr/partitions_esp32.csv
Normal file
3
test_apps/esp-sr/partitions_esp32.csv
Normal file
@ -0,0 +1,3 @@
|
||||
# Espressif ESP32 Partition Table
|
||||
# Name, Type, SubType, Offset, Size
|
||||
factory, app, factory, 0x010000, 8000k
|
||||
|
1922
test_apps/esp-sr/sdkconfig.ci.mn2_cn
Normal file
1922
test_apps/esp-sr/sdkconfig.ci.mn2_cn
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user