diff --git a/include/esp32/esp_mn_iface.h b/include/esp32/esp_mn_iface.h
index 9297e5a..20f96d8 100644
--- a/include/esp32/esp_mn_iface.h
+++ b/include/esp32/esp_mn_iface.h
@@ -21,7 +21,7 @@ typedef enum {
 } esp_mn_state_t;
 
 //Set multinet loading mode
-//The memory comsumption is decreased with increasing mode, 
+//The memory consumption is decreased with increasing mode,
 //As a consequence also the CPU loading rate goes up
 typedef enum {
     ESP_MN_LOAD_FROM_PSRAM = 0,          // Load all weights from PSRAM. Fastest computation with Maximum memory consumption
@@ -52,6 +52,7 @@ typedef struct{
 
 typedef struct {
     char *string;                               // command string
+    char *phonemes;                             // command phonemes, if applicable
     int16_t command_id;                         // the command id
     float threshold;                            // trigger threshold, default: 0
     int16_t *wave;                              // prompt wave data of the phrase
@@ -79,7 +80,7 @@ typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name,
 
 /**
  * @brief Switch multinet mode to change memory consumption and CPU loading
- * 
+ *
  * @warning Just Support multinet6 or later versions
  *
  * @param model The model object to query
@@ -109,7 +110,7 @@ typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
 typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
 
 /**
- * @brief Set the detection threshold to manually abjust the probability 
+ * @brief Set the detection threshold to manually adjust the probability
  *
  * @param model The model object to query
  * @param det_treshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
@@ -127,7 +128,7 @@ typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
 /**
  * @brief Get the language of model
  *
- * @param model       The language name 
+ * @param model       The model object to query
  * @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
  */
 typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
@@ -136,7 +137,7 @@ typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
  * @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
  *
  * @param model       The model object to query.
- * @param samples     An array of 16-bit signed audio samples. The array size used can be queried by the 
+ * @param samples     An array of 16-bit signed audio samples. The array size used can be queried by the
  *                    get_samp_chunksize function.
  * @return The state of multinet
  */
@@ -150,10 +151,10 @@ typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, in
 typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
 
 /**
- * @brief Get recognition results 
+ * @brief Get recognition results
  *
  * @param model       The Model object to query
- * 
+ *
  * @return The current results.
  */
 typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
@@ -186,14 +187,14 @@ typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_
 
 /**
  * @brief Print out current commands in fst, note the ones "added" but not "updated" will not be shown here
- * 
+ *
  * @param model_data     The model object to query
 */
 typedef void (*esp_mn_iface_op_print_active_speech_commands)(model_iface_data_t *model_data);
 
 /**
  * @brief Check if input string can be tokenized
- * 
+ *
  * @param model_data     The model object to query
  * @param str            The input string
 */
@@ -206,7 +207,7 @@ typedef struct {
     esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
     esp_mn_iface_op_set_det_threshold_t set_det_threshold;
     esp_mn_iface_op_get_language_t get_language;
-    esp_mn_iface_op_detect_t detect; 
+    esp_mn_iface_op_detect_t detect;
     esp_mn_iface_op_destroy_t destroy;
     esp_mn_iface_op_get_results_t get_results;
     esp_mn_iface_op_open_log_t open_log;
diff --git a/include/esp32/flite_g2p.h b/include/esp32/flite_g2p.h
new file mode 100644
index 0000000..0d081cd
--- /dev/null
+++ b/include/esp32/flite_g2p.h
@@ -0,0 +1,69 @@
+#ifndef __FLITE_G2P_H__
+#define __FLITE_G2P_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Result container shared by the flite_g2p_* functions
+ *
+ * NOTE(review): the implementation ships as a prebuilt library, so the field
+ * meanings below are inferred from their names only - confirm before relying on them.
+ */
+typedef struct {
+    int num_phonemes;       // presumably the number of entries in phonemes - TODO confirm
+    int phoneme_size;       // presumably a size or capacity related to phonemes - TODO confirm
+    char **phonemes;        // presumably the phoneme strings - TODO confirm
+} flite_g2p_result;
+
+/**
+ * @brief Release a result object
+ *
+ * @param result         The result object to release, presumably one returned by
+ *                       flite_g2p_get_result - TODO confirm
+ */
+void flite_g2p_result_free(flite_g2p_result *result);
+
+/**
+ * @brief Presumably run grapheme-to-phoneme conversion and return the result object
+ *
+ * @param grapheme       The input grapheme string
+ * @return The result object. NOTE(review): ownership and the failure value are not
+ *         visible from this header - presumably released with flite_g2p_result_free.
+ */
+flite_g2p_result *flite_g2p_get_result(char *grapheme);
+
+/**
+ * @brief Presumably print the content of a result object as a string
+ *
+ * @param result         The result object to print
+ * @param map_phonemes   Presumably non-zero to map the phonemes to another phoneme set - TODO confirm
+ */
+void flite_g2p_result_print_string(flite_g2p_result *result, int map_phonemes);
+
+/**
+ * @brief Presumably get the content of a result object as a string
+ *
+ * @param result         The result object to convert
+ * @param map_phonemes   Presumably non-zero to map the phonemes to another phoneme set - TODO confirm
+ * @return The string. NOTE(review): ownership of the returned buffer is not visible
+ *         from this header - confirm whether the caller must free it.
+ */
+char *flite_g2p_result_get_string(flite_g2p_result *result, int map_phonemes);
+
+/**
+ * @brief Presumably convert graphemes to a phoneme string in one call
+ *
+ * @param graphemes      The input grapheme string
+ * @param map_phonemes   Presumably non-zero to map the phonemes to another phoneme set - TODO confirm
+ * @return The phoneme string. NOTE(review): ownership of the returned buffer is not
+ *         visible from this header - confirm whether the caller must free it.
+ */
+char *flite_g2p(char *graphemes, int map_phonemes);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/esp32/libesp_audio_front_end.a b/lib/esp32/libesp_audio_front_end.a
index cf73428..1bc77b7 100644
Binary files a/lib/esp32/libesp_audio_front_end.a and b/lib/esp32/libesp_audio_front_end.a differ
diff --git a/lib/esp32/libesp_audio_processor.a b/lib/esp32/libesp_audio_processor.a
index 047891e..3aef0fb 100644
Binary files a/lib/esp32/libesp_audio_processor.a and b/lib/esp32/libesp_audio_processor.a differ
diff --git a/lib/esp32/libflite_g2p.a b/lib/esp32/libflite_g2p.a
new file mode 100644
index 0000000..e0faf3c
Binary files /dev/null and b/lib/esp32/libflite_g2p.a differ
diff --git a/lib/esp32/libmultinet.a b/lib/esp32/libmultinet.a
index 44c3a18..a279922 100644
Binary files a/lib/esp32/libmultinet.a and b/lib/esp32/libmultinet.a differ
diff --git a/lib/esp32/libwakenet.a b/lib/esp32/libwakenet.a
index d3e02c6..fa845c4 100644
Binary files a/lib/esp32/libwakenet.a and b/lib/esp32/libwakenet.a differ