mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Merge branch dzh_straming_test into dev_gzf_deepspeed
Title: align others with dev_gzf_deepspeed 本次代码评审主要涉及添加CTC模块、优化MP3编码处理、简化音素令牌处理和引入新库,旨在增强模型的语音识别与文本转语音功能,同时改进音频处理逻辑。 Link: https://code.alibaba-inc.com/zhifu.gzf/FunASR/codereview/18421381
This commit is contained in:
commit
9edbcd5420
@@ -3223,7 +3223,7 @@ class LLMASRXvecSlotTTS(nn.Module):
|
||||
token_list.append(cur_token)
|
||||
feat_list.append(feat)
|
||||
# we should return this data to web page for playing.
|
||||
- mp3_data = self.convert_wav_to_mp3(wav)
|
||||
+ mp3_data = self.convert_wav_to_mp3(wav, is_last)
|
||||
wav_list.append(wav)
|
||||
mp3_list.append(mp3_data)
|
||||
|
||||
@@ -3329,7 +3329,7 @@ class LLMASRXvecSlotTTS(nn.Module):
|
||||
states["chunk_idx"] = chunk_idx
|
||||
if format == "mp3":
|
||||
if cur_token is not None:
|
||||
- wav = self.convert_wav_to_mp3(wav)
|
||||
+ wav = self.convert_wav_to_mp3(wav, is_last)
|
||||
return cur_token, feat, wav
|
||||
|
||||
def write_mel_wav(self, output_dir, feat, wav, mp3, key):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user