mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Fix audio format (#2159)
* 添加了对音频文件扩展名是否为.mp3的补丁,是mp3格式则转化为wav格式
* 增加检测音频文件是否为mp3格式的补丁
* 完善对音频文件后缀名的检查,若文件后缀不是.wav,则转化为wav
* 增加音频文件后缀名检查;音频文件无效时抛出错误
This commit is contained in:
parent
757d20b3e8
commit
a76f15c785
@ -196,7 +196,24 @@ class SenseVoiceSmall:
|
||||
return asr_res
|
||||
|
||||
def load_data(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
|
||||
|
||||
def convert_to_wav(input_path, output_path):
    """Convert the audio file at ``input_path`` into a WAV file at ``output_path``.

    Args:
        input_path: Path of the source audio file (any format the pydub
            backend can decode, not only mp3).
        output_path: Path the WAV file is written to.

    Raises:
        Exception: Whatever pydub raises while decoding or exporting. The
            failure is printed and then re-raised, so the caller never goes
            on to read a missing or stale output file.
    """
    # Imported here rather than at module level, so pydub is only needed
    # when this function actually runs.
    from pydub import AudioSegment

    try:
        # from_file lets the backend detect the input format; from_mp3 forces
        # mp3 decoding and fails for other non-wav inputs.
        audio = AudioSegment.from_file(input_path)
        # export returns the output file handle; close it so the data is
        # flushed to disk before anyone reads the file.
        exported = audio.export(output_path, format="wav")
        exported.close()
    except Exception as e:
        print(f"转换失败:{e}")
        raise
    else:
        print("音频文件已转换为wav格式")
|
||||
|
||||
def load_wav(path: str) -> np.ndarray:
    """Load the audio file at ``path`` and return its waveform.

    Files whose name ends in ``.wav`` (case-insensitive) are loaded directly.
    Any other file is first converted to WAV with ``convert_to_wav`` inside a
    temporary directory, so no file next to the input is created or
    overwritten and a stale ``<stem>.wav`` can never be loaded by mistake.

    ``fs`` is not a parameter of this function; it is read from the enclosing
    scope and passed to ``librosa.load`` as the target sampling rate.

    Args:
        path: Path of the audio file to load.

    Returns:
        The waveform returned by ``librosa.load``.
    """
    if path.lower().endswith('.wav'):
        waveform, _ = librosa.load(path, sr=fs)
        return waveform

    import os
    import tempfile

    # The temporary directory and the converted file are removed when the
    # ``with`` block exits; the waveform is already in memory by then.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "converted.wav")
        convert_to_wav(path, wav_path)  # convert the non-wav input to wav
        waveform, _ = librosa.load(wav_path, sr=fs)
    return waveform
|
||||
|
||||
@ -215,6 +232,10 @@ class SenseVoiceSmall:
|
||||
feats, feats_len = [], []
|
||||
for waveform in waveform_list:
|
||||
speech, _ = self.frontend.fbank(waveform)
|
||||
|
||||
if speech is None or speech.size == 0:
|
||||
print("detected speech size {speech.size}")
|
||||
raise ValueError("Empty speech detected, skipping this waveform.")
|
||||
feat, feat_len = self.frontend.lfr_cmvn(speech)
|
||||
feats.append(feat)
|
||||
feats_len.append(feat_len)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user