mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Fix audio format 2.0 (#2186)
* 添加了对音频文件扩展名是否为.mp3的补丁,是mp3格式则转化为wav格式
* 增加检测音频文件是否为mp3格式的补丁
* 完善对音频文件后缀名的检查,若文件后缀不是.wav,则转化为wav
* 增加音频文件后缀名检查;音频文件无效时抛出错误
* 在paraformer、vad两个模型中加入对音频文件后缀的检查,并将非wav格式转为wav格式
* 修改wav_path的数据类型,使demo能够顺利运行
This commit is contained in:
parent
6224003492
commit
7e9696f156
@ -3,7 +3,8 @@ from funasr_onnx.paraformer_online_bin import Paraformer
|
||||
from pathlib import Path
|
||||
|
||||
# ModelScope model id; it is also the directory name under the local hub cache.
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online"

# Path of the example recording inside the local ModelScope cache.
# Passed as a plain string rather than a one-element list: the commit notes
# say the data type of wav_path was changed so that the demo runs.
wav_path = "{}/.cache/modelscope/hub/{}/example/asr_example.wav".format(Path.home(), model_dir)

chunk_size = [5, 10, 5]
|
||||
model = Paraformer(
|
||||
|
||||
@ -175,7 +175,23 @@ class Paraformer:
|
||||
plt.savefig(plotname, bbox_inches="tight")
|
||||
|
||||
def load_data(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
|
||||
def convert_to_wav(input_path, output_path):
    """Decode an audio file with pydub and write it out as WAV.

    Args:
        input_path: Path of the source audio file. The caller invokes this
            for every file whose name does not end in ``.wav``, so the
            container format is detected from the file instead of being
            assumed to be mp3.
        output_path: Path the WAV file is written to.

    Raises:
        ImportError: If pydub is not installed.
        RuntimeError: If the file cannot be decoded or exported. The
            original exception is chained as the cause.
    """
    from pydub import AudioSegment

    try:
        # from_file() lets the decoder detect the input format; from_mp3()
        # would force the mp3 demuxer onto non-mp3 inputs such as flac/m4a.
        audio = AudioSegment.from_file(input_path)
        audio.export(output_path, format="wav")
    except Exception as e:
        # Fail loudly: swallowing the error would let the caller go on to
        # load an output file that is missing or left over from earlier.
        raise RuntimeError(f"转换失败:{e}") from e
    else:
        print("音频文件为mp3格式,已转换为wav格式")
|
||||
|
||||
def load_wav(path: str) -> np.ndarray:
    """Load one audio file as a waveform array.

    A file whose name does not end in ``.wav`` (case-insensitive) is first
    converted with ``convert_to_wav``; the converted file is written next to
    the input, using the same name with a ``.wav`` extension.

    Args:
        path: Path of the audio file to read.

    Returns:
        The waveform returned by ``librosa.load``, read with ``sr=fs`` where
        ``fs`` comes from the enclosing scope.
    """
    import os

    already_wav = path.lower().endswith('.wav')
    if not already_wav:
        source_path = path
        stem, _ = os.path.splitext(source_path)
        path = stem + '.wav'
        # Convert the non-wav input to wav before loading it.
        convert_to_wav(source_path, path)

    samples, _ = librosa.load(path, sr=fs)
    return samples
|
||||
|
||||
|
||||
@ -208,7 +208,24 @@ class Paraformer:
|
||||
return asr_res
|
||||
|
||||
def load_data(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
|
||||
|
||||
def convert_to_wav(input_path, output_path):
    """Decode an audio file with pydub and write it out as WAV.

    Args:
        input_path: Path of the source audio file. The caller invokes this
            for every file whose name does not end in ``.wav``, so the
            container format is detected from the file instead of being
            assumed to be mp3.
        output_path: Path the WAV file is written to.

    Raises:
        ImportError: If pydub is not installed.
        RuntimeError: If the file cannot be decoded or exported. The
            original exception is chained as the cause.
    """
    from pydub import AudioSegment

    try:
        # from_file() lets the decoder detect the input format; from_mp3()
        # would force the mp3 demuxer onto non-mp3 inputs such as flac/m4a.
        audio = AudioSegment.from_file(input_path)
        audio.export(output_path, format="wav")
    except Exception as e:
        # Fail loudly: swallowing the error would let the caller go on to
        # load an output file that is missing or left over from earlier.
        raise RuntimeError(f"转换失败:{e}") from e
    else:
        print("音频文件为mp3格式,已转换为wav格式")
|
||||
|
||||
def load_wav(path: str) -> np.ndarray:
    """Load one audio file as a waveform array.

    A file whose name does not end in ``.wav`` (case-insensitive) is first
    converted with ``convert_to_wav``; the converted file is written next to
    the input, using the same name with a ``.wav`` extension.

    Args:
        path: Path of the audio file to read.

    Returns:
        The waveform returned by ``librosa.load``, read with ``sr=fs`` where
        ``fs`` comes from the enclosing scope.
    """
    import os

    already_wav = path.lower().endswith('.wav')
    if not already_wav:
        source_path = path
        stem, _ = os.path.splitext(source_path)
        path = stem + '.wav'
        # Convert the non-wav input to wav before loading it.
        convert_to_wav(source_path, path)

    samples, _ = librosa.load(path, sr=fs)
    return samples
|
||||
|
||||
|
||||
@ -143,7 +143,24 @@ class Fsmn_vad:
|
||||
return segments
|
||||
|
||||
def load_data(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
|
||||
|
||||
def convert_to_wav(input_path, output_path):
    """Decode an audio file with pydub and write it out as WAV.

    Args:
        input_path: Path of the source audio file. The caller invokes this
            for every file whose name does not end in ``.wav``, so the
            container format is detected from the file instead of being
            assumed to be mp3.
        output_path: Path the WAV file is written to.

    Raises:
        ImportError: If pydub is not installed.
        RuntimeError: If the file cannot be decoded or exported. The
            original exception is chained as the cause.
    """
    from pydub import AudioSegment

    try:
        # from_file() lets the decoder detect the input format; from_mp3()
        # would force the mp3 demuxer onto non-mp3 inputs such as flac/m4a.
        audio = AudioSegment.from_file(input_path)
        audio.export(output_path, format="wav")
    except Exception as e:
        # Fail loudly: swallowing the error would let the caller go on to
        # load an output file that is missing or left over from earlier.
        raise RuntimeError(f"转换失败:{e}") from e
    else:
        print("音频文件为mp3格式,已转换为wav格式")
|
||||
|
||||
def load_wav(path: str) -> np.ndarray:
    """Load one audio file as a waveform array.

    A file whose name does not end in ``.wav`` (case-insensitive) is first
    converted with ``convert_to_wav``; the converted file is written next to
    the input, using the same name with a ``.wav`` extension.

    Args:
        path: Path of the audio file to read.

    Returns:
        The waveform returned by ``librosa.load``, read with ``sr=fs`` where
        ``fs`` comes from the enclosing scope.
    """
    import os

    already_wav = path.lower().endswith('.wav')
    if not already_wav:
        source_path = path
        stem, _ = os.path.splitext(source_path)
        path = stem + '.wav'
        # Convert the non-wav input to wav before loading it.
        convert_to_wav(source_path, path)

    samples, _ = librosa.load(path, sr=fs)
    return samples
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user