mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
* add cmakelist * add paraformer-torch * add debug for funasr-onnx-offline * fix redefinition of jieba StdExtension.hpp * add loading torch models * update funasr-onnx-offline * add SwitchArg for wss-server * add SwitchArg for funasr-onnx-offline * update cmakelist * update funasr-onnx-offline-rtf * add define condition * add gpu define for offlne-stream * update com define * update offline-stream * update cmakelist * update func CompileHotwordEmbedding * add timestamp for paraformer-torch * add C10_USE_GLOG for paraformer-torch * update paraformer-torch * fix func FunASRWfstDecoderInit * update model.h * fix func FunASRWfstDecoderInit * fix tpass_stream * update paraformer-torch * add bladedisc for funasr-onnx-offline * update comdefine * update funasr-wss-server * add log for torch * fix GetValue BLADEDISC * fix log * update cmakelist * update warmup to 10 * update funasrruntime * add batch_size for wss-server * add batch for bins * add batch for offline-stream * add batch for paraformer * add batch for offline-stream * fix func SetBatchSize * add SetBatchSize for model * add SetBatchSize for model * fix func Forward * fix padding * update funasrruntime * add dec reset for batch * set batch default value * add argv for CutSplit * sort frame_queue * sorted msgs * fix FunOfflineInfer * add dynamic batch for fetch * fix FetchDynamic * update run_server.sh * update run_server.sh * cpp http post server support (#1739) * add cpp http server * add some comment * remove some comments * del debug infos * restore run_server.sh * adapt to new model struct * 修复了onnxruntime在macos下编译失败的错误 (#1748) * Add files via upload 增加macos的编译支持 * Add files via upload 增加macos支持 * Add files via upload target_link_directories(funasr PUBLIC ${ONNXRUNTIME_DIR}/lib) target_link_directories(funasr PUBLIC ${FFMPEG_DIR}/lib) 添加 if(APPLE) 限制 --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> * Delete docs/images/wechat.png * Add files via upload * fixed the issues about seaco-onnx 
timestamp * fix bug (#1764) 当语音识别结果包含 `http` 时,标点符号预测会把它会被当成 url * fix empty asr result (#1765) 解码结果为空的语音片段,text 用空字符串 * docs * docs * docs * docs * docs * keep empty speech result (#1772) * docs * docs * update wechat QRcode * Add python funasr api support for websocket srv (#1777) * add python funasr_api supoort * change little to README.md * add core tools stream * modified a little * fix bug for timeout * support for buffer decode * add ffmpeg decode for buffer * auto frontend * auto frontend * auto frontend * auto frontend * auto frontend * auto frontend * auto frontend * auto frontend * Dev gzf exp (#1785) * resume from step * batch * batch * batch * batch * batch * batch * batch * batch * batch * batch * batch * batch * batch * batch * batch * train_loss_avg train_acc_avg * train_loss_avg train_acc_avg * train_loss_avg train_acc_avg * log step * wav is not exist * wav is not exist * decoding * decoding * decoding * wechat * decoding key * decoding key * decoding key * decoding key * decoding key * decoding key * dynamic batch * start_data_split_i=0 * total_time/accum_grad * total_time/accum_grad * total_time/accum_grad * update avg slice * update avg slice * sensevoice sanm * sensevoice sanm * sensevoice sanm --------- Co-authored-by: 北念 <lzr265946@alibaba-inc.com> * auto frontend --------- Co-authored-by: 雾聪 <wucong.lyb@alibaba-inc.com> Co-authored-by: zhaomingwork <61895407+zhaomingwork@users.noreply.github.com> Co-authored-by: szsteven008 <97944818+szsteven008@users.noreply.github.com> Co-authored-by: Ephemeroptera <605686962@qq.com> Co-authored-by: 彭震东 <zhendong.peng@qq.com> Co-authored-by: Shi Xian <40013335+R1ckShi@users.noreply.github.com> Co-authored-by: 维石 <shixian.shi@alibaba-inc.com> Co-authored-by: 北念 <lzr265946@alibaba-inc.com>
110 lines
4.0 KiB
Python
110 lines
4.0 KiB
Python
#!/usr/bin/env python3
|
|
# -*- encoding: utf-8 -*-
|
|
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
|
|
# MIT License (https://opensource.org/licenses/MIT)
|
|
|
|
import json
|
|
import time
|
|
import torch
|
|
import hydra
|
|
import random
|
|
import string
|
|
import logging
|
|
import os.path
|
|
from tqdm import tqdm
|
|
from omegaconf import DictConfig, OmegaConf, ListConfig
|
|
|
|
from funasr.register import tables
|
|
from funasr.utils.load_utils import load_bytes
|
|
from funasr.download.file import download_from_url
|
|
from funasr.auto.auto_model import prepare_data_iterator
|
|
from funasr.utils.timestamp_tools import timestamp_sentence
|
|
from funasr.download.download_from_hub import download_model
|
|
from funasr.utils.vad_utils import slice_padding_audio_samples
|
|
from funasr.train_utils.set_all_random_seed import set_all_random_seed
|
|
from funasr.train_utils.load_pretrained_model import load_pretrained_model
|
|
from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank
|
|
from funasr.models.campplus.utils import sv_chunk, postprocess, distribute_spk
|
|
|
|
|
|
class AutoFrontend:
    """Batch feature-extraction front end for FunASR models.

    On construction it (optionally) downloads the model configuration from
    the model hub, instantiates the configured frontend class from the
    registry, and exposes ``__call__`` to turn raw audio inputs into
    batches of fbank features ready for a downstream model.
    """

    def __init__(self, **kwargs):
        """Build the frontend from keyword configuration.

        Requires at least ``model``; when ``model_conf`` is absent the full
        configuration is first fetched from the model hub (``hub`` selects
        the hub, defaulting to "ms").
        """
        assert "model" in kwargs
        if "model_conf" not in kwargs:
            logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
            kwargs = download_model(**kwargs)

        # Resolve the frontend implementation from the registered class
        # table and instantiate it with its own config section.
        frontend = kwargs.get("frontend", None)
        if frontend is not None:
            frontend_class = tables.frontend_classes.get(frontend)
            frontend = frontend_class(**kwargs["frontend_conf"])

        self.frontend = frontend
        # Drop the frontend spec from the stored config: it has been
        # consumed and must not be re-instantiated by later calls.
        if "frontend" in kwargs:
            del kwargs["frontend"]
        self.kwargs = kwargs

    def __call__(self, input, input_len=None, kwargs=None, **cfg):
        """Extract fbank features for ``input`` in batches.

        Args:
            input: audio input accepted by ``prepare_data_iterator``
                (file path, url, list of samples, bytes, ...).
            input_len: optional lengths when ``input`` is raw samples.
            kwargs: explicit config dict; defaults to the instance config.
            **cfg: per-call overrides (e.g. ``batch_size``, ``device``,
                ``return_pt``, ``fs``).

        Returns:
            list[dict]: one dict per batch with keys ``input`` (features),
            ``input_len`` (lengths), ``key`` (sample ids) and
            ``data_type`` == "fbank".
        """
        # BUGFIX: copy before applying per-call overrides. The original
        # updated self.kwargs in place, so overrides from one call leaked
        # into every subsequent call.
        kwargs = dict(self.kwargs) if kwargs is None else kwargs
        kwargs.update(cfg)

        key_list, data_list = prepare_data_iterator(input, input_len=input_len)
        batch_size = kwargs.get("batch_size", 1)
        device = kwargs.get("device", "cuda")
        if device == "cpu":
            # Batching only pays off on GPU; process one-by-one on CPU.
            batch_size = 1

        meta_data = {}
        result_list = []
        num_samples = len(data_list)

        for beg_idx in range(0, num_samples, batch_size):
            end_idx = min(num_samples, beg_idx + batch_size)
            data_batch = data_list[beg_idx:end_idx]
            key_batch = key_list[beg_idx:end_idx]

            # Load/resample the raw audio, then extract fbank features,
            # recording wall-clock timings for each stage.
            time1 = time.perf_counter()
            audio_sample_list = load_audio_text_image_video(
                data_batch, fs=self.frontend.fs, audio_fs=kwargs.get("fs", 16000)
            )
            time2 = time.perf_counter()
            meta_data["load_data"] = f"{time2 - time1:0.3f}"
            speech, speech_lengths = extract_fbank(
                audio_sample_list,
                data_type=kwargs.get("data_type", "sound"),
                frontend=self.frontend,
                **kwargs,
            )
            time3 = time.perf_counter()
            meta_data["extract_feat"] = f"{time3 - time2:0.3f}"
            # Total audio duration of this batch in seconds
            # (frame_shift and lfr_n are in milliseconds per frame).
            meta_data["batch_data_time"] = (
                speech_lengths.sum().item() * self.frontend.frame_shift * self.frontend.lfr_n / 1000
            )

            # Hand back torch tensors on the target device by default, or
            # numpy arrays when return_pt is False (e.g. for onnx runtimes).
            if kwargs.get("return_pt", True):
                speech, speech_lengths = speech.to(device=device), speech_lengths.to(device=device)
            else:
                speech, speech_lengths = speech.numpy(), speech_lengths.numpy()
            batch = {
                "input": speech,
                "input_len": speech_lengths,
                "key": key_batch,
                "data_type": "fbank",
            }
            result_list.append(batch)

        return result_list
|