mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
streaming bugfix (#1271)
* funasr1.0 finetune * funasr1.0 pbar * update with main (#1260) * Update websocket_protocol_zh.md * update --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com> * update with main (#1264) * Funasr1.0 (#1261) * funasr1.0 finetune * funasr1.0 pbar * update with main (#1260) * Update websocket_protocol_zh.md * update --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com> --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com> * bug fix --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com> * funasr1.0 sanm scama * funasr1.0 infer_after_finetune * funasr1.0 fsmn-vad bug fix * funasr1.0 fsmn-vad bug fix * funasr1.0 fsmn-vad bug fix --------- Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com> Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
This commit is contained in:
parent
b28f3c9da9
commit
12496e559f
@ -10,7 +10,6 @@ encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-atten
|
||||
decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
|
||||
|
||||
model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online", model_revision="v2.0.2")
|
||||
cache = {}
|
||||
res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
|
||||
chunk_size=chunk_size,
|
||||
encoder_chunk_look_back=encoder_chunk_look_back,
|
||||
|
||||
@ -501,7 +501,9 @@ class FsmnVADStreaming(nn.Module):
|
||||
# self.AllResetDetection()
|
||||
return segments
|
||||
|
||||
|
||||
def init_cache(self, cache: dict = {}, **kwargs):
|
||||
|
||||
cache["frontend"] = {}
|
||||
cache["prev_samples"] = torch.empty(0)
|
||||
cache["encoder"] = {}
|
||||
@ -528,7 +530,7 @@ class FsmnVADStreaming(nn.Module):
|
||||
cache: dict = {},
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
|
||||
if len(cache) == 0:
|
||||
self.init_cache(cache, **kwargs)
|
||||
|
||||
@ -583,7 +585,7 @@ class FsmnVADStreaming(nn.Module):
|
||||
|
||||
cache["prev_samples"] = audio_sample[:-m]
|
||||
if _is_final:
|
||||
cache = {}
|
||||
self.init_cache(cache)
|
||||
|
||||
ibest_writer = None
|
||||
if ibest_writer is None and kwargs.get("output_dir") is not None:
|
||||
|
||||
@ -502,8 +502,7 @@ class ParaformerStreaming(Paraformer):
|
||||
logging.info("enable beam_search")
|
||||
self.init_beam_search(**kwargs)
|
||||
self.nbest = kwargs.get("nbest", 1)
|
||||
|
||||
|
||||
|
||||
if len(cache) == 0:
|
||||
self.init_cache(cache, **kwargs)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user