commit 506c3d4648
Merge branch 'main' of github.com:alibaba-damo-academy/FunASR
@@ -6,7 +6,7 @@
 #git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}

 ## generate jsonl from wav.scp and text.txt
-#python funasr/datasets/audio_datasets/scp2jsonl.py \
+#python -m funasr.datasets.audio_datasets.scp2jsonl \
 #++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
 #++data_type_list='["source", "target"]' \
 #++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
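For readers unfamiliar with the inputs, here is a sketch of what this step consumes and produces; the utterance id and paths are made up, and the exact jsonl field names may differ across FunASR versions:

import json

# wav.scp pairs an utterance id with an audio path; text.txt pairs the same
# id with its transcript. scp2jsonl joins them into one JSON object per line.
wav_scp_line = "ID0001 /Users/zhifu/funasr1.0/test_local/wavs/ID0001.wav"  # hypothetical
text_line = "ID0001 hello world"                                          # hypothetical

key, source = wav_scp_line.split(maxsplit=1)
_, target = text_line.split(maxsplit=1)
print(json.dumps({"key": key, "source": source, "target": target}))
# -> {"key": "ID0001", "source": "/Users/zhifu/...", "target": "hello world"}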
@@ -72,14 +72,7 @@ def parse_context_length(data_list: list, data_type: str):


 @hydra.main(config_name=None, version_base=None)
 def main_hydra(cfg: DictConfig):
-    """
-    python funasr/datasets/audio_datasets/scp2jsonl.py \
-    ++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
-    ++data_type_list='["source", "target"]' \
-    ++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
-
-    """
     kwargs = OmegaConf.to_container(cfg, resolve=True)

     scp_file_list = kwargs.get("scp_file_list", ("/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"))
@@ -90,6 +83,13 @@ def main_hydra(cfg: DictConfig):

     gen_jsonl_from_wav_text_list(scp_file_list, data_type_list=data_type_list, jsonl_file_out=jsonl_file_out)


+"""
+python -m funasr.datasets.audio_datasets.scp2jsonl \
+++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
+"""
+
 if __name__ == "__main__":
     main_hydra()
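Since the script ultimately calls `gen_jsonl_from_wav_text_list` (visible in the context above), the same conversion can also be driven from Python without Hydra; a minimal sketch with placeholder paths:

from funasr.datasets.audio_datasets.scp2jsonl import gen_jsonl_from_wav_text_list

gen_jsonl_from_wav_text_list(
    ["/path/to/wav.scp", "/path/to/text.txt"],    # placeholder paths
    data_type_list=["source", "target"],          # wav.scp -> source, text.txt -> target
    jsonl_file_out="/path/to/audio_datasets.jsonl",
)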
@@ -4,7 +4,7 @@ import torch.nn.functional as F

 try:
     from rotary_embedding_torch import RotaryEmbedding
 except:
-    print("Please install rotary_embedding_torch by: \n pip install -U rotary_embedding_torch")
+    print("If you want to use mossformer, please install rotary_embedding_torch by: \n pip install -U rotary_embedding_torch")
 from funasr.models.transformer.layer_norm import GlobalLayerNorm, CumulativeLayerNorm, ScaleNorm
 from funasr.models.transformer.embedding import ScaledSinuEmbedding
 from funasr.models.transformer.mossformer import FLASH_ShareA_FFConvM
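The reworded message makes clear the dependency only matters to mossformer users. A common variant of this guarded-import pattern (a sketch, not the repository's code) defers the failure until the optional feature is actually requested:

try:
    from rotary_embedding_torch import RotaryEmbedding
except ImportError:
    RotaryEmbedding = None  # defer the error until mossformer is used

def build_rotary_embedding(dim: int):
    # Hypothetical helper: raise only when the optional feature is needed.
    if RotaryEmbedding is None:
        raise ImportError(
            "mossformer requires rotary_embedding_torch: pip install -U rotary_embedding_torch"
        )
    return RotaryEmbedding(dim)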
@@ -455,7 +455,9 @@ class Paraformer(torch.nn.Module):
             speech, speech_lengths = data_in, data_lengths
             if len(speech.shape) < 3:
                 speech = speech[None, :, :]
-            if speech_lengths is None:
+            if speech_lengths is not None:
                 speech_lengths = speech_lengths.squeeze(-1)
+            else:
+                speech_lengths = speech.shape[1]
         else:
             # extract fbank feats
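The fix reads: when a lengths tensor is supplied, squeeze its trailing dimension; when it is absent, fall back to the padded time dimension (the original code squeezed only when lengths were None, which would have raised an AttributeError). A standalone sketch of the corrected branch, with illustrative names that are not the model's API:

import torch

def resolve_speech_lengths(speech: torch.Tensor, speech_lengths=None):
    # speech: (batch, time, feat); speech_lengths: (batch, 1) tensor or None
    if speech_lengths is not None:
        return speech_lengths.squeeze(-1)  # -> (batch,)
    # No lengths given: assume every utterance spans the padded time axis,
    # mirroring the scalar fallback speech.shape[1] in the diff above.
    return torch.full((speech.shape[0],), speech.shape[1], dtype=torch.long)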
@@ -1,16 +1,15 @@
-"""Receptance Weighted Key Value (RWKV) block definition.
-
-Based/modified from https://github.com/BlinkDL/RWKV-LM/blob/main/RWKV-v4/src/model.py
-
-"""
-
-from typing import Dict, Optional, Tuple
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)

 import torch
+from typing import Dict, Optional, Tuple

+from funasr.models.rwkv_bat.rwkv_attention import EncoderSelfAttention, DecoderSelfAttention
+from funasr.models.rwkv_bat.rwkv_feed_forward import FeedForward
 from funasr.models.transformer.layer_norm import LayerNorm
-from funasr.models.rwkv_bat.rwkv_feed_forward import FeedForward
-from funasr.models.rwkv_bat.rwkv_attention import EncoderSelfAttention, DecoderSelfAttention


 class RWKV(torch.nn.Module):
     """RWKV module.
@@ -1,17 +1,14 @@
-"""Attention (time mixing) modules for RWKV block.
-
-Based/Modified from https://github.com/BlinkDL/RWKV-LM/blob/main/RWKV-v4/src/model.py.
-
-Some variables are renamed according to https://github.com/huggingface/transformers/blob/main/src/transformers/models/rwkv/modeling_rwkv.py.
-
-""" # noqa
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)

 import math
+from importlib.util import find_spec
+import torch
 from pathlib import Path
-from importlib.util import find_spec
 from typing import List, Optional, Tuple, Union

-import torch

 wkv_kernel_encoder = None
 wkv_kernel_decoder = None
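The module-level `wkv_kernel_encoder`/`wkv_kernel_decoder` globals imply a compile-once cache for the custom WKV CUDA kernels. A hedged sketch of that pattern; the source file names and arguments here are hypothetical, and the real build code lives elsewhere in this module:

from torch.utils.cpp_extension import load

def get_wkv_encoder_kernel():
    global wkv_kernel_encoder
    if wkv_kernel_encoder is None:  # compile on first use, then reuse for every layer
        wkv_kernel_encoder = load(
            name="wkv_encoder",
            sources=["cuda/wkv_op.cpp", "cuda/wkv_cuda.cu"],  # hypothetical paths
            verbose=False,
        )
    return wkv_kernel_encoder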
@@ -1,17 +1,20 @@
-"""RWKV encoder definition for Transducer models."""
-
-import math
-from typing import Dict, List, Optional, Tuple
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)

 import torch
+from typing import Dict, List, Optional, Tuple

-from funasr.models.encoder.abs_encoder import AbsEncoder
+from funasr.register import tables
 from funasr.models.rwkv_bat.rwkv import RWKV
 from funasr.models.transformer.layer_norm import LayerNorm
+from funasr.models.rwkv_bat.rwkv_subsampling import RWKVConvInput
 from funasr.models.transformer.utils.nets_utils import make_source_mask
-from funasr.models.rwkv_bat.rwkv_subsampling import RWKVConvInput


-class RWKVEncoder(AbsEncoder):
+@tables.register("encoder_classes", "RWKVEncoder")
+class RWKVEncoder(torch.nn.Module):
     """RWKV encoder module.

     Based on https://arxiv.org/pdf/2305.13048.pdf.
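The new `@tables.register("encoder_classes", "RWKVEncoder")` decorator is what replaces the `AbsEncoder` inheritance: the encoder becomes discoverable by name so configs can select it. A sketch of how such a registry is typically consumed; the `get` accessor and the config contents are assumptions, not verified FunASR API:

from funasr.register import tables

encoder_conf = {"output_size": 512}                      # illustrative config only
encoder_cls = tables.encoder_classes.get("RWKVEncoder")  # assumed accessor
encoder = encoder_cls(**encoder_conf)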
@@ -44,6 +47,7 @@ class RWKVEncoder(AbsEncoder):
         subsampling_factor: int =4,
         time_reduction_factor: int = 1,
         kernel: int = 3,
+        **kwargs,
     ) -> None:
         """Construct a RWKVEncoder object."""
         super().__init__()
@@ -1,14 +1,10 @@
-"""Feed-forward (channel mixing) module for RWKV block.
-
-Based/Modified from https://github.com/BlinkDL/RWKV-LM/blob/main/RWKV-v4/src/model.py
-
-Some variables are renamed according to https://github.com/huggingface/transformers/blob/main/src/transformers/models/rwkv/modeling_rwkv.py.
-
-""" # noqa
-
-from typing import List, Optional, Tuple
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)

 import torch
+from typing import List, Optional, Tuple


 class FeedForward(torch.nn.Module):
@@ -1,19 +1,13 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
+# -*- encoding: utf-8 -*-
 # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
 # MIT License (https://opensource.org/licenses/MIT)

-# Copyright 2019 Shigeki Karita
-# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-
-"""Subsampling layer definition."""
-import numpy as np
-import torch
-import torch.nn.functional as F
-from funasr.models.transformer.embedding import PositionalEncoding
-import logging
-from funasr.models.scama.utils import sequence_mask
-from funasr.models.transformer.utils.nets_utils import sub_factor_to_params, pad_to_len
-from typing import Optional, Tuple, Union
+import math
+import torch
+from typing import Optional, Tuple, Union
+from funasr.models.transformer.utils.nets_utils import pad_to_len


 class TooShortUttError(Exception):
     """Raised when the utt is too short for subsampling.
@@ -302,17 +302,14 @@ class Trainer:
                     )
                     pbar.set_description(description)
                     if self.writer:
-                        self.writer.add_scalar(f'rank{self.local_rank}_Loss/train', loss.item(),
-                                               epoch*len(self.dataloader_train) + batch_idx)
+                        self.writer.add_scalar(f'rank{self.local_rank}_Loss/train', loss.item(), self.batch_total)
+                        self.writer.add_scalar(f'rank{self.local_rank}_lr/train', lr, self.batch_total)
                         for key, var in stats.items():
-                            self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', var.item(),
-                                                   epoch * len(self.dataloader_train) + batch_idx)
+                            self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', var.item(), self.batch_total)
                         for key, var in speed_stats.items():
-                            self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', eval(var),
-                                                   epoch * len(self.dataloader_train) + batch_idx)
-
-            # if batch_idx == 2:
-            #     break
+                            self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', eval(var), self.batch_total)


         pbar.close()

     def _validate_epoch(self, epoch):
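The rewritten logging swaps the derived step `epoch * len(self.dataloader_train) + batch_idx` for a single `self.batch_total` counter, which stays monotonic even if the dataloader length changes between epochs (e.g. with dynamic batching). A self-contained sketch of the bookkeeping this assumes; the initialization is inferred, not taken from the diff:

from torch.utils.tensorboard import SummaryWriter

class StepCounterSketch:
    """Minimal sketch; attribute names mirror the Trainer in the diff."""
    def __init__(self):
        self.batch_total = 0        # global step, survives epoch boundaries
        self.local_rank = 0
        self.writer = SummaryWriter()

    def log_batch(self, loss_value: float):
        self.batch_total += 1       # one tick per training batch
        self.writer.add_scalar(f'rank{self.local_rank}_Loss/train', loss_value, self.batch_total)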
@@ -356,7 +353,10 @@ class Trainer:

                 if (batch_idx+1) % self.log_interval == 0 or (batch_idx+1) == len(self.dataloader_val):
                     pbar.update(self.log_interval)
+                    time_now = datetime.now()
+                    time_now = time_now.strftime("%Y-%m-%d %H:%M:%S")
                     description = (
+                        f"{time_now}, "
                         f"rank: {self.local_rank}, "
                         f"validation epoch: {epoch}/{self.max_epoch}, "
                         f"step: {batch_idx+1}/{len(self.dataloader_val)}, "