mirror of https://github.com/modelscope/FunASR, synced 2025-09-15 14:48:36 +08:00

commit 2bc330a599 (parent b2ea4424a3)

update trainer
@@ -6,7 +6,7 @@
 #git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}
 
 ## generate jsonl from wav.scp and text.txt
-#python funasr/datasets/audio_datasets/scp2jsonl.py \
+#python -m funasr.datasets.audio_datasets.scp2jsonl \
 #++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
 #++data_type_list='["source", "target"]' \
 #++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
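For context on the commented command above: wav.scp and text.txt are Kaldi-style two-column files keyed by utterance id, and the converter pairs them into one jsonl record per shared id. The sketch below only illustrates that pairing, it is not FunASR's actual implementation; the file formats and any record fields beyond "source"/"target" (which come from data_type_list above) are assumptions.

# Hedged sketch of the scp -> jsonl pairing (not FunASR's actual code).
# Assumed input formats: wav.scp lines "<utt_id> <wav_path>",
# text.txt lines "<utt_id> <transcript>".
import json

def scp_to_dict(path):
    entries = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, value = line.strip().split(maxsplit=1)
            entries[key] = value
    return entries

wavs = scp_to_dict("wav.scp")    # e.g. {"ID0001": "/path/a.wav"}
texts = scp_to_dict("text.txt")  # e.g. {"ID0001": "hello world"}
with open("audio_datasets.jsonl", "w", encoding="utf-8") as out:
    for utt_id in sorted(wavs.keys() & texts.keys()):
        record = {"key": utt_id, "source": wavs[utt_id], "target": texts[utt_id]}
        out.write(json.dumps(record, ensure_ascii=False) + "\n")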
@@ -72,14 +72,7 @@ def parse_context_length(data_list: list, data_type: str):
 
 @hydra.main(config_name=None, version_base=None)
 def main_hydra(cfg: DictConfig):
-    """
-    python funasr/datasets/audio_datasets/scp2jsonl.py \
-    ++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
-    ++data_type_list='["source", "target"]' \
-    ++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
-
-    """
 
     kwargs = OmegaConf.to_container(cfg, resolve=True)
 
     scp_file_list = kwargs.get("scp_file_list", ("/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"))
@@ -90,6 +83,13 @@ def main_hydra(cfg: DictConfig):
     gen_jsonl_from_wav_text_list(scp_file_list, data_type_list=data_type_list, jsonl_file_out=jsonl_file_out)
 
 
+"""
+python -m funasr.datasets.audio_datasets.scp2jsonl \
+++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
+"""
+
 if __name__ == "__main__":
     main_hydra()
 
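The relocated docstring documents only the CLI form (now run as a module with `python -m`, so imports resolve against the installed funasr package). The equivalent programmatic call, taken from the body of main_hydra above; the paths here are placeholders:

from funasr.datasets.audio_datasets.scp2jsonl import gen_jsonl_from_wav_text_list

gen_jsonl_from_wav_text_list(
    ["/path/to/wav.scp", "/path/to/text.txt"],  # order matches data_type_list
    data_type_list=["source", "target"],
    jsonl_file_out="/path/to/audio_datasets.jsonl",
)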
@@ -4,7 +4,7 @@ import torch.nn.functional as F
 try:
     from rotary_embedding_torch import RotaryEmbedding
 except:
-    print("Please install rotary_embedding_torch by: \n pip install -U rotary_embedding_torch")
+    print("If you want to use mossformer, please install rotary_embedding_torch by: \n pip install -U rotary_embedding_torch")
 from funasr.models.transformer.layer_norm import GlobalLayerNorm, CumulativeLayerNorm, ScaleNorm
 from funasr.models.transformer.embedding import ScaledSinuEmbedding
 from funasr.models.transformer.mossformer import FLASH_ShareA_FFConvM
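The try/except above keeps rotary_embedding_torch a soft dependency: the import hint is printed once and only mossformer users need the package installed. A minimal sketch of the same guard pattern, with a deferred failure added for illustration (the fallback name and builder function are hypothetical, not FunASR code):

# Soft-dependency guard: import if available, defer the failure otherwise.
try:
    from rotary_embedding_torch import RotaryEmbedding
except ImportError:
    RotaryEmbedding = None  # hypothetical fallback; FunASR only prints a hint

def build_rotary(dim: int):
    # Fails only when the optional feature is actually requested.
    if RotaryEmbedding is None:
        raise RuntimeError(
            "rotary_embedding_torch is required: pip install -U rotary_embedding_torch"
        )
    return RotaryEmbedding(dim=dim)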
@@ -302,17 +302,14 @@ class Trainer:
                 )
                 pbar.set_description(description)
                 if self.writer:
-                    self.writer.add_scalar(f'rank{self.local_rank}_Loss/train', loss.item(),
-                                           epoch*len(self.dataloader_train) + batch_idx)
+                    self.writer.add_scalar(f'rank{self.local_rank}_Loss/train', loss.item(), self.batch_total)
+                    self.writer.add_scalar(f'rank{self.local_rank}_lr/train', lr, self.batch_total)
                     for key, var in stats.items():
-                        self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', var.item(),
-                                               epoch * len(self.dataloader_train) + batch_idx)
+                        self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', var.item(), self.batch_total)
                     for key, var in speed_stats.items():
-                        self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', eval(var),
-                                               epoch * len(self.dataloader_train) + batch_idx)
+                        self.writer.add_scalar(f'rank{self.local_rank}_{key}/train', eval(var), self.batch_total)
 
-        # if batch_idx == 2:
-        #     break
         pbar.close()
 
     def _validate_epoch(self, epoch):
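The logging change above replaces the reconstructed step index epoch*len(self.dataloader_train) + batch_idx with a running counter, self.batch_total (presumably incremented once per training step). A monotone counter keeps TensorBoard curves consistent even when epoch lengths vary, e.g. under dynamic batching. A minimal standalone sketch of the pattern:

# Minimal sketch: log against a monotone global step instead of
# epoch * len(dataloader) + batch_idx, which breaks when epoch lengths vary.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="exp/tb")  # assumed log dir
batch_total = 0
for epoch in range(2):
    for batch_idx in range(100):        # stand-in for the train dataloader
        loss = 1.0 / (batch_total + 1)  # dummy loss value
        batch_total += 1
        writer.add_scalar("Loss/train", loss, batch_total)
writer.close()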
@@ -356,7 +353,10 @@ class Trainer:
 
             if (batch_idx+1) % self.log_interval == 0 or (batch_idx+1) == len(self.dataloader_val):
                 pbar.update(self.log_interval)
+                time_now = datetime.now()
+                time_now = time_now.strftime("%Y-%m-%d %H:%M:%S")
                 description = (
+                    f"{time_now}, "
                     f"rank: {self.local_rank}, "
                     f"validation epoch: {epoch}/{self.max_epoch}, "
                     f"step: {batch_idx+1}/{len(self.dataloader_val)}, "
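The validation hunk only prefixes the progress description with a wall-clock timestamp. A standalone sketch of that pattern, assuming a tqdm loop like the one in the trainer:

# Timestamped tqdm description, mirroring the validation loop above.
from datetime import datetime
from tqdm import tqdm

pbar = tqdm(total=100)
for step in range(100):
    if (step + 1) % 10 == 0:
        pbar.update(10)
        time_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        pbar.set_description(f"{time_now}, step: {step + 1}/100")
pbar.close()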