From 2cca8104d26b454112f39b8405dcb0e70d365990 Mon Sep 17 00:00:00 2001
From: zhifu gao
Date: Fri, 19 Jan 2024 17:05:08 +0800
Subject: [PATCH] Funasr1.0 (#1275)

* funasr1.0 finetune

* funasr1.0 pbar

* update with main (#1260)

* Update websocket_protocol_zh.md

* update

---------

Co-authored-by: Yabin Li
Co-authored-by: shixian.shi

* update with main (#1264)

* Funasr1.0 (#1261)

* funasr1.0 finetune

* funasr1.0 pbar

* update with main (#1260)

* Update websocket_protocol_zh.md

* update

---------

Co-authored-by: Yabin Li
Co-authored-by: shixian.shi

---------

Co-authored-by: Yabin Li
Co-authored-by: shixian.shi

* bug fix

---------

Co-authored-by: Yabin Li
Co-authored-by: shixian.shi

* funasr1.0 sanm scama

* funasr1.0 infer_after_finetune

* funasr1.0 fsmn-vad bug fix

* funasr1.0 fsmn-vad bug fix

* funasr1.0 fsmn-vad bug fix

* funasr1.0 finetune

* funasr1.0 finetune

* funasr1.0 finetune

---------

Co-authored-by: Yabin Li
Co-authored-by: shixian.shi
---
 .../paraformer/finetune.sh                 |  4 ++--
 funasr/auto/auto_model.py                  |  2 +-
 funasr/bin/train.py                        |  2 +-
 funasr/datasets/audio_datasets/samplers.py |  2 +-
 funasr/models/fsmn_vad_streaming/model.py  |  2 --
 funasr/train_utils/trainer.py              | 20 +++++++++++++++----
 6 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/examples/industrial_data_pretraining/paraformer/finetune.sh b/examples/industrial_data_pretraining/paraformer/finetune.sh
index 7d8987602..1aff0683c 100644
--- a/examples/industrial_data_pretraining/paraformer/finetune.sh
+++ b/examples/industrial_data_pretraining/paraformer/finetune.sh
@@ -11,9 +11,9 @@ python funasr/bin/train.py \
 +model_revision="v2.0.2" \
 +train_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \
 +valid_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \
-++dataset_conf.batch_size=2 \
+++dataset_conf.batch_size=64 \
 ++dataset_conf.batch_type="example" \
 ++train_conf.max_epoch=2 \
+++dataset_conf.num_workers=4 \
 +output_dir="outputs/debug/ckpt/funasr2/exp2" \
-+device="cpu" \
 +debug="true"
\ No newline at end of file
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index bedc17d16..332013612 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -132,7 +132,7 @@ class AutoModel:
         self.punc_kwargs = punc_kwargs
         self.spk_model = spk_model
         self.spk_kwargs = spk_kwargs
-        self.model_path = kwargs["model_path"]
+        self.model_path = kwargs.get("model_path", "./")
 
     def build_model(self, **kwargs):
 
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 0334006c5..d9d4d6241 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -40,7 +40,7 @@ def main_hydra(kwargs: DictConfig):
 
 
 def main(**kwargs):
-
+    print(kwargs)
     # set random seed
     tables.print()
     set_all_random_seed(kwargs.get("seed", 0))
diff --git a/funasr/datasets/audio_datasets/samplers.py b/funasr/datasets/audio_datasets/samplers.py
index e170c681b..0d9309814 100644
--- a/funasr/datasets/audio_datasets/samplers.py
+++ b/funasr/datasets/audio_datasets/samplers.py
@@ -28,7 +28,7 @@ class BatchSampler(torch.utils.data.BatchSampler):
         self.shuffle = shuffle and is_training
 
     def __len__(self):
-        return self.total_samples
+        return (self.total_samples-1) // self.batch_size + 1
 
     def set_epoch(self, epoch):
         np.random.seed(epoch)
diff --git a/funasr/models/fsmn_vad_streaming/model.py b/funasr/models/fsmn_vad_streaming/model.py
index 7c2156174..becfd56e3 100644
--- a/funasr/models/fsmn_vad_streaming/model.py
+++ b/funasr/models/fsmn_vad_streaming/model.py
@@ -255,7 +255,6 @@ class Stats(object):
         self.waveform = None
         self.last_drop_frames = 0
 
-
 @tables.register("model_classes", "FsmnVADStreaming")
 class FsmnVADStreaming(nn.Module):
     """
@@ -500,7 +499,6 @@ class FsmnVADStreaming(nn.Module):
         # # reset class variables and clear the dict for the next query
         # self.AllResetDetection()
         return segments
-
 
     def init_cache(self, cache: dict = {}, **kwargs):
 
diff --git a/funasr/train_utils/trainer.py b/funasr/train_utils/trainer.py
index 91b30b0a8..62d6be80b 100644
--- a/funasr/train_utils/trainer.py
+++ b/funasr/train_utils/trainer.py
@@ -147,9 +147,17 @@ class Trainer:
 
         for epoch in range(self.start_epoch, self.max_epoch + 1):
             self._train_epoch(epoch)
+
+            if self.use_ddp or self.use_fsdp:
+                dist.barrier()
+
             self._validate_epoch(epoch)
-
+
+            if self.use_ddp or self.use_fsdp:
+                dist.barrier()
+
+
             if self.rank == 0:
                 self._save_checkpoint(epoch)
 
 
@@ -164,7 +172,9 @@
 
         if self.use_ddp or self.use_fsdp:
             dist.barrier()
-        self.writer.close()
+
+        if self.writer:
+            self.writer.close()
 
     def _train_epoch(self, epoch):
 
@@ -230,6 +240,8 @@
                 continue
 
             # Execute an optimization step (update model parameters)
+            if self.use_ddp or self.use_fsdp:
+                dist.barrier()
             self.optim.step()
             self.scheduler.step()
             # Clear gradients for the next accumulation stage
@@ -244,7 +256,7 @@
             pbar.update(1)
             if self.local_rank == 0:
                 description = (
-                    f"Epoch: {epoch}/{self.max_epoch}, "
+                    f"Train epoch: {epoch}/{self.max_epoch}, "
                     f"step {batch_idx}/{len(self.dataloader_train)}, "
                     f"{speed_stats}, "
                    f"(loss: {loss.detach().cpu().item():.3f}), "
@@ -306,7 +318,7 @@
             pbar.update(1)
             if self.local_rank == 0:
                 description = (
-                    f"validation: \nEpoch: {epoch}/{self.max_epoch}, "
+                    f"validation epoch: {epoch}/{self.max_epoch}, "
                     f"step {batch_idx}/{len(self.dataloader_train)}, "
                     f"{speed_stats}, "
                     f"(loss: {loss.detach().cpu().item():.3f}), "
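
For reference, a minimal sketch (not part of the patch) of what the samplers.py change computes: BatchSampler.__len__ now reports the number of batches via integer ceiling division instead of the raw sample count, so anything that calls len() on the sampler sees the true iteration count. The helper name below is illustrative only; it simply mirrors the total_samples and batch_size attributes used in funasr/datasets/audio_datasets/samplers.py.

# Illustrative sketch only -- not FunASR code. It mirrors the corrected
# __len__ logic from funasr/datasets/audio_datasets/samplers.py.

def num_batches(total_samples: int, batch_size: int) -> int:
    # Integer ceiling division: (n - 1) // b + 1 == ceil(n / b) for n >= 1.
    return (total_samples - 1) // batch_size + 1

if __name__ == "__main__":
    assert num_batches(10, 4) == 3   # 4 + 4 + 2 samples -> 3 batches
    assert num_batches(8, 4) == 2    # exact multiple -> no extra batch
    assert num_batches(1, 64) == 1   # a single sample still forms one batch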