train finetune

2025-09-15 14:48:36 +08:00 · 2024-02-20 18:01:15 +08:00 · 2024-02-20 18:01:15 +08:00 · 2e8dc0933f
commit 2e8dc0933f
parent 45d9ccafef
5 changed files with 12 additions and 8 deletions
--- a/examples/aishell/branchformer/run.sh
+++ b/examples/aishell/branchformer/run.sh
@@ -105,7 +105,8 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
  echo "stage 4: ASR Training"

  mkdir -p ${exp_dir}/exp/${model_dir}
-  log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+  current_time=$(date "+%Y-%m-%d_%H-%M")
+  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"

  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
--- a/examples/aishell/e_branchformer/run.sh
+++ b/examples/aishell/e_branchformer/run.sh
@@ -105,7 +105,8 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
  echo "stage 4: ASR Training"

  mkdir -p ${exp_dir}/exp/${model_dir}
-  log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+  current_time=$(date "+%Y-%m-%d_%H-%M")
+  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"

  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
--- a/examples/aishell/paraformer/run.sh
+++ b/examples/aishell/paraformer/run.sh
@@ -105,7 +105,8 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
  echo "stage 4: ASR Training"

  mkdir -p ${exp_dir}/exp/${model_dir}
-  log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+  current_time=$(date "+%Y-%m-%d_%H-%M")
+  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"

  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
--- a/examples/aishell/transformer/run.sh
+++ b/examples/aishell/transformer/run.sh
@@ -105,7 +105,8 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
  echo "stage 4: ASR Training"

  mkdir -p ${exp_dir}/exp/${model_dir}
-  log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+  current_time=$(date "+%Y-%m-%d_%H-%M")
+  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"

  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
--- a/funasr/train_utils/trainer.py
+++ b/funasr/train_utils/trainer.py
@@ -188,7 +188,7 @@ class Trainer:
            epoch (int): The current epoch number.
        """
        self.model.train()
-        pbar = tqdm(colour="blue", desc=f"Training Epoch: {epoch + 1}", total=len(self.dataloader_train),
+        pbar = tqdm(colour="blue", desc=f"rank: {self.local_rank}, Training Epoch: {epoch + 1}", total=len(self.dataloader_train),
                    dynamic_ncols=True)
        
        # Set the number of steps for gradient accumulation
@@ -278,7 +278,7 @@ class Trainer:
                    f"epoch: {epoch}/{self.max_epoch}, "
                    f"step: {batch_idx}/{len(self.dataloader_train)}, total: {self.batch_total}, "
                    f"(loss: {loss.detach().cpu().item():.3f}), "
-                    f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}"
+                    f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}, "
                    f"{speed_stats}, "
                    f"{gpu_info}"
                )
@@ -307,7 +307,7 @@ class Trainer:
        """
        self.model.eval()
        with torch.no_grad():
-            pbar = tqdm(colour="red", desc=f"Training Epoch: {epoch + 1}", total=len(self.dataloader_val),
+            pbar = tqdm(colour="red", desc=f"rank: {self.local_rank}, Validation Epoch: {epoch + 1}", total=len(self.dataloader_val),
                        dynamic_ncols=True)
            speed_stats = {}
            time5 = time.perf_counter()
@@ -343,7 +343,7 @@ class Trainer:
                        f"validation epoch: {epoch}/{self.max_epoch}, "
                        f"step: {batch_idx}/{len(self.dataloader_val)}, "
                        f"(loss: {loss.detach().cpu().item():.3f}), "
-                        f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}"
+                        f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}, "
                        f"{speed_stats}, "
                    )
                    pbar.set_description(description)