mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
Bugfix: Only allow rank==0 to clean up old checkpoints (#2558)
Fixes bug: https://github.com/modelscope/FunASR/issues/2557
This commit is contained in:
parent
a3d6e48fe1
commit
443bc09c11
@@ -272,22 +272,23 @@ class Trainer:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print("Undo")
|
print("Undo")
|
||||||
self.saved_ckpts[ckpt_name] = getattr(
|
if self.rank == 0:
|
||||||
self, f"val_{self.avg_keep_nbest_models_type}_step_or_epoch"
|
self.saved_ckpts[ckpt_name] = getattr(
|
||||||
)[ckpt_name]
|
self, f"val_{self.avg_keep_nbest_models_type}_step_or_epoch"
|
||||||
if self.keep_nbest_models > 0:
|
)[ckpt_name]
|
||||||
if len(self.saved_ckpts) > self.keep_nbest_models:
|
if self.keep_nbest_models > 0:
|
||||||
if self.avg_keep_nbest_models_type == "acc":
|
if len(self.saved_ckpts) > self.keep_nbest_models:
|
||||||
key = min(self.saved_ckpts, key=self.saved_ckpts.get)
|
if self.avg_keep_nbest_models_type == "acc":
|
||||||
else:
|
key = min(self.saved_ckpts, key=self.saved_ckpts.get)
|
||||||
key = max(self.saved_ckpts, key=self.saved_ckpts.get)
|
else:
|
||||||
if key in self.saved_ckpts:
|
key = max(self.saved_ckpts, key=self.saved_ckpts.get)
|
||||||
del self.saved_ckpts[key]
|
if key in self.saved_ckpts:
|
||||||
filename = os.path.join(self.output_dir, key)
|
del self.saved_ckpts[key]
|
||||||
logging.info(f"Delete: {filename}")
|
filename = os.path.join(self.output_dir, key)
|
||||||
if os.path.exists(filename):
|
logging.info(f"Delete: {filename}")
|
||||||
# os.remove(filename)
|
if os.path.exists(filename):
|
||||||
misc_utils.smart_remove(filename)
|
# os.remove(filename)
|
||||||
|
misc_utils.smart_remove(filename)
|
||||||
|
|
||||||
elif self.use_fsdp:
|
elif self.use_fsdp:
|
||||||
pass
|
pass
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user