fix: resolve unexpected 'out of memory' issue in multi-GPU setup (#2373)

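Fixed a bug where calling torch.cuda.empty_cache() without first selecting the target device caused extra memory usage on 'cuda:0', leading to unexpected 'out of memory' errors in multi-GPU environments. Each call is now wrapped in a torch.cuda.device(...) context for the device actually in use.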

References:
- https://github.com/pytorch/pytorch/issues/25752
- https://github.com/pytorch/pytorch/issues/144025
This commit is contained in:
BienBoy 2025-02-01 23:29:34 +08:00 committed by GitHub
parent c4e7014492
commit c1e365fea0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 8 additions and 4 deletions

View File

@ -366,6 +366,7 @@ class AutoModel:
if pbar:
# pbar.update(1)
pbar.set_description(f"rtf_avg: {time_escape_total/time_speech_total:0.3f}")
with torch.cuda.device(next(model.parameters()).device):
torch.cuda.empty_cache()
return asr_result_list

View File

@ -221,6 +221,7 @@ def main(**kwargs):
)
trainer.start_step = 0
with torch.cuda.device(kwargs["device"]):
torch.cuda.empty_cache()
time_escaped = (time.perf_counter() - time_slice_i) / 3600.0

View File

@ -184,6 +184,7 @@ def main(**kwargs):
)
trainer.start_step = 0
with torch.cuda.device(kwargs["device"]):
torch.cuda.empty_cache()
time_escaped = (time.perf_counter() - time_slice_i) / 3600.0

View File

@ -873,6 +873,7 @@ class Decoder(torch.nn.Module, ScorerInterface):
ctc_state[idx], accum_best_ids
)
with torch.cuda.device(vscores.device):
torch.cuda.empty_cache()
dummy_hyps = [{"yseq": [self.sos, self.eos], "score": np.array([-float("inf")])}]