Mirror of https://github.com/modelscope/FunASR, synced 2025-09-15 14:48:36 +08:00
fix: resolve unexpected 'out of memory' issue in multi-GPU setup (#2373)
Fixed a bug where a bare call to torch.cuda.empty_cache() caused extra memory usage on 'cuda:0' (the call acts on the current device, which defaults to 'cuda:0', and initializes a CUDA context there), leading to unexpected 'out of memory' errors in multi-GPU environments. The call is now wrapped in torch.cuda.device(...) so the cache is cleared on the GPU the model actually runs on.

References:
- https://github.com/pytorch/pytorch/issues/25752
- https://github.com/pytorch/pytorch/issues/144025
This commit is contained in:
parent c4e7014492
commit c1e365fea0
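The core of the fix is to pin torch.cuda.empty_cache() to the device the model actually uses instead of whatever device happens to be current. Below is a minimal sketch of that pattern, assuming a toy two-GPU setup with a throwaway Linear model placed on 'cuda:1'; the clear_cache_on helper and the model are illustrative, not part of the FunASR code.

# Minimal sketch of the pattern applied in this commit (illustrative only;
# the clear_cache_on helper and the toy model are assumptions, not FunASR code).
import torch


def clear_cache_on(device: torch.device) -> None:
    """Release PyTorch's cached GPU memory on `device` only.

    A bare torch.cuda.empty_cache() acts on the *current* device, which
    defaults to cuda:0; even if the process never uses cuda:0, the call
    initializes a CUDA context there and consumes memory on that card,
    which is the root cause of the unexpected OOM described above.
    """
    if device.type == "cuda":
        # Make `device` current for the duration of the call, so the cache
        # (and any lazily created context) stays on the intended GPU.
        with torch.cuda.device(device):
            torch.cuda.empty_cache()


if __name__ == "__main__":
    if torch.cuda.device_count() > 1:
        model = torch.nn.Linear(16, 16).to("cuda:1")     # model lives on cuda:1
        clear_cache_on(next(model.parameters()).device)  # clears cache on cuda:1, not cuda:0

The diff below applies the same guard wherever empty_cache() was called: during inference the target device is taken from next(model.parameters()).device, during training from kwargs["device"], and in the decoder's batch beam search from vscores.device.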
@@ -366,7 +366,8 @@ class AutoModel:
             if pbar:
                 # pbar.update(1)
                 pbar.set_description(f"rtf_avg: {time_escape_total/time_speech_total:0.3f}")
-            torch.cuda.empty_cache()
+            with torch.cuda.device(next(model.parameters()).device):
+                torch.cuda.empty_cache()
         return asr_result_list
 
     def inference_with_vad(self, input, input_len=None, **cfg):
@@ -221,7 +221,8 @@ def main(**kwargs):
             )
             trainer.start_step = 0
 
-        torch.cuda.empty_cache()
+        with torch.cuda.device(kwargs["device"]):
+            torch.cuda.empty_cache()
 
         time_escaped = (time.perf_counter() - time_slice_i) / 3600.0
         logging.info(
@@ -184,7 +184,8 @@ def main(**kwargs):
             )
             trainer.start_step = 0
 
-        torch.cuda.empty_cache()
+        with torch.cuda.device(kwargs["device"]):
+            torch.cuda.empty_cache()
 
         time_escaped = (time.perf_counter() - time_slice_i) / 3600.0
         logging.info(
@@ -873,7 +873,8 @@ class Decoder(torch.nn.Module, ScorerInterface):
                     ctc_state[idx], accum_best_ids
                 )
 
-            torch.cuda.empty_cache()
+            with torch.cuda.device(vscores.device):
+                torch.cuda.empty_cache()
 
         dummy_hyps = [{"yseq": [self.sos, self.eos], "score": np.array([-float("inf")])}]
         ended_hyps = [