* Update trainer.py
This commit is contained in:
parent 09d4475cc7
commit 73d08557ba
@@ -899,6 +899,9 @@ We recommend you switch to ddp if you want to use amp
         self.__add_tqdm_metrics(model_specific_tqdm_metrics_dic)

+        # accumulate loss (if accumulate_grad_batches = 1 no effect)
+        loss = loss / self.accumulate_grad_batches
+
         # backward pass
         if self.use_amp:
             # scale loss when using amp
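The lines added here divide each batch loss by accumulate_grad_batches up front, so the gradients summed across several backward passes end up on the same scale as one large batch. Below is a minimal, self-contained sketch of that pattern; the toy model, data, and accumulate_grad_batches value are placeholders, not code from trainer.py, and the amp branch (scaling the loss before backward) is omitted.

import torch
from torch import nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulate_grad_batches = 4  # hypothetical setting
loader = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(8)]

for batch_nb, (x, y) in enumerate(loader):
    loss = nn.functional.mse_loss(model(x), y)

    # accumulate loss (if accumulate_grad_batches = 1 no effect)
    loss = loss / accumulate_grad_batches
    loss.backward()  # gradients from successive batches are summed into .grad

    # step only once the configured number of batches has accumulated
    if (batch_nb + 1) % accumulate_grad_batches == 0:
        optimizer.step()
        optimizer.zero_grad()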
@@ -918,12 +921,11 @@ We recommend you switch to ddp if you want to use amp
                 for param in model.parameters():
                     print(param.grad.float().sum())

-        # avoid memory leaks
+        # track total loss for logging (avoid mem leaks)
         self.batch_loss_value += loss.item()

         # gradient update with accumulated gradients
         if (self.batch_nb + 1) % self.accumulate_grad_batches == 0:

             # clip gradients
             if self.gradient_clip > 0:
                 model = self.__get_model()
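This hunk accumulates the scalar loss with .item() (so no computation graph is kept alive between batches) and clips gradients before the accumulated update. A rough sketch of those two steps follows; it assumes torch.nn.utils.clip_grad_norm_ as the clipping primitive, since the hunk itself does not show which call trainer.py uses, and the model, data, and gradient_clip value are placeholders.

import torch
from torch import nn
from torch.nn.utils import clip_grad_norm_

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
gradient_clip = 0.5        # hypothetical value
batch_loss_value = 0.0     # plain float, detached from the graph

x, y = torch.randn(8, 10), torch.randn(8, 1)
loss = nn.functional.mse_loss(model(x), y)
loss.backward()

# .item() converts the loss to a Python float, so the graph can be freed
batch_loss_value += loss.item()

# clip the global gradient norm before stepping
if gradient_clip > 0:
    clip_grad_norm_(model.parameters(), gradient_clip)

optimizer.step()
optimizer.zero_grad()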
@@ -941,11 +943,7 @@ We recommend you switch to ddp if you want to use amp
             # clear gradients
             optimizer.zero_grad()

-            # queuing loss across batches blows it up proportionally...
-            # divide out the number accumulated
-            self.batch_loss_value = self.batch_loss_value / self.accumulate_grad_batches
-
-            # track loss
+            # calculate running loss for display
             self.running_loss.append(self.batch_loss_value)
             self.batch_loss_value = 0
             self.avg_loss = np.mean(self.running_loss[-100:])
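With the per-batch division now done at loss-computation time (first hunk), the extra division at step time is dropped and only the rolling display average remains. A small sketch of that bookkeeping, using a synthetic loss stream in place of real batch losses:

import numpy as np

running_loss = []
batch_loss_value = 0.0

for step in range(300):
    batch_loss_value += 1.0 / (step + 1)  # placeholder for the accumulated batch loss

    # calculate running loss for display
    running_loss.append(batch_loss_value)
    batch_loss_value = 0.0
    avg_loss = np.mean(running_loss[-100:])  # smooth over the last 100 steps

print(f"avg_loss over last 100 steps: {avg_loss:.4f}")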