* Update trainer.py
This commit is contained in:
parent 09d4475cc7
commit 73d08557ba
@@ -899,6 +899,9 @@ We recommend you switch to ddp if you want to use amp
         self.__add_tqdm_metrics(model_specific_tqdm_metrics_dic)

+        # accumulate loss (if accumulate_grad_batches = 1 no effect)
+        loss = loss / self.accumulate_grad_batches
+
         # backward pass
         if self.use_amp:
             # scale loss when using amp
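The lines added here divide each batch loss by accumulate_grad_batches up front, so the gradients summed across several backward passes end up on the same scale as one large batch. Below is a minimal, self-contained sketch of that pattern; the toy model, data, and accumulate_grad_batches value are placeholders, not code from trainer.py, and the amp branch (scaling the loss before backward) is omitted.

import torch
from torch import nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulate_grad_batches = 4  # hypothetical setting
loader = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(8)]

for batch_nb, (x, y) in enumerate(loader):
    loss = nn.functional.mse_loss(model(x), y)

    # accumulate loss (if accumulate_grad_batches = 1 no effect)
    loss = loss / accumulate_grad_batches
    loss.backward()  # gradients from successive batches are summed into .grad

    # step only once the configured number of batches has accumulated
    if (batch_nb + 1) % accumulate_grad_batches == 0:
        optimizer.step()
        optimizer.zero_grad()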
@@ -918,12 +921,11 @@ We recommend you switch to ddp if you want to use amp
                 for param in model.parameters():
                     print(param.grad.float().sum())

-        # avoid memory leaks
+        # track total loss for logging (avoid mem leaks)
         self.batch_loss_value += loss.item()

         # gradient update with accumulated gradients
         if (self.batch_nb + 1) % self.accumulate_grad_batches == 0:

             # clip gradients
             if self.gradient_clip > 0:
                 model = self.__get_model()
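This hunk accumulates the scalar loss with .item() (so no computation graph is kept alive between batches) and clips gradients before the accumulated update. A rough sketch of those two steps follows; it assumes torch.nn.utils.clip_grad_norm_ as the clipping primitive, since the hunk itself does not show which call trainer.py uses, and the model, data, and gradient_clip value are placeholders.

import torch
from torch import nn
from torch.nn.utils import clip_grad_norm_

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
gradient_clip = 0.5        # hypothetical value
batch_loss_value = 0.0     # plain float, detached from the graph

x, y = torch.randn(8, 10), torch.randn(8, 1)
loss = nn.functional.mse_loss(model(x), y)
loss.backward()

# .item() converts the loss to a Python float, so the graph can be freed
batch_loss_value += loss.item()

# clip the global gradient norm before stepping
if gradient_clip > 0:
    clip_grad_norm_(model.parameters(), gradient_clip)

optimizer.step()
optimizer.zero_grad()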
@@ -941,11 +943,7 @@ We recommend you switch to ddp if you want to use amp
             # clear gradients
             optimizer.zero_grad()

-            # queuing loss across batches blows it up proportionally...
-            # divide out the number accumulated
-            self.batch_loss_value = self.batch_loss_value / self.accumulate_grad_batches
-
-            # track loss
+            # calculate running loss for display
             self.running_loss.append(self.batch_loss_value)
             self.batch_loss_value = 0
             self.avg_loss = np.mean(self.running_loss[-100:])
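With the per-batch division now done at loss-computation time (first hunk), the extra division at step time is dropped and only the rolling display average remains. A small sketch of that bookkeeping, using a synthetic loss stream in place of real batch losses:

import numpy as np

running_loss = []
batch_loss_value = 0.0

for step in range(300):
    batch_loss_value += 1.0 / (step + 1)  # placeholder for the accumulated batch loss

    # calculate running loss for display
    running_loss.append(batch_loss_value)
    batch_loss_value = 0.0
    avg_loss = np.mean(running_loss[-100:])  # smooth over the last 100 steps

print(f"avg_loss over last 100 steps: {avg_loss:.4f}")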