diff --git a/docs/Trainer/hooks.md b/docs/Trainer/hooks.md
index 13c58eb3f5..a5a776d76e 100644
--- a/docs/Trainer/hooks.md
+++ b/docs/Trainer/hooks.md
@@ -97,6 +97,23 @@ def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i):
 # add as many optimizers as you want
 ```

+Overriding this step gives you full control over the optimizer update, which enables non-standard training tricks such as learning-rate warm-up:
+
+```python
+# learning rate warm-up
+def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i):
+    # linearly ramp up the learning rate over the first 500 global steps
+    if self.trainer.global_step < 500:
+        lr_scale = min(1., float(self.trainer.global_step + 1) / 500.)
+        for pg in optimizer.param_groups:
+            pg['lr'] = lr_scale * self.hparams.learning_rate
+
+    # update params and clear gradients
+    optimizer.step()
+    optimizer.zero_grad()
+```
+
+
 ---
 #### on_before_zero_grad
 Called in the training loop after taking an optimizer step and before zeroing grads.
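
Beyond the patch itself, it may help to see where this override lives inside a full `LightningModule`. The sketch below is illustrative only and is not part of the diff: it assumes the 0.5-era API used in these docs (`batch_nb`, `optimizer_i`, direct `self.hparams` assignment), and the class name, toy model, and `learning_rate` hyperparameter are invented for the example.

```python
# Minimal sketch (not part of the patch) of a LightningModule that uses the
# warm-up optimizer_step override from the docs above. Names such as
# WarmupLRModel and the toy linear model are illustrative assumptions.
from argparse import Namespace

import torch
import torch.nn as nn
import pytorch_lightning as pl


class WarmupLRModel(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams          # expects hparams.learning_rate (old-style assignment)
        self.layer = nn.Linear(32, 2)   # toy model for illustration

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss = nn.functional.cross_entropy(self.forward(x), y)
        return {'loss': loss}

    def configure_optimizers(self):
        # the base learning rate that the warm-up scales toward
        return [torch.optim.SGD(self.parameters(), lr=self.hparams.learning_rate)]

    def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i):
        # linearly ramp the learning rate up over the first 500 global steps
        if self.trainer.global_step < 500:
            lr_scale = min(1., float(self.trainer.global_step + 1) / 500.)
            for pg in optimizer.param_groups:
                pg['lr'] = lr_scale * self.hparams.learning_rate

        # then take the usual optimizer step and clear gradients
        optimizer.step()
        optimizer.zero_grad()


# hypothetical usage (dataloaders omitted for brevity):
# model = WarmupLRModel(Namespace(learning_rate=0.02))
# pl.Trainer().fit(model)
```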
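
The `on_before_zero_grad` hook shown in the trailing context is a natural place to inspect gradients while they are still populated. The following sketch is an assumption based only on the one-line description above (the `optimizer` argument and the logging body are not taken from this patch) and would live in the same `LightningModule`:

```python
# hedged sketch: log the total gradient norm right before gradients are zeroed;
# the signature with an `optimizer` argument is an assumption, not from the patch
def on_before_zero_grad(self, optimizer):
    total_norm = 0.0
    for p in self.parameters():
        if p.grad is not None:
            total_norm += p.grad.norm(2).item() ** 2
    print(f'grad norm before zero_grad: {total_norm ** 0.5:.4f}')
```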