2020-03-12 16:47:23 +00:00
|
|
|
from typing import Any
|
2019-11-28 17:48:55 +00:00
|
|
|
|
2019-03-31 01:45:16 +00:00
|
|
|
import torch
|
2020-03-12 16:47:23 +00:00
|
|
|
from torch import Tensor
|
|
|
|
from torch.optim.optimizer import Optimizer
|
2019-08-04 18:08:14 +00:00
|
|
|
|
2019-10-24 11:56:56 +00:00
|
|
|
# NVIDIA apex is an optional dependency: record whether its ``amp`` module
# could be imported so callers can check before relying on it.
try:
    from apex import amp
    APEX_AVAILABLE = True
except ImportError:
    APEX_AVAILABLE = False
|
2019-10-24 11:56:56 +00:00
|
|
|
|
|
|
|
|
2019-03-31 01:45:16 +00:00
|
|
|
class ModelHooks(torch.nn.Module):
    """
    Collection of overridable hooks called around training and validation.

    Apart from :meth:`backward`, every hook defined here does nothing by default;
    subclasses override the ones they need.
    """

    # TODO: remove in v0.9.0
    def on_sanity_check_start(self):
        """
        Called before starting evaluation.

        Warning:
            Deprecated. Will be removed in v0.9.0.
        """

    def on_train_start(self) -> None:
        """
        Called at the beginning of training before sanity check.
        """
        # do something at the start of training

    def on_train_end(self) -> None:
        """
        Called at the end of training before logger experiment is closed.
        """
        # do something at the end of training

    def on_batch_start(self, batch: Any) -> None:
        """
        Called in the training loop before anything happens for that batch.

        If you return -1 here, you will skip training for the rest of the current epoch.

        Args:
            batch: The batched data as it is returned by the training DataLoader.
        """
        # do something when the batch starts

    def on_batch_end(self) -> None:
        """
        Called in the training loop after the batch.
        """
        # do something when the batch ends

    def on_epoch_start(self) -> None:
        """
        Called in the training loop at the very beginning of the epoch.
        """
        # do something when the epoch starts

    def on_epoch_end(self) -> None:
        """
        Called in the training loop at the very end of the epoch.
        """
        # do something when the epoch ends

    def on_pre_performance_check(self) -> None:
        """
        Called at the very beginning of the validation loop.
        """
        # do something before validation starts

    def on_post_performance_check(self) -> None:
        """
        Called at the very end of the validation loop.
        """
        # do something before validation end

    def on_before_zero_grad(self, optimizer: Optimizer) -> None:
        """
        Called in the training loop after ``optimizer.step()`` and before ``optimizer.zero_grad()``.

        Good place to inspect weight information with weights updated.

        This is where it is called::

            for optimizer in optimizers:
                optimizer.step()
                model.on_before_zero_grad(optimizer) # < ---- called here
                optimizer.zero_grad()

        Args:
            optimizer: The optimizer for which grads should be zeroed.
        """
        # do something with the optimizer or inspect it.

    def on_after_backward(self) -> None:
        """
        Called in the training loop after loss.backward() and before optimizers do anything.
        This is the ideal place to inspect or log gradient information.

        Example::

            def on_after_backward(self):
                # example to inspect gradient information in tensorboard
                if self.trainer.global_step % 25 == 0:  # don't make the tf file huge
                    for name, param in self.named_parameters():
                        if param.grad is None:
                            continue
                        self.logger.experiment.add_histogram(tag=name, values=param.grad,
                                                             global_step=self.trainer.global_step)

        """

    def backward(self, trainer, loss: Tensor, optimizer: Optimizer, optimizer_idx: int) -> None:
        """
        Called to perform the backward step.
        Override backward with your own implementation if you need to.

        The loss passed in has already been scaled for accumulated gradients if requested.

        Args:
            trainer: Pointer to the trainer
            loss: Loss is already scaled by accumulated grads
            optimizer: Current optimizer being used
            optimizer_idx: Index of the current optimizer being used

        Example::

            def backward(self, trainer, loss, optimizer, optimizer_idx):
                if trainer.precision == 16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

        """
        if trainer.precision == 16:
            # .backward is not special on 16-bit with TPUs
            # NOTE(review): this branch returns without calling any backward;
            # behaviour kept as-is -- confirm that is intended.
            if trainer.on_tpu:
                return

            # Read the AMP settings from the ``trainer`` argument, like the
            # precision/TPU checks above, instead of from ``self.trainer``.
            if trainer.use_native_amp:
                trainer.scaler.scale(loss).backward()

            # TODO: remove in v0.8.0
            else:
                # ``amp`` is only bound when the optional apex import succeeded.
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
        else:
            loss.backward()
|