2020-02-01 20:47:58 +00:00
|
|
|
import logging as log
|
2019-12-04 15:57:32 +00:00
|
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
2019-10-22 01:16:51 +00:00
|
|
|
import torch
|
2020-01-20 19:50:31 +00:00
|
|
|
|
2019-10-22 01:16:51 +00:00
|
|
|
from pytorch_lightning.callbacks import GradientAccumulationScheduler
|
|
|
|
|
|
|
|
|
2019-12-04 15:57:32 +00:00
|
|
|
class TrainerTrainingTricksMixin(ABC):
    """Mixin bundling gradient helpers: clipping, NaN reporting and accumulation setup.

    The concrete class is expected to implement ``get_model()`` and to assign
    a real value to ``gradient_clip_val``.
    """

    def __init__(self):
        # this is just a summary on variables used in this abstract class,
        # the proper values/initialisation should be done in child class
        self.gradient_clip_val = None

    @abstractmethod
    def get_model(self):
        """Return the model whose parameters the helpers below operate on."""
        # this is just empty shell for code from other class
        pass

    def clip_gradients(self):
        """Clip the gradient norm of the model parameters to ``gradient_clip_val``.

        Does nothing when ``gradient_clip_val`` is ``None`` or not positive.
        """
        clip_val = self.gradient_clip_val
        # ``None`` is the placeholder assigned in ``__init__``; comparing it
        # with ``0`` would raise TypeError, so treat it as "clipping disabled".
        if clip_val is not None and clip_val > 0:
            model = self.get_model()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val)

    def print_nan_gradients(self):
        """Log every model parameter whose gradient contains a NaN value."""
        model = self.get_model()
        for param in model.parameters():
            if (param.grad is not None) and torch.isnan(param.grad.float()).any():
                # The first argument of ``log.info`` is the %-format string;
                # passing the tensors as lazy args keeps the record renderable.
                log.info("%s %s", param, param.grad)

    def configure_accumulated_gradients(self, accumulate_grad_batches):
        """Create the gradient accumulation scheduler and attach it to this trainer.

        Args:
            accumulate_grad_batches: either a dict, passed to
                ``GradientAccumulationScheduler`` as-is, or an int, which is
                wrapped into the schedule ``{1: accumulate_grad_batches}``.

        Raises:
            TypeError: if ``accumulate_grad_batches`` is neither a dict nor an int.
        """
        self.accumulate_grad_batches = None

        if isinstance(accumulate_grad_batches, dict):
            self.accumulation_scheduler = GradientAccumulationScheduler(accumulate_grad_batches)
        elif isinstance(accumulate_grad_batches, int):
            schedule = {1: accumulate_grad_batches}
            self.accumulation_scheduler = GradientAccumulationScheduler(schedule)
        else:
            raise TypeError("Gradient accumulation supports only int and dict types")

        self.accumulation_scheduler.set_trainer(self)
|