# lightning/pytorch_lightning/testing/lm_test_module_mixins.py
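"""
Mixins that add validation- and test-loop behavior to the Lightning test models.

Each mixin assumes the host class provides ``_dataloader``, ``forward``, and
``loss`` (as the shared ``LightningTestModelBase`` does), and is meant to be
combined with that base class rather than instantiated on its own.
"""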
from collections import OrderedDict

import torch

from pytorch_lightning import data_loader
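
# Composition sketch (illustrative; the base-class name comes from the refactor
# this file belongs to, and any hparams plumbing is assumed to live there): a
# concrete test model picks up behavior by listing mixins ahead of the base
# class, e.g.
#
#   class LightningTestModel(LightningValidationMixin,
#                            LightningTestMixin,
#                            LightningTestModelBase):
#       pass
#
# The mixins must come first in the MRO so their val_dataloader/test_dataloader
# definitions take precedence over any defaults on the base.
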
class LightningValidationStepMixin:
    """
    Add val_dataloader and validation_step methods for the case
    when val_dataloader returns a single dataloader.
    """

@data_loader
def val_dataloader(self):
return self._dataloader(train=False)

    def validation_step(self, data_batch, batch_i):
        """
        Lightning calls this inside the validation loop.

        :param data_batch: tuple of (inputs, targets) produced by val_dataloader
        :param batch_i: index of the current batch
        :return: dict (or tensor) collected into the ``outputs`` of validation_end
        """
x, y = data_batch
x = x.view(x.size(0), -1)
y_hat = self.forward(x)
loss_val = self.loss(y, y_hat)
# acc
labels_hat = torch.argmax(y_hat, dim=1)
val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
val_acc = torch.tensor(val_acc)
if self.on_gpu:
val_acc = val_acc.cuda(loss_val.device.index)
        # in DP mode (the default multi-GPU backend) make sure a scalar result
        # gets a leading dim so per-GPU outputs can be gathered along it
        if self.trainer.use_dp:
            loss_val = loss_val.unsqueeze(0)
            val_acc = val_acc.unsqueeze(0)

        # alternate possible outputs to test
        # NOTE: batch_i % 1 == 0 is always true, so this dict is returned for
        # every batch; the branches below are unreachable and only document
        # alternative return formats
        if batch_i % 1 == 0:
output = OrderedDict({
'val_loss': loss_val,
'val_acc': val_acc,
})
return output
if batch_i % 2 == 0:
return val_acc
if batch_i % 3 == 0:
output = OrderedDict({
'val_loss': loss_val,
'val_acc': val_acc,
'test_dic': {'val_loss_a': loss_val}
})
return output


class LightningValidationMixin(LightningValidationStepMixin):
    """
    Add val_dataloader, validation_step, and validation_end methods for the case
    when val_dataloader returns a single dataloader.
    """

    def validation_end(self, outputs):
        """
        Called at the end of validation to aggregate the step outputs.

        :param outputs: list of individual outputs from each validation_step
        :return: dict of aggregated metrics to display in the progress bar
        """
        # if validation_step returned a bare scalar, `outputs` would be a list
        # of scalar tensors and the average alone could be returned instead:
        # return torch.stack(outputs).mean()
val_loss_mean = 0
val_acc_mean = 0
for output in outputs:
val_loss = output['val_loss']
# reduce manually when using dp
if self.trainer.use_dp:
val_loss = torch.mean(val_loss)
val_loss_mean += val_loss
# reduce manually when using dp
val_acc = output['val_acc']
if self.trainer.use_dp:
val_acc = torch.mean(val_acc)
val_acc_mean += val_acc
val_loss_mean /= len(outputs)
val_acc_mean /= len(outputs)
tqdm_dic = {'val_loss': val_loss_mean.item(), 'val_acc': val_acc_mean.item()}
return tqdm_dic
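
# For reference: with dp on two GPUs, each per-step `output` above carries a
# leading per-GPU dimension, e.g. (hypothetical values)
#   {'val_loss': tensor([0.93, 0.88]), 'val_acc': tensor([0.61, 0.64])}
# which is why validation_end applies torch.mean(...) before accumulating,
# collapsing the per-GPU axis so the running sums stay scalar.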


class LightningValidationStepMultipleDataloadersMixin:
    """
    Add val_dataloader and validation_step methods for the case
    when val_dataloader returns multiple dataloaders.
    """

@data_loader
def val_dataloader(self):
return [self._dataloader(train=False), self._dataloader(train=False)]

    def validation_step(self, data_batch, batch_i, dataloader_i):
        """
        Lightning calls this inside the validation loop, once per dataloader.

        :param data_batch: tuple of (inputs, targets) produced by val_dataloader
        :param batch_i: index of the current batch
        :param dataloader_i: index of the dataloader the batch came from
        :return: dict (or tensor) collected into the ``outputs`` of validation_end
        """
x, y = data_batch
x = x.view(x.size(0), -1)
y_hat = self.forward(x)
loss_val = self.loss(y, y_hat)
# acc
labels_hat = torch.argmax(y_hat, dim=1)
val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
val_acc = torch.tensor(val_acc)
if self.on_gpu:
val_acc = val_acc.cuda(loss_val.device.index)
        # in DP mode (the default multi-GPU backend) make sure a scalar result
        # gets a leading dim so per-GPU outputs can be gathered along it
        if self.trainer.use_dp:
            loss_val = loss_val.unsqueeze(0)
            val_acc = val_acc.unsqueeze(0)

        # alternate possible outputs to test
        # NOTE: batch_i % 1 == 0 is always true, so this dict is returned for
        # every batch; the branches below, including the per-dataloader keys
        # under % 5, are unreachable and only document alternative formats
        if batch_i % 1 == 0:
output = OrderedDict({
'val_loss': loss_val,
'val_acc': val_acc,
})
return output
if batch_i % 2 == 0:
return val_acc
if batch_i % 3 == 0:
output = OrderedDict({
'val_loss': loss_val,
'val_acc': val_acc,
'test_dic': {'val_loss_a': loss_val}
})
return output
if batch_i % 5 == 0:
output = OrderedDict({
f'val_loss_{dataloader_i}': loss_val,
f'val_acc_{dataloader_i}': val_acc,
})
return output


class LightningValidationMultipleDataloadersMixin(LightningValidationStepMultipleDataloadersMixin):
    """
    Add val_dataloader, validation_step, and validation_end methods for the case
    when val_dataloader returns multiple dataloaders.
    """

    def validation_end(self, outputs):
        """
        Called at the end of validation to aggregate the step outputs.

        :param outputs: list of individual outputs from each validation_step
        :return: dict of aggregated metrics to display in the progress bar
        """
        # if validation_step returned a bare scalar, `outputs` would be a list
        # of scalar tensors and the average alone could be returned instead:
        # return torch.stack(outputs).mean()
val_loss_mean = 0
val_acc_mean = 0
for output in outputs:
val_loss = output['val_loss']
# reduce manually when using dp
if self.trainer.use_dp:
val_loss = torch.mean(val_loss)
val_loss_mean += val_loss
# reduce manually when using dp
val_acc = output['val_acc']
if self.trainer.use_dp:
val_acc = torch.mean(val_acc)
val_acc_mean += val_acc
val_loss_mean /= len(outputs)
val_acc_mean /= len(outputs)
tqdm_dic = {'val_loss': val_loss_mean.item(), 'val_acc': val_acc_mean.item()}
return tqdm_dic
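
# NOTE: validation_end above reads only the 'val_loss'/'val_acc' keys, so the
# per-dataloader key format sketched in the % 5 branch would need its own
# aggregation logic if that branch were ever made reachable.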


class LightningTestStepMixin:
    """
    Add test_dataloader and test_step methods for the case
    when test_dataloader returns a single dataloader.
    """

@data_loader
def test_dataloader(self):
return self._dataloader(train=False)

    def test_step(self, data_batch, batch_i):
        """
        Lightning calls this inside the test loop.

        :param data_batch: tuple of (inputs, targets) produced by test_dataloader
        :param batch_i: index of the current batch
        :return: dict (or tensor) collected into the ``outputs`` of test_end
        """
x, y = data_batch
x = x.view(x.size(0), -1)
y_hat = self.forward(x)
loss_test = self.loss(y, y_hat)
# acc
labels_hat = torch.argmax(y_hat, dim=1)
test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
test_acc = torch.tensor(test_acc)
if self.on_gpu:
test_acc = test_acc.cuda(loss_test.device.index)
        # in DP mode (the default multi-GPU backend) make sure a scalar result
        # gets a leading dim so per-GPU outputs can be gathered along it
        if self.trainer.use_dp:
            loss_test = loss_test.unsqueeze(0)
            test_acc = test_acc.unsqueeze(0)

        # alternate possible outputs to test
        # NOTE: batch_i % 1 == 0 is always true, so this dict is returned for
        # every batch; the branches below are unreachable and only document
        # alternative return formats
        if batch_i % 1 == 0:
output = OrderedDict({
'test_loss': loss_test,
'test_acc': test_acc,
})
return output
if batch_i % 2 == 0:
return test_acc
if batch_i % 3 == 0:
output = OrderedDict({
'test_loss': loss_test,
'test_acc': test_acc,
'test_dic': {'test_loss_a': loss_test}
})
return output


class LightningTestMixin(LightningTestStepMixin):
    """
    Add test_dataloader, test_step, and test_end methods for the case
    when test_dataloader returns a single dataloader.
    """

    def test_end(self, outputs):
        """
        Called at the end of testing to aggregate the step outputs.

        :param outputs: list of individual outputs from each test_step
        :return: dict of aggregated metrics to display in the progress bar
        """
        # if test_step returned a bare scalar, `outputs` would be a list of
        # scalar tensors and the average alone could be returned instead:
        # return torch.stack(outputs).mean()
test_loss_mean = 0
test_acc_mean = 0
for output in outputs:
test_loss = output['test_loss']
# reduce manually when using dp
if self.trainer.use_dp:
test_loss = torch.mean(test_loss)
test_loss_mean += test_loss
# reduce manually when using dp
test_acc = output['test_acc']
if self.trainer.use_dp:
test_acc = torch.mean(test_acc)
test_acc_mean += test_acc
test_loss_mean /= len(outputs)
test_acc_mean /= len(outputs)
tqdm_dic = {'test_loss': test_loss_mean.item(), 'test_acc': test_acc_mean.item()}
return tqdm_dic


class LightningTestStepMultipleDataloadersMixin:
    """
    Add test_dataloader and test_step methods for the case
    when test_dataloader returns multiple dataloaders.
    """

@data_loader
def test_dataloader(self):
return [self._dataloader(train=False), self._dataloader(train=False)]

    def test_step(self, data_batch, batch_i, dataloader_i):
        """
        Lightning calls this inside the test loop, once per dataloader.

        :param data_batch: tuple of (inputs, targets) produced by test_dataloader
        :param batch_i: index of the current batch
        :param dataloader_i: index of the dataloader the batch came from
        :return: dict (or tensor) collected into the ``outputs`` of test_end
        """
x, y = data_batch
x = x.view(x.size(0), -1)
y_hat = self.forward(x)
loss_test = self.loss(y, y_hat)
# acc
labels_hat = torch.argmax(y_hat, dim=1)
test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
test_acc = torch.tensor(test_acc)
if self.on_gpu:
test_acc = test_acc.cuda(loss_test.device.index)
        # in DP mode (the default multi-GPU backend) make sure a scalar result
        # gets a leading dim so per-GPU outputs can be gathered along it
        if self.trainer.use_dp:
            loss_test = loss_test.unsqueeze(0)
            test_acc = test_acc.unsqueeze(0)

        # alternate possible outputs to test
        # NOTE: batch_i % 1 == 0 is always true, so this dict is returned for
        # every batch; the branches below, including the per-dataloader keys
        # under % 5, are unreachable and only document alternative formats
        if batch_i % 1 == 0:
output = OrderedDict({
'test_loss': loss_test,
'test_acc': test_acc,
})
return output
if batch_i % 2 == 0:
return test_acc
if batch_i % 3 == 0:
output = OrderedDict({
'test_loss': loss_test,
'test_acc': test_acc,
'test_dic': {'test_loss_a': loss_test}
})
return output
if batch_i % 5 == 0:
output = OrderedDict({
f'test_loss_{dataloader_i}': loss_test,
f'test_acc_{dataloader_i}': test_acc,
})
return output


class LightningTestMultipleDataloadersMixin(LightningTestStepMultipleDataloadersMixin):
    """
    Add test_dataloader, test_step, and test_end methods for the case
    when test_dataloader returns multiple dataloaders.
    """

    def test_end(self, outputs):
        """
        Called at the end of testing to aggregate the step outputs.

        :param outputs: list of individual outputs from each test_step
        :return: dict of aggregated metrics to display in the progress bar
        """
        # if test_step returned a bare scalar, `outputs` would be a list of
        # scalar tensors and the average alone could be returned instead:
        # return torch.stack(outputs).mean()
test_loss_mean = 0
test_acc_mean = 0
for output in outputs:
test_loss = output['test_loss']
# reduce manually when using dp
if self.trainer.use_dp:
test_loss = torch.mean(test_loss)
test_loss_mean += test_loss
# reduce manually when using dp
test_acc = output['test_acc']
if self.trainer.use_dp:
test_acc = torch.mean(test_acc)
test_acc_mean += test_acc
test_loss_mean /= len(outputs)
test_acc_mean /= len(outputs)
tqdm_dic = {'test_loss': test_loss_mean.item(), 'test_acc': test_acc_mean.item()}
return tqdm_dic
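

# End-to-end sketch (hypothetical; Trainer arguments follow the 0.4.x API and
# `hparams` is whatever namespace the base test model expects):
#
#   from pytorch_lightning import Trainer
#
#   model = LightningTestModel(hparams)
#   trainer = Trainer(max_nb_epochs=1)
#   trainer.fit(model)    # validation mixins drive the val loop
#   trainer.test(model)   # test mixins drive test_step/test_end
#
# The tqdm_dic returned from validation_end/test_end is what shows up as the
# aggregated metrics in the progress bar.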