500 lines
16 KiB
Python
500 lines
16 KiB
Python
import platform
|
|
|
|
import pytest
|
|
import torch
|
|
from torch.utils.data.dataloader import DataLoader
|
|
from torch.utils.data.dataset import Subset
|
|
|
|
import tests.base.utils as tutils
|
|
from pytorch_lightning import Trainer
|
|
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
|
from tests.base import EvalModelTemplate
|
|
|
|
|
|
def test_fit_train_loader_only(tmpdir):
    """Fit with a train dataloader passed to ``fit`` while the model exposes no loaders or eval hooks."""
    model = EvalModelTemplate()
    # take the loader from the model before its own hook is blanked out below
    loader = model.train_dataloader()

    # unset all dataloader hooks and the validation/test hooks on the model
    hooks_to_unset = (
        'train_dataloader',
        'val_dataloader',
        'test_dataloader',
        'validation_step',
        'validation_epoch_end',
        'test_step',
        'test_epoch_end',
    )
    for hook_name in hooks_to_unset:
        setattr(model, hook_name, None)

    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=loader)
|
|
|
|
|
|
def test_fit_val_loader_only(tmpdir):
    """Fit with train and val dataloaders passed to ``fit`` while the model exposes no loaders or test hooks."""
    model = EvalModelTemplate()
    # build both loaders from the model before its hooks are blanked out below
    train_loader = model.train_dataloader()
    val_loader = model.val_dataloader()

    # unset all dataloader hooks and the test hooks; the validation hooks stay in place
    hooks_to_unset = (
        'train_dataloader',
        'val_dataloader',
        'test_dataloader',
        'test_step',
        'test_epoch_end',
    )
    for hook_name in hooks_to_unset:
        setattr(model, hook_name, None)

    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=train_loader, val_dataloaders=val_loader)
|
|
|
|
|
|
@pytest.mark.parametrize("dataloader_options", [
    {'train_percent_check': -0.1},
    {'train_percent_check': 1.1},
    {'val_check_interval': 1.1},
    {'val_check_interval': 10000},
])
def test_dataloader_config_errors(tmpdir, dataloader_options):
    """Each parametrized ``Trainer`` option set is expected to make ``fit`` raise ``ValueError``."""
    model = EvalModelTemplate()

    # the Trainer is built outside the ``raises`` block: only ``fit`` is expected to fail
    trainer_kwargs = dict(default_root_dir=tmpdir, max_epochs=1, **dataloader_options)
    trainer = Trainer(**trainer_kwargs)

    with pytest.raises(ValueError):
        trainer.fit(model)
|
|
|
|
|
|
def test_multiple_val_dataloader(tmpdir):
    """Fit a model whose val hooks are the multi-dataloader variants and check both loaders are used."""
    model = EvalModelTemplate()
    # swap in the multi-dataloader variants of the validation hooks
    model.val_dataloader = model.val_dataloader__multiple
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end_multiple_dataloaders

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=1.0,
    )
    trainer = Trainer(**trainer_kwargs)
    outcome = trainer.fit(model)

    # training must report completion
    assert outcome == 1

    # exactly two validation loaders must have been registered on the trainer
    assert len(trainer.val_dataloaders) == 2, 'Multiple val_dataloaders not initiated properly'

    # run the prediction check against every validation loader
    for val_loader in trainer.val_dataloaders:
        tutils.run_prediction(val_loader, trainer.model)
|
|
|
|
|
|
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
def test_multiple_test_dataloader(tmpdir, ckpt_path):
    """Fit, then run ``test`` on a model with the multi-dataloader test hooks and check both loaders are used."""

    # template instance whose multi-dataloader hooks are delegated to by the subclass below
    template = EvalModelTemplate()

    class MultipleTestDataloaderModel(EvalModelTemplate):
        def test_dataloader(self):
            return template.test_dataloader__multiple()

        def test_step(self, batch, batch_idx, *args, **kwargs):
            return template.test_step__multiple_dataloaders(batch, batch_idx, *args, **kwargs)

    model = MultipleTestDataloaderModel()

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(model)

    # 'specific' is a placeholder that is replaced by the checkpoint callback's best model path
    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path
    trainer.test(ckpt_path=ckpt_path)

    # exactly two test loaders must have been registered on the trainer
    assert len(trainer.test_dataloaders) == 2, 'Multiple test_dataloaders not initiated properly'

    # run the prediction check against every test loader
    for test_loader in trainer.test_dataloaders:
        tutils.run_prediction(test_loader, trainer.model)

    # a second ``test`` call with the same checkpoint selection must also run
    trainer.test(ckpt_path=ckpt_path)
|
|
|
|
|
|
def test_train_dataloader_passed_to_fit(tmpdir):
    """Check that ``fit`` completes when only a train dataloader is passed to it."""
    model = EvalModelTemplate()

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2,
    )
    trainer = Trainer(**trainer_kwargs)

    # the train loader is the only loader handed to ``fit``
    train_loader = model.dataloader(train=True)
    outcome = trainer.fit(model, train_dataloader=train_loader)

    assert outcome == 1
|
|
|
|
|
|
def test_train_val_dataloaders_passed_to_fit(tmpdir):
    """Check that ``fit`` completes when both a train and a val dataloader are passed to it."""
    model = EvalModelTemplate()

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2,
    )
    trainer = Trainer(**trainer_kwargs)

    # both loaders are handed to ``fit`` directly
    train_loader = model.dataloader(train=True)
    val_loader = model.dataloader(train=False)
    outcome = trainer.fit(model, train_dataloader=train_loader, val_dataloaders=val_loader)

    assert outcome == 1
    # the single val loader must end up as a one-element collection on the trainer
    assert len(trainer.val_dataloaders) == 1, \
        f'`val_dataloaders` not initiated properly, got {trainer.val_dataloaders}'
|
|
|
|
|
|
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
def test_all_dataloaders_passed_to_fit(tmpdir, ckpt_path):
    """Verify train, val & test dataloader(s) can be passed to fit and test method"""

    model = EvalModelTemplate()

    # train and val loaders go to ``fit``; the test loader goes to ``test`` further down
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2,
    )
    fit_options = dict(train_dataloader=model.dataloader(train=True),
                       val_dataloaders=model.dataloader(train=False))
    result = trainer.fit(model, **fit_options)
    # fail fast: there is no point running ``test`` if training did not complete
    assert result == 1

    # 'specific' is a placeholder that is replaced by the checkpoint callback's best model path
    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path
    test_options = dict(test_dataloaders=model.dataloader(train=False),
                        ckpt_path=ckpt_path)
    trainer.test(**test_options)

    # each single loader must end up as a one-element collection on the trainer
    assert len(trainer.val_dataloaders) == 1, \
        f'`val_dataloaders` not initiated properly, got {trainer.val_dataloaders}'
    assert len(trainer.test_dataloaders) == 1, \
        f'`test_dataloaders` not initiated properly, got {trainer.test_dataloaders}'
|
|
|
|
|
|
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
def test_multiple_dataloaders_passed_to_fit(tmpdir, ckpt_path):
    """Pass two val loaders to ``fit`` and two test loaders to ``test``; check both pairs are registered."""
    model = EvalModelTemplate()
    # swap in the multi-dataloader variants of the validation/test hooks
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end_multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2,
    )
    trainer = Trainer(**trainer_kwargs)

    # one train loader plus two independent val loaders
    train_loader = model.dataloader(train=True)
    val_loaders = [model.dataloader(train=False) for _ in range(2)]
    trainer.fit(model, train_dataloader=train_loader, val_dataloaders=val_loaders)

    # 'specific' is a placeholder that is replaced by the checkpoint callback's best model path
    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path

    # two independent test loaders
    test_loaders = [model.dataloader(train=False) for _ in range(2)]
    trainer.test(test_dataloaders=test_loaders, ckpt_path=ckpt_path)

    assert len(trainer.val_dataloaders) == 2, \
        f'Multiple `val_dataloaders` not initiated properly, got {trainer.val_dataloaders}'
    assert len(trainer.test_dataloaders) == 2, \
        f'Multiple `test_dataloaders` not initiated properly, got {trainer.test_dataloaders}'
|
|
|
|
|
|
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
def test_mixing_of_dataloader_options(tmpdir, ckpt_path):
    """Fit with only a val loader passed to ``fit`` (train comes from the model), then test with a passed loader."""
    model = EvalModelTemplate()

    trainer_options = {
        'default_root_dir': tmpdir,
        'max_epochs': 1,
        'val_percent_check': 0.1,
        'train_percent_check': 0.2,
    }

    # Two fit runs, each with a fresh Trainer and a fresh val loader.
    # NOTE(review): both runs use identical options; presumably one of them was meant to
    # exercise a different mix of loaders - confirm the intent.
    for _ in range(2):
        trainer = Trainer(**trainer_options)
        results = trainer.fit(model, val_dataloaders=model.dataloader(train=False))
        assert results

    # 'specific' is a placeholder that is replaced by the checkpoint callback's best model path
    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path
    trainer.test(test_dataloaders=model.dataloader(train=False), ckpt_path=ckpt_path)

    assert len(trainer.val_dataloaders) == 1, \
        f'`val_dataloaders` not initiated properly, got {trainer.val_dataloaders}'
    assert len(trainer.test_dataloaders) == 1, \
        f'`test_dataloaders` not initiated properly, got {trainer.test_dataloaders}'
|
|
|
|
|
|
@pytest.mark.skip('TODO: speed up this test')
def test_train_inf_dataloader_error(tmpdir):
    """Expect ``fit`` to reject an infinite train dataloader when ``val_check_interval=0.5``."""
    model = EvalModelTemplate()
    # use the infinite variant of the train loader
    model.train_dataloader = model.train_dataloader__infinite

    trainer_kwargs = dict(default_root_dir=tmpdir, max_epochs=1, val_check_interval=0.5)
    trainer = Trainer(**trainer_kwargs)

    # the raised error message must mention the infinite DataLoader
    with pytest.raises(MisconfigurationException, match='infinite DataLoader'):
        trainer.fit(model)
|
|
|
|
|
|
@pytest.mark.skip('TODO: speed up this test')
def test_val_inf_dataloader_error(tmpdir):
    """Expect ``fit`` to reject an infinite val dataloader when ``val_percent_check=0.5``."""
    model = EvalModelTemplate()
    # use the infinite variant of the val loader
    model.val_dataloader = model.val_dataloader__infinite

    trainer_kwargs = dict(default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.5)
    trainer = Trainer(**trainer_kwargs)

    # the raised error message must mention the infinite DataLoader
    with pytest.raises(MisconfigurationException, match='infinite DataLoader'):
        trainer.fit(model)
|
|
|
|
|
|
@pytest.mark.skip('TODO: speed up this test')
def test_test_inf_dataloader_error(tmpdir):
    """Expect ``test`` to reject an infinite test dataloader when ``test_percent_check=0.5``."""
    model = EvalModelTemplate()
    # use the infinite variant of the test loader
    model.test_dataloader = model.test_dataloader__infinite

    trainer_kwargs = dict(default_root_dir=tmpdir, max_epochs=1, test_percent_check=0.5)
    trainer = Trainer(**trainer_kwargs)

    # the raised error message must mention the infinite DataLoader
    with pytest.raises(MisconfigurationException, match='infinite DataLoader'):
        trainer.test(model)
|
|
|
|
|
|
@pytest.mark.parametrize('check_interval', [50, 1.0])
@pytest.mark.skip('TODO: speed up this test')
def test_inf_train_dataloader(tmpdir, check_interval):
    """Fit with an infinite train dataloader for each parametrized ``val_check_interval``."""
    model = EvalModelTemplate()
    # use the infinite variant of the train loader
    model.train_dataloader = model.train_dataloader__infinite

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    outcome = Trainer(**trainer_kwargs).fit(model)

    # training must report completion
    assert outcome == 1
|
|
|
|
|
|
@pytest.mark.parametrize('check_interval', [1.0])
@pytest.mark.skip('TODO: speed up this test')
def test_inf_val_dataloader(tmpdir, check_interval):
    """Fit with an infinite val dataloader for each parametrized ``val_check_interval``."""
    model = EvalModelTemplate()
    # use the infinite variant of the val loader
    model.val_dataloader = model.val_dataloader__infinite

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    outcome = Trainer(**trainer_kwargs).fit(model)

    # training must report completion
    assert outcome == 1
|
|
|
|
|
|
def test_error_on_zero_len_dataloader(tmpdir):
    """ Test that error is raised if a zero-length dataloader is defined """

    model = EvalModelTemplate()
    # use the zero-length variant of the train loader
    model.train_dataloader = model.train_dataloader__zero_length

    # Build the Trainer outside the ``raises`` block so that only ``fit`` can satisfy the
    # expectation; a ValueError from the constructor must not make this test pass.
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        test_percent_check=0.1,
    )

    # fit model
    with pytest.raises(ValueError):
        trainer.fit(model)
|
|
|
|
|
|
@pytest.mark.skipif(platform.system() == 'Windows', reason='Does not apply to Windows platform.')
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
def test_warning_with_few_workers(tmpdir, ckpt_path):
    """ Test that a warning is raised if a dataloader with only a few workers is used """

    model = EvalModelTemplate()

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2
    )

    # one loader per stage, each forced to zero workers
    train_dl = model.dataloader(train=True)
    train_dl.num_workers = 0

    val_dl = model.dataloader(train=False)
    val_dl.num_workers = 0

    # dedicated test loader; it must not overwrite ``train_dl``, which ``fit`` still needs
    test_dl = model.dataloader(train=False)
    test_dl.num_workers = 0

    fit_options = dict(train_dataloader=train_dl,
                       val_dataloaders=val_dl)
    trainer = Trainer(**trainer_options)

    # fit model: one run per expected warning, matched on the stage name in the message
    with pytest.warns(UserWarning, match='train'):
        trainer.fit(model, **fit_options)

    with pytest.warns(UserWarning, match='val'):
        trainer.fit(model, **fit_options)

    # 'specific' is a placeholder that is replaced by the checkpoint callback's best model path
    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path
    test_options = dict(test_dataloaders=test_dl, ckpt_path=ckpt_path)
    with pytest.warns(UserWarning, match='test'):
        trainer.test(**test_options)
|
|
|
|
|
|
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason='Test requires multiple GPUs')
def test_dataloader_reinit_for_subclass():
    """Check what ``Trainer.auto_add_sampler`` returns for a non-loader object and a ``DataLoader`` subclass."""

    # DataLoader subclass that takes one extra keyword and stores it on the instance
    class CustomDataLoader(torch.utils.data.DataLoader):
        def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
                     batch_sampler=None, num_workers=0, collate_fn=None,
                     pin_memory=False, drop_last=False, timeout=0,
                     worker_init_fn=None, dummy_kwarg=None):
            super().__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
                             num_workers, collate_fn, pin_memory, drop_last, timeout,
                             worker_init_fn)

            self.dummy_kwarg = dummy_kwarg

    # not a DataLoader at all: only carries a ``sampler`` attribute
    class CustomDummyObj:
        sampler = None

    # user-defined sampler used to trigger the error case at the end
    class CustomSampler(torch.utils.data.Sampler):
        pass

    def _numbers():
        # fresh 1000-element dataset for every loader/sampler
        return list(range(1000))

    trainer = Trainer(gpus=[0, 1], num_nodes=1, distributed_backend='ddp')

    # an object that is not a DataLoader must come back as the same type
    passthrough = trainer.auto_add_sampler(CustomDummyObj(), train=True)
    assert isinstance(passthrough, CustomDummyObj), "Wrongly reinstantiated data loader"

    # default loader first, then a shuffled one: both must keep the subclass type and extra attribute
    for extra_kwargs in ({}, {'shuffle': True}):
        rebuilt = trainer.auto_add_sampler(CustomDataLoader(_numbers(), **extra_kwargs), train=True)
        assert isinstance(rebuilt, torch.utils.data.DataLoader)
        assert isinstance(rebuilt, CustomDataLoader)
        assert hasattr(rebuilt, 'dummy_kwarg')

    # Should raise an error if existing sampler is being replaced
    with pytest.raises(MisconfigurationException, match='DistributedSampler'):
        trainer.auto_add_sampler(
            CustomDataLoader(_numbers(), sampler=CustomSampler(_numbers())), train=True)
|
|
|
|
|
|
@pytest.mark.skipif(torch.cuda.device_count() < 3, reason='Test requires multiple GPUs')
def test_batch_size_smaller_than_num_gpus():
    """Fit on 3 GPUs with a train set whose last batch holds fewer samples than there are GPUs."""
    # we need at least 3 gpus for this test
    num_gpus = 3
    batch_size = 3

    class CurrentTestModel(EvalModelTemplate):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # batch norm doesn't work with batch size 1, we replace it
            self.c_d1_bn = torch.nn.ReLU()

        def training_step(self, *args, **kwargs):
            step_output = super().training_step(*args, **kwargs)
            # we make sure to add some metrics to the output dict,
            # this is essential for this test
            step_output['progress_bar'] = {'train_loss': step_output['loss']}
            return step_output

        def train_dataloader(self):
            base_loader = super().train_dataloader()
            # construct a dataset with a size that is not divisible by num_gpus
            # therefore the last batch will have a size < num_gpus
            num_samples = num_gpus * batch_size + (num_gpus - 1)
            truncated = Subset(base_loader.dataset, range(num_samples))
            return DataLoader(truncated, batch_size=self.batch_size, drop_last=False)

    hparams = EvalModelTemplate.get_default_hparams()
    hparams['batch_size'] = batch_size
    model = CurrentTestModel(**hparams)

    trainer_kwargs = dict(
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0,
        gpus=num_gpus,
    )
    trainer = Trainer(**trainer_kwargs)

    # we expect the reduction for the metrics also to happen on the last batch
    # where we will get fewer metrics than gpus
    outcome = trainer.fit(model)
    assert 1 == outcome
|