default test logger (#1478)

* default test logger

* fix tests

* spawn

* try

* simplify tests

* simplify tests

* formatting

* loggers

* loggers

* revert to TestTube

* default

* default

* wraps

* world size

* optim imports
Jirka Borovec 2020-04-22 02:33:10 +02:00 committed by GitHub
parent bafdeca42f
commit c1c6e3b6c9
26 changed files with 136 additions and 264 deletions

View File

@ -102,10 +102,18 @@ class TrainerDataLoadingMixin(ABC):
sampler = DistributedSampler(
dataloader.dataset,
num_replicas=xm.xrt_world_size(),
rank=xm.get_ordinal()
rank=xm.get_ordinal(),
)
else:
sampler = DistributedSampler(dataloader.dataset)
world_size = {
'ddp': self.num_nodes * self.num_processes,
'ddp2': self.num_nodes,
}
sampler = DistributedSampler(
dataloader.dataset,
num_replicas=world_size.get(self.distributed_backend, 0),
rank=self.proc_rank,
)
dl_args['sampler'] = sampler
dataloader = type(dataloader)(**dl_args)
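
For readers skimming the hunk above: in the non-TPU branch the DistributedSampler is now built with an explicit replica count derived from the backend, instead of falling back to the default process group. Below is a minimal, self-contained sketch of that mapping; the function name and the example node/process counts are illustrative, only the 'ddp'/'ddp2' arithmetic comes from the diff.

from torch.utils.data import DistributedSampler

def make_ddp_sampler(dataset, distributed_backend, num_nodes, num_processes, proc_rank):
    # 'ddp' launches one process per GPU on every node, 'ddp2' one process per node,
    # so the two backends see a different number of sampler replicas
    world_size = {
        'ddp': num_nodes * num_processes,
        'ddp2': num_nodes,
    }
    return DistributedSampler(
        dataset,
        num_replicas=world_size.get(distributed_backend, 0),
        rank=proc_rank,
    )

# e.g. 2 nodes x 4 processes: 'ddp' -> 8 replicas (ranks 0..7), 'ddp2' -> 2 replicas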

View File

@ -23,11 +23,7 @@ from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_8, TrainerDeprecatedAPITillVer0_9
from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin
from pytorch_lightning.trainer.distrib_parts import (
TrainerDPMixin,
parse_gpu_ids,
determine_root_gpu_device,
pick_multiple_gpus,
)
TrainerDPMixin, parse_gpu_ids, determine_root_gpu_device, pick_multiple_gpus)
from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
@ -736,13 +732,10 @@ class Trainer(
self.ddp_train(task, model)
else:
self.__set_random_port()
# track for predict
self.model = model
# train
mp.spawn(self.ddp_train, nprocs=self.num_processes, args=(model,))
# load weights if not interrupted
self.load_spawn_weights(model)
self.model = model

View File

@ -1,3 +1,18 @@
import os
import numpy as np
import torch
TEST_ROOT = os.path.dirname(__file__)
PACKAGE_ROOT = os.path.dirname(TEST_ROOT)
TEMP_PATH = os.path.join(PACKAGE_ROOT, 'test_temp')
# generate a list of random seeds for each test
RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
ROOT_SEED = 1234
torch.manual_seed(ROOT_SEED)
np.random.seed(ROOT_SEED)
RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
if not os.path.isdir(TEMP_PATH):
os.mkdir(TEMP_PATH)
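
These shared constants replace the per-file copies that the test utilities used to define (see their removal from the test utils diff further down). A hedged sketch of how helpers could draw from them follows; the helper bodies are assumptions for illustration, only the names reset_seed and set_random_master_port appear elsewhere in this commit.

import os

import numpy as np
import torch

from tests import RANDOM_PORTS, RANDOM_SEEDS

def set_random_master_port():
    # take one pre-generated port so concurrently spawned DDP tests do not collide
    os.environ['MASTER_PORT'] = str(RANDOM_PORTS.pop())

def reset_seed():
    # give the current test a deterministic seed drawn from the shared list
    seed = RANDOM_SEEDS.pop()
    torch.manual_seed(seed)
    np.random.seed(seed)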

View File

@ -2,7 +2,6 @@
import torch
from tests.base.models import TestModelBase, DictHparamsModel
from tests.base.eval_model_template import EvalModelTemplate
from tests.base.mixins import (
LightEmptyTestStep,
@ -31,6 +30,7 @@ from tests.base.mixins import (
LightTestNoneOptimizerMixin,
LightZeroLenDataloader
)
from tests.base.models import TestModelBase, DictHparamsModel
class LightningTestModel(LightTrainDataloader,

View File

@ -7,10 +7,10 @@ import torch
from torch import Tensor
from torch.utils.data import Dataset
from tests import TEST_ROOT
from tests import PACKAGE_ROOT
#: local path to test datasets
PATH_DATASETS = os.path.join(TEST_ROOT, 'Datasets')
PATH_DATASETS = os.path.join(PACKAGE_ROOT, 'Datasets')
class MNIST(Dataset):

View File

@ -7,7 +7,7 @@ from tests.base.datasets import TrialMNIST
# from test_models import assert_ok_test_acc, load_model, \
# clear_save_dir, get_default_testtube_logger, get_default_hparams, init_save_dir, \
# clear_save_dir, get_default_logger, get_default_hparams, init_save_dir, \
# init_checkpoint_callback, reset_seed, set_random_master_port

View File

@ -2,18 +2,18 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
from tests.base.datasets import TrialMNIST
from pytorch_lightning.core.lightning import LightningModule
from tests.base.datasets import TrialMNIST
from tests.base.eval_model_optimizers import ConfigureOptimizersPool
from tests.base.eval_model_test_dataloaders import TestDataloaderVariations
from tests.base.eval_model_test_epoch_ends import TestEpochEndVariations
from tests.base.eval_model_test_steps import TestStepVariations
from tests.base.eval_model_train_dataloaders import TrainDataloaderVariations
from tests.base.eval_model_train_steps import TrainingStepVariations
from tests.base.eval_model_utils import ModelTemplateUtils
from tests.base.eval_model_valid_dataloaders import ValDataloaderVariations
from tests.base.eval_model_valid_epoch_ends import ValidationEpochEndVariations
from tests.base.eval_model_valid_steps import ValidationStepVariations
from tests.base.eval_model_utils import ModelTemplateUtils
class EvalModelTemplate(

View File

@ -1,4 +1,5 @@
from torch.utils.data import DataLoader
from tests.base.datasets import TrialMNIST

View File

@ -1,5 +1,6 @@
from abc import ABC
from collections import OrderedDict
import torch

View File

@ -1,4 +1,3 @@
import os
from collections import OrderedDict
from typing import Dict

View File

@ -7,18 +7,11 @@ import torch
# from pl_examples import LightningTemplateModel
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TestTubeLogger, TensorBoardLogger
from tests.base import LightningTestModel, EvalModelTemplate
from pytorch_lightning.loggers import TensorBoardLogger
from tests import TEMP_PATH, RANDOM_PORTS, RANDOM_SEEDS
from tests.base import LightningTestModel
from tests.base.datasets import PATH_DATASETS
# generate a list of random seeds for each test
RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
ROOT_SEED = 1234
torch.manual_seed(ROOT_SEED)
np.random.seed(ROOT_SEED)
RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
ROOT_PATH = os.path.abspath(os.path.dirname(__file__))
def assert_speed_parity(pl_times, pt_times, num_epochs):
@ -33,7 +26,7 @@ def assert_speed_parity(pl_times, pt_times, num_epochs):
f"lightning was slower than PT (threshold {max_diff_per_epoch})"
def run_model_test_no_loggers(trainer_options, model, min_acc=0.50):
def run_model_test_without_loggers(trainer_options, model, min_acc=0.50):
# save_dir = trainer_options['default_root_dir']
# fit model
@ -66,14 +59,16 @@ def run_model_test(trainer_options, model, on_gpu=True):
save_dir = trainer_options['default_root_dir']
# logger file to get meta
logger = get_default_testtube_logger(save_dir, False)
logger = get_default_logger(save_dir)
# logger file to get weights
checkpoint = init_checkpoint_callback(logger)
# add these to the trainer options
trainer_options['checkpoint_callback'] = checkpoint
trainer_options['logger'] = logger
trainer_options.update(
checkpoint_callback=checkpoint,
logger=logger,
)
# fit model
trainer = Trainer(**trainer_options)
@ -118,8 +113,10 @@ def get_default_hparams(continue_training=False, hpc_exp_number=0):
}
if continue_training:
args['test_tube_do_checkpoint_load'] = True
args['hpc_exp_number'] = hpc_exp_number
args.update(
test_tube_do_checkpoint_load=True,
hpc_exp_number=hpc_exp_number,
)
hparams = Namespace(**args)
return hparams
@ -137,9 +134,9 @@ def get_default_model(lbfgs=False):
return model, hparams
def get_default_testtube_logger(save_dir, debug=True, version=None):
def get_default_logger(save_dir, version=None):
# set up logger object without actually saving logs
logger = TestTubeLogger(save_dir, name='lightning_logs', debug=debug, version=version)
logger = TensorBoardLogger(save_dir, name='lightning_logs', version=version)
return logger
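
Because the default logger is now versioned rather than switched by a debug flag, a test that must write into an existing run pins the version explicitly, as test_cpu_slurm_save_load does further down. A minimal sketch, assuming only the TensorBoardLogger import already shown in this file's diff (the save_dir path is hypothetical):

from pytorch_lightning.loggers import TensorBoardLogger

# first run: the version is auto-assigned (version_0, version_1, ...)
logger = TensorBoardLogger('/tmp/lightning_tests', name='lightning_logs')
version = logger.version

# later run that has to reuse the same experiment directory
resumed_logger = TensorBoardLogger('/tmp/lightning_tests', name='lightning_logs', version=version)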
@ -153,7 +150,10 @@ def get_data_path(expt_logger, path_dir=None):
return expt.get_data_path(name, version)
# the other experiments...
if not path_dir:
path_dir = ROOT_PATH
if hasattr(expt_logger, 'save_dir') and expt_logger.save_dir:
path_dir = expt_logger.save_dir
else:
path_dir = TEMP_PATH
path_expt = os.path.join(path_dir, name, 'version_%s' % version)
# try if the new sub-folder exists, typical case for test-tube
if not os.path.isdir(path_expt):
@ -161,9 +161,9 @@ def get_data_path(expt_logger, path_dir=None):
return path_expt
def load_model(exp, root_weights_dir, module_class=LightningTestModel, path_expt=None):
def load_model(logger, root_weights_dir, module_class=LightningTestModel, path_expt=None):
# load trained model
path_expt_dir = get_data_path(exp, path_dir=path_expt)
path_expt_dir = get_data_path(logger, path_dir=path_expt)
tags_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_CSV_TAGS)
checkpoints = [x for x in os.listdir(root_weights_dir) if '.ckpt' in x]

View File

@ -1,3 +1,5 @@
from functools import wraps
import pytest
import torch.multiprocessing as mp
@ -7,10 +9,6 @@ def pytest_configure(config):
config.addinivalue_line("markers", "spawn: spawn test in a separate process using torch.multiprocessing.spawn")
def wrap(i, fn, args):
return fn(*args)
@pytest.mark.tryfirst
def pytest_pyfunc_call(pyfuncitem):
if pyfuncitem.get_closest_marker("spawn"):
@ -18,5 +16,5 @@ def pytest_pyfunc_call(pyfuncitem):
funcargs = pyfuncitem.funcargs
testargs = tuple([funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames])
mp.spawn(wrap, (testfunction, testargs))
mp.spawn(wraps, (testfunction, testargs))
return True
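
For orientation, the hook above only takes over tests that carry the marker registered in pytest_configure; everything else keeps running in-process. A hedged sketch of a test opting in (the body is illustrative, the marker usage mirrors test_amp.py later in this commit):

import pytest
import torch

@pytest.mark.spawn
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_runs_in_spawned_process(tmpdir):
    # routed through pytest_pyfunc_call above and launched via torch.multiprocessing.spawn
    assert torch.cuda.device_count() >= 1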

View File

@ -7,7 +7,6 @@ import tests.base.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import (
TensorBoardLogger, MLFlowLogger, NeptuneLogger, TestTubeLogger, CometLogger)
from tests.base import LightningTestModel
def _get_logger_args(logger_class, save_dir):

View File

@ -11,66 +11,54 @@ from tests.base import (
)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_amp_single_gpu(tmpdir):
"""Make sure DDP + AMP work."""
tutils.reset_seed()
hparams = tutils.get_default_hparams()
model = LightningTestModel(hparams)
trainer_options = dict(
default_root_dir=tmpdir,
max_epochs=1,
gpus=1,
distributed_backend='ddp',
precision=16
)
tutils.run_model_test(trainer_options, model)
@pytest.mark.spawn
@pytest.mark.parametrize("backend", ['dp', 'ddp'])
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_no_amp_single_gpu(tmpdir):
"""Make sure DDP + AMP work."""
def test_amp_single_gpu(tmpdir, backend):
"""Make sure DP/DDP + AMP work."""
tutils.reset_seed()
hparams = tutils.get_default_hparams()
model = LightningTestModel(hparams)
model, hparams = tutils.get_default_model()
trainer_options = dict(
default_root_dir=tmpdir,
max_epochs=1,
gpus=1,
distributed_backend='dp',
distributed_backend=backend,
precision=16
)
# tutils.run_model_test(trainer_options, model)
trainer = Trainer(**trainer_options)
result = trainer.fit(model)
assert result == 1
@pytest.mark.spawn
@pytest.mark.parametrize("backend", ['dp', 'ddp'])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_amp_gpu_ddp(tmpdir):
"""Make sure DDP + AMP work."""
def test_amp_multi_gpu(tmpdir, backend):
"""Make sure DP/DDP + AMP work."""
tutils.reset_seed()
tutils.set_random_master_port()
hparams = tutils.get_default_hparams()
model = LightningTestModel(hparams)
model, hparams = tutils.get_default_model()
trainer_options = dict(
default_root_dir=tmpdir,
max_epochs=1,
gpus=2,
distributed_backend='ddp',
# gpus=2,
gpus='0, 1', # test init with gpu string
distributed_backend=backend,
precision=16
)
tutils.run_model_test(trainer_options, model)
# tutils.run_model_test(trainer_options, model)
trainer = Trainer(**trainer_options)
result = trainer.fit(model)
assert result
@pytest.mark.spawn
@ -94,7 +82,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
)
# exp file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
# exp file to get weights
checkpoint = tutils.init_checkpoint_callback(logger)
@ -125,7 +113,6 @@ def test_cpu_model_with_amp(tmpdir):
trainer_options = dict(
default_root_dir=tmpdir,
progress_bar_refresh_rate=0,
logger=tutils.get_default_testtube_logger(tmpdir),
max_epochs=1,
train_percent_check=0.4,
val_percent_check=0.4,
@ -136,28 +123,3 @@ def test_cpu_model_with_amp(tmpdir):
with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
tutils.run_model_test(trainer_options, model, on_gpu=False)
@pytest.mark.spawn
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_amp_gpu_dp(tmpdir):
"""Make sure DP + AMP work."""
tutils.reset_seed()
model, hparams = tutils.get_default_model()
trainer_options = dict(
default_root_dir=tmpdir,
max_epochs=1,
gpus='0, 1', # test init with gpu string
distributed_backend='dp',
precision=16
)
trainer = Trainer(**trainer_options)
result = trainer.fit(model)
assert result == 1
if __name__ == '__main__':
pytest.main([__file__])

View File

@ -1,5 +1,4 @@
import platform
import warnings
import pytest
import torch
@ -29,7 +28,6 @@ def test_early_stopping_cpu_model(tmpdir):
gradient_clip_val=1.0,
overfit_pct=0.20,
track_grad_norm=2,
logger=tutils.get_default_testtube_logger(tmpdir),
train_percent_check=0.1,
val_percent_check=0.1,
)
@ -42,6 +40,7 @@ def test_early_stopping_cpu_model(tmpdir):
model.unfreeze()
@pytest.mark.spawn
@pytest.mark.skipif(platform.system() == "Windows",
reason="Distributed training is not supported on Windows")
@pytest.mark.skipif((platform.system() == "Darwin" and
@ -81,7 +80,7 @@ def test_lbfgs_cpu_model(tmpdir):
)
model, hparams = tutils.get_default_model(lbfgs=True)
tutils.run_model_test_no_loggers(trainer_options, model, min_acc=0.5)
tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5)
def test_default_logger_callbacks_cpu_model(tmpdir):
@ -99,7 +98,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
)
model, hparams = tutils.get_default_model()
tutils.run_model_test_no_loggers(trainer_options, model)
tutils.run_model_test_without_loggers(trainer_options, model)
# test freeze on cpu
model.freeze()
@ -114,7 +113,7 @@ def test_running_test_after_fitting(tmpdir):
model = LightningTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
# logger file to get weights
checkpoint = tutils.init_checkpoint_callback(logger)
@ -142,7 +141,7 @@ def test_running_test_after_fitting(tmpdir):
tutils.assert_ok_model_acc(trainer, thr=0.5)
def test_running_test_without_val(tmpdir):
def test_running_test_no_val(tmpdir):
"""Verify `test()` works on a model with no `val_loader`."""
tutils.reset_seed()
@ -153,7 +152,7 @@ def test_running_test_without_val(tmpdir):
model = CurrentTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
# logger file to get weights
checkpoint = tutils.init_checkpoint_callback(logger)
@ -253,7 +252,6 @@ def test_cpu_model(tmpdir):
trainer_options = dict(
default_root_dir=tmpdir,
progress_bar_refresh_rate=0,
logger=tutils.get_default_testtube_logger(tmpdir),
max_epochs=1,
train_percent_check=0.4,
val_percent_check=0.4
@ -274,7 +272,6 @@ def test_all_features_cpu_model(tmpdir):
overfit_pct=0.20,
track_grad_norm=2,
progress_bar_refresh_rate=0,
logger=tutils.get_default_testtube_logger(tmpdir),
accumulate_grad_batches=2,
max_epochs=1,
train_percent_check=0.4,

View File

@ -7,39 +7,17 @@ import tests.base.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.core import memory
from pytorch_lightning.trainer.distrib_parts import (
parse_gpu_ids,
determine_root_gpu_device,
)
from pytorch_lightning.trainer.distrib_parts import parse_gpu_ids, determine_root_gpu_device
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.base import LightningTestModel
PRETEND_N_OF_GPUS = 16
@pytest.mark.spawn
@pytest.mark.parametrize("backend", ['dp', 'ddp', 'ddp2'])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp2(tmpdir):
"""Make sure DDP2 works."""
tutils.reset_seed()
tutils.set_random_master_port()
model, hparams = tutils.get_default_model()
trainer_options = dict(
default_root_dir=tmpdir,
max_epochs=1,
train_percent_check=0.4,
val_percent_check=0.2,
gpus=2,
weights_summary=None,
distributed_backend='ddp2'
)
tutils.run_model_test(trainer_options, model)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp(tmpdir):
def test_multi_gpu_model(tmpdir, backend):
"""Make sure DDP works."""
tutils.reset_seed()
@ -48,15 +26,20 @@ def test_multi_gpu_model_ddp(tmpdir):
model, hparams = tutils.get_default_model()
trainer_options = dict(
default_root_dir=tmpdir,
progress_bar_refresh_rate=0,
max_epochs=1,
train_percent_check=0.4,
val_percent_check=0.2,
gpus=[0, 1],
distributed_backend='ddp'
distributed_backend=backend,
)
tutils.run_model_test(trainer_options, model)
# tutils.run_model_test(trainer_options, model)
trainer = Trainer(**trainer_options)
result = trainer.fit(model)
assert result
# test memory helper functions
memory.get_memory_profile('min_max')
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@ -91,7 +74,7 @@ def test_cpu_slurm_save_load(tmpdir):
model = LightningTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
version = logger.version
trainer_options = dict(
@ -106,7 +89,7 @@ def test_cpu_slurm_save_load(tmpdir):
real_global_step = trainer.global_step
# traning complete
assert result == 1, 'amp + ddp model failed to complete'
assert result == 1, 'cpu model failed to complete'
# predict with trained model before saving
# make a prediction
@ -130,7 +113,7 @@ def test_cpu_slurm_save_load(tmpdir):
assert os.path.exists(saved_filepath)
# new logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False, version=version)
logger = tutils.get_default_logger(tmpdir, version=version)
trainer_options = dict(
max_epochs=1,
@ -175,28 +158,6 @@ def test_multi_gpu_none_backend(tmpdir):
tutils.run_model_test(trainer_options, model)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_dp(tmpdir):
"""Make sure DP works."""
tutils.reset_seed()
model, hparams = tutils.get_default_model()
trainer_options = dict(
default_root_dir=tmpdir,
progress_bar_refresh_rate=0,
distributed_backend='dp',
max_epochs=1,
train_percent_check=0.1,
val_percent_check=0.1,
gpus='-1'
)
tutils.run_model_test(trainer_options, model)
# test memory helper functions
memory.get_memory_profile('min_max')
@pytest.fixture
def mocked_device_count(monkeypatch):
def device_count():
@ -249,21 +210,18 @@ def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distrib
@pytest.mark.gpus_param_tests
@pytest.mark.parametrize([
'gpus', 'expected_root_gpu', "distributed_backend"], [
@pytest.mark.parametrize(['gpus', 'expected_root_gpu', "distributed_backend"], [
pytest.param(None, None, None, id="None is None"),
pytest.param(None, None, "ddp", id="None is None"),
pytest.param(0, None, "ddp", id="None is None"),
])
def test_root_gpu_property_0_passing(
mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu
# Asking for a gpu when non are available will result in a MisconfigurationException
@pytest.mark.gpus_param_tests
@pytest.mark.parametrize([
'gpus', 'expected_root_gpu', "distributed_backend"], [
@pytest.mark.parametrize(['gpus', 'expected_root_gpu', "distributed_backend"], [
pytest.param(1, None, "ddp"),
pytest.param(3, None, "ddp"),
pytest.param(3, None, "ddp"),
@ -272,8 +230,7 @@ def test_root_gpu_property_0_passing(
pytest.param(-1, None, "ddp"),
pytest.param('-1', None, "ddp")
])
def test_root_gpu_property_0_raising(
mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
with pytest.raises(MisconfigurationException):
Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu
@ -325,11 +282,10 @@ def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
@pytest.mark.gpus_param_tests
@pytest.mark.parametrize("gpus", [''])
def test_parse_gpu_fail_on_empty_string(mocked_device_count, gpus):
def test_parse_gpu_fail_on_empty_string(mocked_device_count):
# This currently results in a ValueError instead of MisconfigurationException
with pytest.raises(ValueError):
parse_gpu_ids(gpus)
parse_gpu_ids('')
@pytest.mark.gpus_param_tests
@ -350,7 +306,3 @@ def test_parse_gpu_fail_on_non_existant_id_2(mocked_device_count):
def test_parse_gpu_returns_None_when_no_devices_are_available(mocked_device_count_0, gpus):
with pytest.raises(MisconfigurationException):
parse_gpu_ids(gpus)
# if __name__ == '__main__':
# pytest.main([__file__])

View File

@ -16,8 +16,10 @@ from tests.base import (
)
@pytest.mark.spawn
@pytest.mark.parametrize("backend", ['dp', 'ddp'])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_running_test_pretrained_model_ddp(tmpdir):
def test_running_test_pretrained_model_distrib(tmpdir, backend):
"""Verify `test()` on pretrained model."""
tutils.reset_seed()
@ -27,20 +29,20 @@ def test_running_test_pretrained_model_ddp(tmpdir):
model = LightningTestModel(hparams)
# exp file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
# exp file to get weights
checkpoint = tutils.init_checkpoint_callback(logger)
trainer_options = dict(
progress_bar_refresh_rate=0,
max_epochs=1,
max_epochs=2,
train_percent_check=0.4,
val_percent_check=0.2,
checkpoint_callback=checkpoint,
logger=logger,
gpus=[0, 1],
distributed_backend='ddp'
distributed_backend=backend,
)
# fit model
@ -59,6 +61,9 @@ def test_running_test_pretrained_model_ddp(tmpdir):
new_trainer = Trainer(**trainer_options)
new_trainer.test(pretrained_model)
# test we have good test accuracy
tutils.assert_ok_model_acc(new_trainer)
dataloaders = model.test_dataloader()
if not isinstance(dataloaders, list):
dataloaders = [dataloaders]
@ -67,7 +72,7 @@ def test_running_test_pretrained_model_ddp(tmpdir):
tutils.run_prediction(dataloader, pretrained_model)
def test_running_test_pretrained_model(tmpdir):
def test_running_test_pretrained_model_cpu(tmpdir):
"""Verify test() on pretrained model."""
tutils.reset_seed()
@ -75,7 +80,7 @@ def test_running_test_pretrained_model(tmpdir):
model = LightningTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
# logger file to get weights
checkpoint = tutils.init_checkpoint_callback(logger)
@ -119,7 +124,6 @@ def test_load_model_from_checkpoint(tmpdir):
train_percent_check=0.4,
val_percent_check=0.2,
checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
logger=False,
default_root_dir=tmpdir,
)
@ -150,47 +154,6 @@ def test_load_model_from_checkpoint(tmpdir):
tutils.assert_ok_model_acc(new_trainer)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_running_test_pretrained_model_dp(tmpdir):
"""Verify test() on pretrained model."""
tutils.reset_seed()
hparams = tutils.get_default_hparams()
model = LightningTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
# logger file to get weights
checkpoint = tutils.init_checkpoint_callback(logger)
trainer_options = dict(
max_epochs=2,
train_percent_check=0.4,
val_percent_check=0.2,
checkpoint_callback=checkpoint,
logger=logger,
gpus=[0, 1],
distributed_backend='dp'
)
# fit model
trainer = Trainer(**trainer_options)
result = trainer.fit(model)
# correct result and ok accuracy
assert result == 1, 'training failed to complete'
pretrained_model = tutils.load_model(logger,
trainer.checkpoint_callback.dirpath,
module_class=LightningTestModel)
new_trainer = Trainer(**trainer_options)
new_trainer.test(pretrained_model)
# test we have good test accuracy
tutils.assert_ok_model_acc(new_trainer)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_dp_resume(tmpdir):
"""Make sure DP continues training correctly."""
@ -207,7 +170,7 @@ def test_dp_resume(tmpdir):
)
# get logger
logger = tutils.get_default_testtube_logger(tmpdir, debug=False)
logger = tutils.get_default_logger(tmpdir)
# exp file to get weights
# logger file to get weights
@ -235,7 +198,7 @@ def test_dp_resume(tmpdir):
trainer.hpc_save(tmpdir, logger)
# init new trainer
new_logger = tutils.get_default_testtube_logger(tmpdir, version=logger.version)
new_logger = tutils.get_default_logger(tmpdir, version=logger.version)
trainer_options['logger'] = new_logger
trainer_options['checkpoint_callback'] = ModelCheckpoint(tmpdir)
trainer_options['train_percent_check'] = 0.5
@ -275,7 +238,7 @@ def test_model_saving_loading(tmpdir):
model = LightningTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
trainer_options = dict(
max_epochs=1,
@ -356,7 +319,3 @@ def test_load_model_with_missing_hparams(tmpdir):
# warn if user's model has hparams argument
with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."):
LightningTestModelWithUnusedHyperparametersArg.load_from_checkpoint(last_checkpoint)
# if __name__ == '__main__':
# pytest.main([__file__])

View File

@ -63,7 +63,7 @@ def test_simple_profiler_iterable_durations(simple_profiler, action, expected):
"""Ensure the reported durations are reasonably accurate."""
iterable = _sleep_generator(expected)
for duration in simple_profiler.profile_iterable(iterable, action):
for _ in simple_profiler.profile_iterable(iterable, action):
pass
# we exclude the last item in the recorded durations since that's when StopIteration is raised
@ -135,7 +135,7 @@ def test_advanced_profiler_iterable_durations(advanced_profiler, action, expected):
"""Ensure the reported durations are reasonably accurate."""
iterable = _sleep_generator(expected)
for duration in advanced_profiler.profile_iterable(iterable, action):
for _ in advanced_profiler.profile_iterable(iterable, action):
pass
recored_total_duration = _get_python_cprofile_total_duration(

View File

@ -153,7 +153,7 @@ def test_trainer_callback_system(tmpdir):
assert test_callback.on_test_end_called
def test_early_stopping_without_val_step(tmpdir):
def test_early_stopping_no_val_step(tmpdir):
"""Test that early stopping callback falls back to training metrics when no validation defined."""
tutils.reset_seed()

View File

@ -7,12 +7,9 @@ from tests.base import EvalModelTemplate
from tests.base import (
TestModelBase,
LightValidationDataloader,
LightTestDataloader,
LightValidationStepMixin,
LightValStepFitSingleDataloaderMixin,
LightTrainDataloader,
LightTestStepMixin,
LightTestFitMultipleTestDataloadersMixin,
)

View File

@ -17,7 +17,6 @@ from tests.base import (
LightValStepFitMultipleDataloadersMixin,
LightValStepFitSingleDataloaderMixin,
LightTrainDataloader,
LightValidationDataloader,
LightInfTrainDataloader,
LightInfValDataloader,
LightInfTestDataloader,
@ -540,23 +539,17 @@ def test_dataloader_reinit_for_subclass():
batch_sampler=None, num_workers=0, collate_fn=None,
pin_memory=False, drop_last=False, timeout=0,
worker_init_fn=None, dummy_kwarg=None):
super().__init__(dataset,
batch_size,
shuffle,
sampler,
batch_sampler,
num_workers,
collate_fn,
pin_memory,
drop_last,
timeout,
super().__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
num_workers, collate_fn, pin_memory, drop_last, timeout,
worker_init_fn)
self.dummy_kwarg = dummy_kwarg
trainer = Trainer(gpus=[0, 1],
num_nodes=1,
distributed_backend='ddp')
trainer = Trainer(
gpus=[0, 1],
num_nodes=1,
distributed_backend='ddp',
)
class CustomDummyObj:
sampler = None

View File

@ -1,6 +1,6 @@
import pytest
import torch
import tests.base.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.utilities.exceptions import MisconfigurationException

View File

@ -3,7 +3,6 @@ import torch
import tests.base.utils as tutils
from pytorch_lightning import Trainer
from tests.base import (
TestModelBase,
LightTrainDataloader,

View File

@ -1,15 +1,15 @@
import glob
import math
import os
from argparse import Namespace, ArgumentParser
from argparse import Namespace
import pytest
import torch
import tests.base.utils as tutils
from pytorch_lightning import Callback
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning import Callback
from pytorch_lightning.core.lightning import load_hparams_from_tags_csv
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
from pytorch_lightning.utilities.exceptions import MisconfigurationException
@ -60,7 +60,7 @@ def test_no_val_module(tmpdir):
model = CurrentTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
trainer_options = dict(
max_epochs=1,
@ -100,7 +100,7 @@ def test_no_val_end_module(tmpdir):
model = CurrentTestModel(hparams)
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
trainer_options = dict(
max_epochs=1,
@ -211,7 +211,7 @@ def test_loading_meta_tags(tmpdir):
hparams = tutils.get_default_hparams()
# save tags
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
logger.log_hyperparams(hparams)
logger.save()
@ -335,7 +335,6 @@ def test_resume_from_checkpoint_epoch_restored(tmpdir):
train_percent_check=0.65,
val_percent_check=1,
checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
logger=False,
default_root_dir=tmpdir,
early_stop_callback=False,
val_check_interval=1.,

View File

@ -15,7 +15,7 @@ def test_default_args(tmpdir):
tutils.reset_seed()
# logger file to get meta
logger = tutils.get_default_testtube_logger(tmpdir, False)
logger = tutils.get_default_logger(tmpdir)
parser = ArgumentParser(add_help=False)
args = parser.parse_args()