diff --git a/update.sh b/.update.sh
similarity index 100%
rename from update.sh
rename to .update.sh
diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py
index bfbf058407..5b07d26d4d 100644
--- a/pytorch_lightning/trainer/data_loading.py
+++ b/pytorch_lightning/trainer/data_loading.py
@@ -102,10 +102,19 @@ class TrainerDataLoadingMixin(ABC):
                 sampler = DistributedSampler(
                     dataloader.dataset,
                     num_replicas=xm.xrt_world_size(),
-                    rank=xm.get_ordinal()
+                    rank=xm.get_ordinal(),
                 )
             else:
-                sampler = DistributedSampler(dataloader.dataset)
+                # number of sampler replicas: ddp runs one process per GPU,
+                # ddp2 runs one process per node
+                world_size = {
+                    'ddp': self.num_nodes * self.num_processes,
+                    'ddp2': self.num_nodes,
+                }
+                sampler = DistributedSampler(
+                    dataloader.dataset,
+                    num_replicas=world_size.get(self.distributed_backend, 0),
+                    rank=self.proc_rank,
+                )

             dl_args['sampler'] = sampler
             dataloader = type(dataloader)(**dl_args)
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 998a19bc16..4a27069d10 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -23,11 +23,7 @@ from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
 from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_8, TrainerDeprecatedAPITillVer0_9
 from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin
 from pytorch_lightning.trainer.distrib_parts import (
-    TrainerDPMixin,
-    parse_gpu_ids,
-    determine_root_gpu_device,
-    pick_multiple_gpus,
-)
+    TrainerDPMixin, parse_gpu_ids, determine_root_gpu_device, pick_multiple_gpus)
 from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
 from pytorch_lightning.trainer.logging import TrainerLoggingMixin
 from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
@@ -736,13 +732,10 @@ class Trainer(
             self.ddp_train(task, model)
         else:
             self.__set_random_port()
-
             # track for predict
             self.model = model
-
             # train
             mp.spawn(self.ddp_train, nprocs=self.num_processes, args=(model,))
-
             # load weights if not interrupted
             self.load_spawn_weights(model)
             self.model = model
diff --git a/tests/__init__.py b/tests/__init__.py
index 32e384ac98..acc27596f9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,3 +1,18 @@
 import os

+import numpy as np
+import torch
+
 TEST_ROOT = os.path.dirname(__file__)
+PACKAGE_ROOT = os.path.dirname(TEST_ROOT)
+TEMP_PATH = os.path.join(PACKAGE_ROOT, 'test_temp')
+
+# generate a list of random ports and seeds for each test
+RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
+ROOT_SEED = 1234
+torch.manual_seed(ROOT_SEED)
+np.random.seed(ROOT_SEED)
+RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
+
+if not os.path.isdir(TEMP_PATH):
+    os.mkdir(TEMP_PATH)
diff --git a/tests/base/__init__.py b/tests/base/__init__.py
index b3c0972c26..fce8a8fa66 100644
--- a/tests/base/__init__.py
+++ b/tests/base/__init__.py
@@ -2,7 +2,6 @@

 import torch

-from tests.base.models import TestModelBase, DictHparamsModel
 from tests.base.eval_model_template import EvalModelTemplate
 from tests.base.mixins import (
     LightEmptyTestStep,
@@ -31,6 +30,7 @@ from tests.base.mixins import (
     LightTestNoneOptimizerMixin,
     LightZeroLenDataloader
 )
+from tests.base.models import TestModelBase, DictHparamsModel


 class LightningTestModel(LightTrainDataloader,
diff --git a/tests/base/datasets.py b/tests/base/datasets.py
index 39bf7d0c06..af6cb062dc 100644
--- a/tests/base/datasets.py
+++ b/tests/base/datasets.py
@@ -7,10 +7,10 @@ import torch
 from torch import Tensor
 from torch.utils.data import Dataset

-from tests import TEST_ROOT
+from tests import PACKAGE_ROOT

 #: local path to test datasets
-PATH_DATASETS = os.path.join(TEST_ROOT, 'Datasets')
+PATH_DATASETS = os.path.join(PACKAGE_ROOT, 'Datasets')


 class MNIST(Dataset):
diff --git a/tests/base/debug.py b/tests/base/debug.py
index ffed2552f2..0c3b120c93 100644
--- a/tests/base/debug.py
+++ b/tests/base/debug.py
@@ -7,7 +7,7 @@ from tests.base.datasets import TrialMNIST


 # from test_models import assert_ok_test_acc, load_model, \
-#     clear_save_dir, get_default_testtube_logger, get_default_hparams, init_save_dir, \
+#     clear_save_dir, get_default_logger, get_default_hparams, init_save_dir, \
 #     init_checkpoint_callback, reset_seed, set_random_master_port


diff --git a/tests/base/eval_model_template.py b/tests/base/eval_model_template.py
index bd5d3b0852..cce59edd2a 100644
--- a/tests/base/eval_model_template.py
+++ b/tests/base/eval_model_template.py
@@ -2,18 +2,18 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F

-from tests.base.datasets import TrialMNIST
 from pytorch_lightning.core.lightning import LightningModule
+from tests.base.datasets import TrialMNIST
 from tests.base.eval_model_optimizers import ConfigureOptimizersPool
 from tests.base.eval_model_test_dataloaders import TestDataloaderVariations
 from tests.base.eval_model_test_epoch_ends import TestEpochEndVariations
 from tests.base.eval_model_test_steps import TestStepVariations
 from tests.base.eval_model_train_dataloaders import TrainDataloaderVariations
 from tests.base.eval_model_train_steps import TrainingStepVariations
+from tests.base.eval_model_utils import ModelTemplateUtils
 from tests.base.eval_model_valid_dataloaders import ValDataloaderVariations
 from tests.base.eval_model_valid_epoch_ends import ValidationEpochEndVariations
 from tests.base.eval_model_valid_steps import ValidationStepVariations
-from tests.base.eval_model_utils import ModelTemplateUtils


 class EvalModelTemplate(
diff --git a/tests/base/eval_model_utils.py b/tests/base/eval_model_utils.py
index 922fef4800..68618d17d1 100644
--- a/tests/base/eval_model_utils.py
+++ b/tests/base/eval_model_utils.py
@@ -1,4 +1,5 @@
 from torch.utils.data import DataLoader
+
 from tests.base.datasets import TrialMNIST


diff --git a/tests/base/eval_model_valid_steps.py b/tests/base/eval_model_valid_steps.py
index d97c1313db..1f40b45f80 100644
--- a/tests/base/eval_model_valid_steps.py
+++ b/tests/base/eval_model_valid_steps.py
@@ -1,5 +1,6 @@
 from abc import ABC
 from collections import OrderedDict
+
 import torch


diff --git a/tests/base/models.py b/tests/base/models.py
index d203f7799e..c130325c2f 100644
--- a/tests/base/models.py
+++ b/tests/base/models.py
@@ -1,4 +1,3 @@
-import os
 from collections import OrderedDict
 from typing import Dict

diff --git a/tests/base/utils.py b/tests/base/utils.py
index f907562a74..6d3be0de3f 100644
--- a/tests/base/utils.py
+++ b/tests/base/utils.py
@@ -7,18 +7,11 @@ import torch

 # from pl_examples import LightningTemplateModel
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelCheckpoint
-from pytorch_lightning.loggers import TestTubeLogger, TensorBoardLogger
-from tests.base import LightningTestModel, EvalModelTemplate
+from pytorch_lightning.loggers import TensorBoardLogger
+from tests import TEMP_PATH, RANDOM_PORTS, RANDOM_SEEDS
+from tests.base import LightningTestModel
 from tests.base.datasets import PATH_DATASETS

-# generate a list of random seeds for each test
-RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
-ROOT_SEED = 1234
-torch.manual_seed(ROOT_SEED)
-np.random.seed(ROOT_SEED)
-RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
-ROOT_PATH = os.path.abspath(os.path.dirname(__file__))
-

 def assert_speed_parity(pl_times, pt_times, num_epochs):
@@ -33,7 +26,7 @@ def assert_speed_parity(pl_times, pt_times, num_epochs):
         f"lightning was slower than PT (threshold {max_diff_per_epoch})"


-def run_model_test_no_loggers(trainer_options, model, min_acc=0.50):
+def run_model_test_without_loggers(trainer_options, model, min_acc=0.50):
     # save_dir = trainer_options['default_root_dir']

     # fit model
@@ -66,14 +59,16 @@ def run_model_test(trainer_options, model, on_gpu=True):
     save_dir = trainer_options['default_root_dir']

     # logger file to get meta
-    logger = get_default_testtube_logger(save_dir, False)
+    logger = get_default_logger(save_dir)

     # logger file to get weights
     checkpoint = init_checkpoint_callback(logger)

     # add these to the trainer options
-    trainer_options['checkpoint_callback'] = checkpoint
-    trainer_options['logger'] = logger
+    trainer_options.update(
+        checkpoint_callback=checkpoint,
+        logger=logger,
+    )

     # fit model
     trainer = Trainer(**trainer_options)
@@ -118,8 +113,10 @@ def get_default_hparams(continue_training=False, hpc_exp_number=0):
     }

     if continue_training:
-        args['test_tube_do_checkpoint_load'] = True
-        args['hpc_exp_number'] = hpc_exp_number
+        args.update(
+            test_tube_do_checkpoint_load=True,
+            hpc_exp_number=hpc_exp_number,
+        )

     hparams = Namespace(**args)
     return hparams
@@ -137,9 +134,9 @@ def get_default_model(lbfgs=False):
     return model, hparams


-def get_default_testtube_logger(save_dir, debug=True, version=None):
+def get_default_logger(save_dir, version=None):
     # set up logger object without actually saving logs
-    logger = TestTubeLogger(save_dir, name='lightning_logs', debug=debug, version=version)
+    logger = TensorBoardLogger(save_dir, name='lightning_logs', version=version)
     return logger


@@ -153,7 +150,10 @@ def get_data_path(expt_logger, path_dir=None):
         return expt.get_data_path(name, version)
     # the other experiments...
     if not path_dir:
-        path_dir = ROOT_PATH
+        if hasattr(expt_logger, 'save_dir') and expt_logger.save_dir:
+            path_dir = expt_logger.save_dir
+        else:
+            path_dir = TEMP_PATH
     path_expt = os.path.join(path_dir, name, 'version_%s' % version)
     # try if the new sub-folder exists, typical case for test-tube
     if not os.path.isdir(path_expt):
@@ -161,9 +161,9 @@
     return path_expt


-def load_model(exp, root_weights_dir, module_class=LightningTestModel, path_expt=None):
+def load_model(logger, root_weights_dir, module_class=LightningTestModel, path_expt=None):
     # load trained model
-    path_expt_dir = get_data_path(exp, path_dir=path_expt)
+    path_expt_dir = get_data_path(logger, path_dir=path_expt)
     tags_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_CSV_TAGS)

     checkpoints = [x for x in os.listdir(root_weights_dir) if '.ckpt' in x]
diff --git a/tests/conftest.py b/tests/conftest.py
index bfb2b0d5fc..67eb1d9aa8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,7 +7,9 @@ def pytest_configure(config):
     config.addinivalue_line("markers", "spawn: spawn test in a separate process using torch.multiprocessing.spawn")


+# this helper cannot be replaced by functools.wraps: mp.spawn invokes its target
+# as fn(process_index, *args), so the extra index argument must be dropped here
 def wrap(i, fn, args):
     return fn(*args)


diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py
index b85ccfb7a0..b8637ff79f 100644
--- a/tests/loggers/test_all.py
+++ b/tests/loggers/test_all.py
@@ -7,7 +7,6 @@ import tests.base.utils as tutils
 from pytorch_lightning import Trainer
 from pytorch_lightning.loggers import (
     TensorBoardLogger, MLFlowLogger, NeptuneLogger, TestTubeLogger, CometLogger)
-from tests.base import LightningTestModel


 def _get_logger_args(logger_class, save_dir):
diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py
index 33e4bfcff7..9b21e711d8 100644
--- a/tests/models/test_amp.py
+++ b/tests/models/test_amp.py
@@ -11,66 +11,54 @@ from tests.base import (
 )


-@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
-def test_amp_single_gpu(tmpdir):
-    """Make sure DDP + AMP work."""
-    tutils.reset_seed()
-
-    hparams = tutils.get_default_hparams()
-    model = LightningTestModel(hparams)
-
-    trainer_options = dict(
-        default_root_dir=tmpdir,
-        max_epochs=1,
-        gpus=1,
-        distributed_backend='ddp',
-        precision=16
-    )
-
-    tutils.run_model_test(trainer_options, model)
-
-
 @pytest.mark.spawn
+@pytest.mark.parametrize("backend", ['dp', 'ddp'])
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
-def test_no_amp_single_gpu(tmpdir):
-    """Make sure DDP + AMP work."""
+def test_amp_single_gpu(tmpdir, backend):
+    """Make sure DP/DDP + AMP work."""
     tutils.reset_seed()

-    hparams = tutils.get_default_hparams()
-    model = LightningTestModel(hparams)
+    model, hparams = tutils.get_default_model()

     trainer_options = dict(
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=1,
-        distributed_backend='dp',
+        distributed_backend=backend,
         precision=16
     )

+    # tutils.run_model_test(trainer_options, model)
+
     trainer = Trainer(**trainer_options)
     result = trainer.fit(model)

     assert result == 1


+@pytest.mark.spawn
+@pytest.mark.parametrize("backend", ['dp', 'ddp'])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_amp_gpu_ddp(tmpdir):
-    """Make sure DDP + AMP work."""
+def test_amp_multi_gpu(tmpdir, backend):
+    """Make sure DP/DDP + AMP work."""
     tutils.reset_seed()
     tutils.set_random_master_port()

-    hparams = tutils.get_default_hparams()
-    model = LightningTestModel(hparams)
+    model, hparams = tutils.get_default_model()

     trainer_options = dict(
         default_root_dir=tmpdir,
         max_epochs=1,
-        gpus=2,
-        distributed_backend='ddp',
+        # gpus=2,
+        gpus='0, 1',  # test init with gpu string
+        distributed_backend=backend,
         precision=16
     )

-    tutils.run_model_test(trainer_options, model)
+    # tutils.run_model_test(trainer_options, model)
+    trainer = Trainer(**trainer_options)
+    result = trainer.fit(model)
+    assert result


 @pytest.mark.spawn
@@ -94,7 +82,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
     )

     # exp file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     # exp file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)
@@ -125,7 +113,6 @@ def test_cpu_model_with_amp(tmpdir):
     trainer_options = dict(
         default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
-        logger=tutils.get_default_testtube_logger(tmpdir),
         max_epochs=1,
         train_percent_check=0.4,
         val_percent_check=0.4,
@@ -136,28 +123,3 @@ def test_cpu_model_with_amp(tmpdir):

     with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
         tutils.run_model_test(trainer_options, model, on_gpu=False)
-
-
-@pytest.mark.spawn
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_amp_gpu_dp(tmpdir):
-    """Make sure DP + AMP work."""
-    tutils.reset_seed()
-
-    model, hparams = tutils.get_default_model()
-    trainer_options = dict(
-        default_root_dir=tmpdir,
-        max_epochs=1,
-        gpus='0, 1',  # test init with gpu string
-        distributed_backend='dp',
-        precision=16
-    )
-
-    trainer = Trainer(**trainer_options)
-    result = trainer.fit(model)
-
-    assert result == 1
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py
index 375aeae40e..625d8660aa 100644
--- a/tests/models/test_cpu.py
+++ b/tests/models/test_cpu.py
@@ -1,5 +1,4 @@
 import platform
-import warnings

 import pytest
 import torch
@@ -29,7 +28,6 @@ def test_early_stopping_cpu_model(tmpdir):
         gradient_clip_val=1.0,
         overfit_pct=0.20,
         track_grad_norm=2,
-        logger=tutils.get_default_testtube_logger(tmpdir),
         train_percent_check=0.1,
         val_percent_check=0.1,
     )
@@ -42,6 +40,7 @@ def test_early_stopping_cpu_model(tmpdir):
     model.unfreeze()


+@pytest.mark.spawn
 @pytest.mark.skipif(platform.system() == "Windows",
                     reason="Distributed training is not supported on Windows")
 @pytest.mark.skipif((platform.system() == "Darwin" and
@@ -81,7 +80,7 @@ def test_lbfgs_cpu_model(tmpdir):
     )

     model, hparams = tutils.get_default_model(lbfgs=True)
-    tutils.run_model_test_no_loggers(trainer_options, model, min_acc=0.5)
+    tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5)


 def test_default_logger_callbacks_cpu_model(tmpdir):
@@ -99,7 +98,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
     )

     model, hparams = tutils.get_default_model()
-    tutils.run_model_test_no_loggers(trainer_options, model)
+    tutils.run_model_test_without_loggers(trainer_options, model)

     # test freeze on cpu
     model.freeze()
@@ -114,7 +113,7 @@ def test_running_test_after_fitting(tmpdir):
     model = LightningTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     # logger file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)
@@ -142,7 +141,7 @@ def test_running_test_after_fitting(tmpdir):
     tutils.assert_ok_model_acc(trainer, thr=0.5)


-def test_running_test_without_val(tmpdir):
+def test_running_test_no_val(tmpdir):
     """Verify `test()` works on a model with no `val_loader`."""
     tutils.reset_seed()

@@ -153,7 +152,7 @@
     model = CurrentTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     # logger file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)
@@ -253,7 +252,6 @@ def test_cpu_model(tmpdir):
     trainer_options = dict(
         default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
-        logger=tutils.get_default_testtube_logger(tmpdir),
         max_epochs=1,
         train_percent_check=0.4,
         val_percent_check=0.4
@@ -274,7 +272,6 @@ def test_all_features_cpu_model(tmpdir):
         overfit_pct=0.20,
         track_grad_norm=2,
         progress_bar_refresh_rate=0,
-        logger=tutils.get_default_testtube_logger(tmpdir),
         accumulate_grad_batches=2,
         max_epochs=1,
         train_percent_check=0.4,
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 096b8497a5..38d519fb3c 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -7,39 +7,17 @@ import tests.base.utils as tutils
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.core import memory
-from pytorch_lightning.trainer.distrib_parts import (
-    parse_gpu_ids,
-    determine_root_gpu_device,
-)
+from pytorch_lightning.trainer.distrib_parts import parse_gpu_ids, determine_root_gpu_device
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.base import LightningTestModel

 PRETEND_N_OF_GPUS = 16


+@pytest.mark.spawn
+@pytest.mark.parametrize("backend", ['dp', 'ddp', 'ddp2'])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp2(tmpdir):
-    """Make sure DDP2 works."""
-
-    tutils.reset_seed()
-    tutils.set_random_master_port()
-
-    model, hparams = tutils.get_default_model()
-    trainer_options = dict(
-        default_root_dir=tmpdir,
-        max_epochs=1,
-        train_percent_check=0.4,
-        val_percent_check=0.2,
-        gpus=2,
-        weights_summary=None,
-        distributed_backend='ddp2'
-    )
-
-    tutils.run_model_test(trainer_options, model)
-
-
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp(tmpdir):
-    """Make sure DDP works."""
+def test_multi_gpu_model(tmpdir, backend):
+    """Make sure DP/DDP/DDP2 works."""

     tutils.reset_seed()
@@ -48,15 +26,20 @@
     model, hparams = tutils.get_default_model()
     trainer_options = dict(
         default_root_dir=tmpdir,
-        progress_bar_refresh_rate=0,
         max_epochs=1,
         train_percent_check=0.4,
         val_percent_check=0.2,
         gpus=[0, 1],
-        distributed_backend='ddp'
+        distributed_backend=backend,
     )

-    tutils.run_model_test(trainer_options, model)
+    # tutils.run_model_test(trainer_options, model)
+    trainer = Trainer(**trainer_options)
+    result = trainer.fit(model)
+    assert result
+
+    # test memory helper functions
+    memory.get_memory_profile('min_max')


 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -91,7 +74,7 @@ def test_cpu_slurm_save_load(tmpdir):
     model = LightningTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)
     version = logger.version

     trainer_options = dict(
@@ -106,7 +89,7 @@ def test_cpu_slurm_save_load(tmpdir):
     real_global_step = trainer.global_step

     # traning complete
-    assert result == 1, 'amp + ddp model failed to complete'
+    assert result == 1, 'cpu model failed to complete'

     # predict with trained model before saving
     # make a prediction
@@ -130,7 +113,7 @@ def test_cpu_slurm_save_load(tmpdir):
     assert os.path.exists(saved_filepath)

     # new logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False, version=version)
+    logger = tutils.get_default_logger(tmpdir, version=version)

     trainer_options = dict(
         max_epochs=1,
@@ -175,28 +158,6 @@ def test_multi_gpu_none_backend(tmpdir):
     tutils.run_model_test(trainer_options, model)


-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_dp(tmpdir):
-    """Make sure DP works."""
-    tutils.reset_seed()
-
-    model, hparams = tutils.get_default_model()
-    trainer_options = dict(
-        default_root_dir=tmpdir,
-        progress_bar_refresh_rate=0,
-        distributed_backend='dp',
-        max_epochs=1,
-        train_percent_check=0.1,
-        val_percent_check=0.1,
-        gpus='-1'
-    )
-
-    tutils.run_model_test(trainer_options, model)
-
-    # test memory helper functions
-    memory.get_memory_profile('min_max')
-
-
 @pytest.fixture
 def mocked_device_count(monkeypatch):
     def device_count():
@@ -249,21 +210,18 @@ def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distrib


 @pytest.mark.gpus_param_tests
-@pytest.mark.parametrize([
-    'gpus', 'expected_root_gpu', "distributed_backend"], [
+@pytest.mark.parametrize(['gpus', 'expected_root_gpu', "distributed_backend"], [
     pytest.param(None, None, None, id="None is None"),
     pytest.param(None, None, "ddp", id="None is None"),
     pytest.param(0, None, "ddp", id="None is None"),
 ])
-def test_root_gpu_property_0_passing(
-        mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
+def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
     assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu


 # Asking for a gpu when non are available will result in a MisconfigurationException
 @pytest.mark.gpus_param_tests
-@pytest.mark.parametrize([
-    'gpus', 'expected_root_gpu', "distributed_backend"], [
+@pytest.mark.parametrize(['gpus', 'expected_root_gpu', "distributed_backend"], [
     pytest.param(1, None, "ddp"),
     pytest.param(3, None, "ddp"),
     pytest.param(3, None, "ddp"),
@@ -272,8 +230,7 @@ def test_root_gpu_property_0_passing(
     pytest.param(-1, None, "ddp"),
     pytest.param('-1', None, "ddp")
 ])
-def test_root_gpu_property_0_raising(
-        mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
+def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
     with pytest.raises(MisconfigurationException):
         Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu

@@ -325,11 +282,10 @@ def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):


 @pytest.mark.gpus_param_tests
-@pytest.mark.parametrize("gpus", [''])
-def test_parse_gpu_fail_on_empty_string(mocked_device_count, gpus):
+def test_parse_gpu_fail_on_empty_string(mocked_device_count):
     # This currently results in a ValueError instead of MisconfigurationException
     with pytest.raises(ValueError):
-        parse_gpu_ids(gpus)
+        parse_gpu_ids('')


 @pytest.mark.gpus_param_tests
@@ -350,7 +306,3 @@
 def test_parse_gpu_returns_None_when_no_devices_are_available(mocked_device_count_0, gpus):
     with pytest.raises(MisconfigurationException):
         parse_gpu_ids(gpus)
-
-
-# if __name__ == '__main__':
-#     pytest.main([__file__])
diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py
index e4dd6ef27f..1165da6f78 100644
--- a/tests/models/test_restore.py
+++ b/tests/models/test_restore.py
@@ -16,8 +16,10 @@ from tests.base import (
 )


+@pytest.mark.spawn
+@pytest.mark.parametrize("backend", ['dp', 'ddp'])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_running_test_pretrained_model_ddp(tmpdir):
+def test_running_test_pretrained_model_distrib(tmpdir, backend):
     """Verify `test()` on pretrained model."""
     tutils.reset_seed()

@@ -27,20 +29,20 @@
     model = LightningTestModel(hparams)

     # exp file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     # exp file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)

     trainer_options = dict(
         progress_bar_refresh_rate=0,
-        max_epochs=1,
+        max_epochs=2,
         train_percent_check=0.4,
         val_percent_check=0.2,
         checkpoint_callback=checkpoint,
         logger=logger,
         gpus=[0, 1],
-        distributed_backend='ddp'
+        distributed_backend=backend,
     )

     # fit model
@@ -59,6 +61,9 @@
     new_trainer = Trainer(**trainer_options)
     new_trainer.test(pretrained_model)

+    # test we have good test accuracy
+    tutils.assert_ok_model_acc(new_trainer)
+
     dataloaders = model.test_dataloader()
     if not isinstance(dataloaders, list):
         dataloaders = [dataloaders]
@@ -67,7 +72,7 @@
         tutils.run_prediction(dataloader, pretrained_model)


-def test_running_test_pretrained_model(tmpdir):
+def test_running_test_pretrained_model_cpu(tmpdir):
     """Verify test() on pretrained model."""
     tutils.reset_seed()

@@ -75,7 +80,7 @@
     model = LightningTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     # logger file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)
@@ -119,7 +124,6 @@ def test_load_model_from_checkpoint(tmpdir):
         train_percent_check=0.4,
         val_percent_check=0.2,
         checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
-        logger=False,
         default_root_dir=tmpdir,
     )

@@ -150,47 +154,6 @@ def test_load_model_from_checkpoint(tmpdir):
     tutils.assert_ok_model_acc(new_trainer)


-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_running_test_pretrained_model_dp(tmpdir):
-    """Verify test() on pretrained model."""
-    tutils.reset_seed()
-
-    hparams = tutils.get_default_hparams()
-    model = LightningTestModel(hparams)
-
-    # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
-
-    # logger file to get weights
-    checkpoint = tutils.init_checkpoint_callback(logger)
-
-    trainer_options = dict(
-        max_epochs=2,
-        train_percent_check=0.4,
-        val_percent_check=0.2,
-        checkpoint_callback=checkpoint,
-        logger=logger,
-        gpus=[0, 1],
-        distributed_backend='dp'
-    )
-
-    # fit model
-    trainer = Trainer(**trainer_options)
-    result = trainer.fit(model)
-
-    # correct result and ok accuracy
-    assert result == 1, 'training failed to complete'
-    pretrained_model = tutils.load_model(logger,
-                                         trainer.checkpoint_callback.dirpath,
-                                         module_class=LightningTestModel)
-
-    new_trainer = Trainer(**trainer_options)
-    new_trainer.test(pretrained_model)
-
-    # test we have good test accuracy
-    tutils.assert_ok_model_acc(new_trainer)
-
-
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_dp_resume(tmpdir):
     """Make sure DP continues training correctly."""
@@ -207,7 +170,7 @@ def test_dp_resume(tmpdir):
     )

     # get logger
-    logger = tutils.get_default_testtube_logger(tmpdir, debug=False)
+    logger = tutils.get_default_logger(tmpdir)

     # exp file to get weights
     # logger file to get weights
@@ -235,7 +198,7 @@ def test_dp_resume(tmpdir):
     trainer.hpc_save(tmpdir, logger)

     # init new trainer
-    new_logger = tutils.get_default_testtube_logger(tmpdir, version=logger.version)
+    new_logger = tutils.get_default_logger(tmpdir, version=logger.version)
     trainer_options['logger'] = new_logger
     trainer_options['checkpoint_callback'] = ModelCheckpoint(tmpdir)
     trainer_options['train_percent_check'] = 0.5
@@ -275,7 +238,7 @@ def test_model_saving_loading(tmpdir):
     model = LightningTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     trainer_options = dict(
         max_epochs=1,
@@ -356,7 +319,3 @@ def test_load_model_with_missing_hparams(tmpdir):
     # warn if user's model has hparams argument
     with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."):
         LightningTestModelWithUnusedHyperparametersArg.load_from_checkpoint(last_checkpoint)
-
-
-# if __name__ == '__main__':
-#     pytest.main([__file__])
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index ae5dc3eb36..fa9fc103f0 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -63,7 +63,7 @@ def test_simple_profiler_iterable_durations(simple_profiler, action, expected):
     """Ensure the reported durations are reasonably accurate."""
     iterable = _sleep_generator(expected)

-    for duration in simple_profiler.profile_iterable(iterable, action):
+    for _ in simple_profiler.profile_iterable(iterable, action):
         pass

     # we exclude the last item in the recorded durations since that's when StopIteration is raised
@@ -135,7 +135,7 @@ def test_advanced_profiler_iterable_durations(advanced_profiler, action, expecte
     """Ensure the reported durations are reasonably accurate."""
     iterable = _sleep_generator(expected)

-    for duration in advanced_profiler.profile_iterable(iterable, action):
+    for _ in advanced_profiler.profile_iterable(iterable, action):
         pass

     recored_total_duration = _get_python_cprofile_total_duration(
diff --git a/tests/trainer/test_callbacks.py b/tests/trainer/test_callbacks.py
index baeb073d39..ee208059ad 100644
--- a/tests/trainer/test_callbacks.py
+++ b/tests/trainer/test_callbacks.py
@@ -153,7 +153,7 @@ def test_trainer_callback_system(tmpdir):
     assert test_callback.on_test_end_called


-def test_early_stopping_without_val_step(tmpdir):
+def test_early_stopping_no_val_step(tmpdir):
     """Test that early stopping callback falls back to training metrics when no validation defined."""
     tutils.reset_seed()
diff --git a/tests/trainer/test_checks.py b/tests/trainer/test_checks.py
index 8a402e22d9..2ee8037758 100755
--- a/tests/trainer/test_checks.py
+++ b/tests/trainer/test_checks.py
@@ -7,12 +7,9 @@ from tests.base import EvalModelTemplate
 from tests.base import (
     TestModelBase,
     LightValidationDataloader,
-    LightTestDataloader,
     LightValidationStepMixin,
     LightValStepFitSingleDataloaderMixin,
     LightTrainDataloader,
-    LightTestStepMixin,
-    LightTestFitMultipleTestDataloadersMixin,
 )


diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py
index 91f0bcf0aa..d52e61eab9 100644
--- a/tests/trainer/test_dataloaders.py
+++ b/tests/trainer/test_dataloaders.py
@@ -17,7 +17,6 @@ from tests.base import (
     LightValStepFitMultipleDataloadersMixin,
     LightValStepFitSingleDataloaderMixin,
     LightTrainDataloader,
-    LightValidationDataloader,
     LightInfTrainDataloader,
     LightInfValDataloader,
     LightInfTestDataloader,
@@ -540,23 +539,17 @@ def test_dataloader_reinit_for_subclass():
                      batch_sampler=None, num_workers=0, collate_fn=None,
                      pin_memory=False, drop_last=False, timeout=0,
                      worker_init_fn=None, dummy_kwarg=None):
-            super().__init__(dataset,
-                             batch_size,
-                             shuffle,
-                             sampler,
-                             batch_sampler,
-                             num_workers,
-                             collate_fn,
-                             pin_memory,
-                             drop_last,
-                             timeout,
+            super().__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
+                             num_workers, collate_fn, pin_memory, drop_last, timeout,
                              worker_init_fn)

             self.dummy_kwarg = dummy_kwarg

-    trainer = Trainer(gpus=[0, 1],
-                      num_nodes=1,
-                      distributed_backend='ddp')
+    trainer = Trainer(
+        gpus=[0, 1],
+        num_nodes=1,
+        distributed_backend='ddp',
+    )

     class CustomDummyObj:
         sampler = None
diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py
index d5b19ce689..96fe461e99 100755
--- a/tests/trainer/test_lr_finder.py
+++ b/tests/trainer/test_lr_finder.py
@@ -1,6 +1,6 @@
 import pytest
-
 import torch
+
 import tests.base.utils as tutils
 from pytorch_lightning import Trainer
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py
index 8d123a81e5..6ac9da23d8 100644
--- a/tests/trainer/test_optimizers.py
+++ b/tests/trainer/test_optimizers.py
@@ -3,7 +3,6 @@ import torch

 import tests.base.utils as tutils
 from pytorch_lightning import Trainer
-
 from tests.base import (
     TestModelBase,
     LightTrainDataloader,
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 5adab73259..91a02a3ac6 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1,15 +1,15 @@
 import glob
 import math
 import os
-from argparse import Namespace, ArgumentParser
+from argparse import Namespace

 import pytest
 import torch

 import tests.base.utils as tutils
+from pytorch_lightning import Callback
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
-from pytorch_lightning import Callback
 from pytorch_lightning.core.lightning import load_hparams_from_tags_csv
 from pytorch_lightning.trainer.logging import TrainerLoggingMixin
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -60,7 +60,7 @@ def test_no_val_module(tmpdir):
     model = CurrentTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     trainer_options = dict(
         max_epochs=1,
@@ -100,7 +100,7 @@ def test_no_val_end_module(tmpdir):
     model = CurrentTestModel(hparams)

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     trainer_options = dict(
         max_epochs=1,
@@ -211,7 +211,7 @@ def test_loading_meta_tags(tmpdir):
     hparams = tutils.get_default_hparams()

     # save tags
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)
     logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
     logger.log_hyperparams(hparams)
     logger.save()
@@ -335,7 +335,6 @@ def test_resume_from_checkpoint_epoch_restored(tmpdir):
         train_percent_check=0.65,
         val_percent_check=1,
         checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
-        logger=False,
         default_root_dir=tmpdir,
         early_stop_callback=False,
         val_check_interval=1.,
diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py
index 92bbd647b0..bfc67111a7 100644
--- a/tests/trainer/test_trainer_cli.py
+++ b/tests/trainer/test_trainer_cli.py
@@ -15,7 +15,7 @@ def test_default_args(tmpdir):
     tutils.reset_seed()

     # logger file to get meta
-    logger = tutils.get_default_testtube_logger(tmpdir, False)
+    logger = tutils.get_default_logger(tmpdir)

     parser = ArgumentParser(add_help=False)
     args = parser.parse_args()