default test logger (#1478)
* default test logger
* fix tests
* spawn
* try
* simplify tests
* simplify tests
* formatting
* loggers
* loggers
* revert to TestTube
* default
* default
* wraps
* world size
* optim imports
This commit is contained in:
parent
bafdeca42f
commit
c1c6e3b6c9
|
@ -102,10 +102,18 @@ class TrainerDataLoadingMixin(ABC):
|
|||
sampler = DistributedSampler(
|
||||
dataloader.dataset,
|
||||
num_replicas=xm.xrt_world_size(),
|
||||
rank=xm.get_ordinal()
|
||||
rank=xm.get_ordinal(),
|
||||
)
|
||||
else:
|
||||
sampler = DistributedSampler(dataloader.dataset)
|
||||
world_size = {
|
||||
'ddp': self.num_nodes * self.num_processes,
|
||||
'ddp2': self.num_nodes,
|
||||
}
|
||||
sampler = DistributedSampler(
|
||||
dataloader.dataset,
|
||||
num_replicas=world_size.get(self.distributed_backend, 0),
|
||||
rank=self.proc_rank,
|
||||
)
|
||||
|
||||
dl_args['sampler'] = sampler
|
||||
dataloader = type(dataloader)(**dl_args)
|
||||
|
|
|
@ -23,11 +23,7 @@ from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
|
|||
from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_8, TrainerDeprecatedAPITillVer0_9
|
||||
from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin
|
||||
from pytorch_lightning.trainer.distrib_parts import (
|
||||
TrainerDPMixin,
|
||||
parse_gpu_ids,
|
||||
determine_root_gpu_device,
|
||||
pick_multiple_gpus,
|
||||
)
|
||||
TrainerDPMixin, parse_gpu_ids, determine_root_gpu_device, pick_multiple_gpus)
|
||||
from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
|
||||
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
|
||||
from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
|
||||
|
@ -736,13 +732,10 @@ class Trainer(
|
|||
self.ddp_train(task, model)
|
||||
else:
|
||||
self.__set_random_port()
|
||||
|
||||
# track for predict
|
||||
self.model = model
|
||||
|
||||
# train
|
||||
mp.spawn(self.ddp_train, nprocs=self.num_processes, args=(model,))
|
||||
|
||||
# load weights if not interrupted
|
||||
self.load_spawn_weights(model)
|
||||
self.model = model
|
||||
|
|
|
@ -1,3 +1,18 @@
|
|||
import os
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
TEST_ROOT = os.path.dirname(__file__)
|
||||
PACKAGE_ROOT = os.path.dirname(TEST_ROOT)
|
||||
TEMP_PATH = os.path.join(PACKAGE_ROOT, 'test_temp')
|
||||
|
||||
# generate a list of random ports and a list of random seeds for the tests
|
||||
RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
|
||||
ROOT_SEED = 1234
|
||||
torch.manual_seed(ROOT_SEED)
|
||||
np.random.seed(ROOT_SEED)
|
||||
RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
|
||||
|
||||
if not os.path.isdir(TEMP_PATH):
|
||||
os.mkdir(TEMP_PATH)
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
import torch
|
||||
|
||||
from tests.base.models import TestModelBase, DictHparamsModel
|
||||
from tests.base.eval_model_template import EvalModelTemplate
|
||||
from tests.base.mixins import (
|
||||
LightEmptyTestStep,
|
||||
|
@ -31,6 +30,7 @@ from tests.base.mixins import (
|
|||
LightTestNoneOptimizerMixin,
|
||||
LightZeroLenDataloader
|
||||
)
|
||||
from tests.base.models import TestModelBase, DictHparamsModel
|
||||
|
||||
|
||||
class LightningTestModel(LightTrainDataloader,
|
||||
|
|
|
@ -7,10 +7,10 @@ import torch
|
|||
from torch import Tensor
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from tests import TEST_ROOT
|
||||
from tests import PACKAGE_ROOT
|
||||
|
||||
#: local path to test datasets
|
||||
PATH_DATASETS = os.path.join(TEST_ROOT, 'Datasets')
|
||||
PATH_DATASETS = os.path.join(PACKAGE_ROOT, 'Datasets')
|
||||
|
||||
|
||||
class MNIST(Dataset):
|
||||
|
|
|
@ -7,7 +7,7 @@ from tests.base.datasets import TrialMNIST
|
|||
|
||||
|
||||
# from test_models import assert_ok_test_acc, load_model, \
|
||||
# clear_save_dir, get_default_testtube_logger, get_default_hparams, init_save_dir, \
|
||||
# clear_save_dir, get_default_logger, get_default_hparams, init_save_dir, \
|
||||
# init_checkpoint_callback, reset_seed, set_random_master_port
|
||||
|
||||
|
||||
|
|
|
@ -2,18 +2,18 @@ import torch
|
|||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from tests.base.datasets import TrialMNIST
|
||||
from pytorch_lightning.core.lightning import LightningModule
|
||||
from tests.base.datasets import TrialMNIST
|
||||
from tests.base.eval_model_optimizers import ConfigureOptimizersPool
|
||||
from tests.base.eval_model_test_dataloaders import TestDataloaderVariations
|
||||
from tests.base.eval_model_test_epoch_ends import TestEpochEndVariations
|
||||
from tests.base.eval_model_test_steps import TestStepVariations
|
||||
from tests.base.eval_model_train_dataloaders import TrainDataloaderVariations
|
||||
from tests.base.eval_model_train_steps import TrainingStepVariations
|
||||
from tests.base.eval_model_utils import ModelTemplateUtils
|
||||
from tests.base.eval_model_valid_dataloaders import ValDataloaderVariations
|
||||
from tests.base.eval_model_valid_epoch_ends import ValidationEpochEndVariations
|
||||
from tests.base.eval_model_valid_steps import ValidationStepVariations
|
||||
from tests.base.eval_model_utils import ModelTemplateUtils
|
||||
|
||||
|
||||
class EvalModelTemplate(
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from torch.utils.data import DataLoader
|
||||
|
||||
from tests.base.datasets import TrialMNIST
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from abc import ABC
|
||||
from collections import OrderedDict
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import os
|
||||
from collections import OrderedDict
|
||||
from typing import Dict
|
||||
|
||||
|
|
|
@ -7,18 +7,11 @@ import torch
|
|||
# from pl_examples import LightningTemplateModel
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||
from pytorch_lightning.loggers import TestTubeLogger, TensorBoardLogger
|
||||
from tests.base import LightningTestModel, EvalModelTemplate
|
||||
from pytorch_lightning.loggers import TensorBoardLogger
|
||||
from tests import TEMP_PATH, RANDOM_PORTS, RANDOM_SEEDS
|
||||
from tests.base import LightningTestModel
|
||||
from tests.base.datasets import PATH_DATASETS
|
||||
|
||||
# generate a list of random seeds for each test
|
||||
RANDOM_PORTS = list(np.random.randint(12000, 19000, 1000))
|
||||
ROOT_SEED = 1234
|
||||
torch.manual_seed(ROOT_SEED)
|
||||
np.random.seed(ROOT_SEED)
|
||||
RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
|
||||
ROOT_PATH = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
def assert_speed_parity(pl_times, pt_times, num_epochs):
|
||||
|
||||
|
@ -33,7 +26,7 @@ def assert_speed_parity(pl_times, pt_times, num_epochs):
|
|||
f"lightning was slower than PT (threshold {max_diff_per_epoch})"
|
||||
|
||||
|
||||
def run_model_test_no_loggers(trainer_options, model, min_acc=0.50):
|
||||
def run_model_test_without_loggers(trainer_options, model, min_acc=0.50):
|
||||
# save_dir = trainer_options['default_root_dir']
|
||||
|
||||
# fit model
|
||||
|
@ -66,14 +59,16 @@ def run_model_test(trainer_options, model, on_gpu=True):
|
|||
save_dir = trainer_options['default_root_dir']
|
||||
|
||||
# logger file to get meta
|
||||
logger = get_default_testtube_logger(save_dir, False)
|
||||
logger = get_default_logger(save_dir)
|
||||
|
||||
# logger file to get weights
|
||||
checkpoint = init_checkpoint_callback(logger)
|
||||
|
||||
# add these to the trainer options
|
||||
trainer_options['checkpoint_callback'] = checkpoint
|
||||
trainer_options['logger'] = logger
|
||||
trainer_options.update(
|
||||
checkpoint_callback=checkpoint,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
# fit model
|
||||
trainer = Trainer(**trainer_options)
|
||||
|
@ -118,8 +113,10 @@ def get_default_hparams(continue_training=False, hpc_exp_number=0):
|
|||
}
|
||||
|
||||
if continue_training:
|
||||
args['test_tube_do_checkpoint_load'] = True
|
||||
args['hpc_exp_number'] = hpc_exp_number
|
||||
args.update(
|
||||
test_tube_do_checkpoint_load=True,
|
||||
hpc_exp_number=hpc_exp_number,
|
||||
)
|
||||
|
||||
hparams = Namespace(**args)
|
||||
return hparams
|
||||
|
@ -137,9 +134,9 @@ def get_default_model(lbfgs=False):
|
|||
return model, hparams
|
||||
|
||||
|
||||
def get_default_testtube_logger(save_dir, debug=True, version=None):
|
||||
def get_default_logger(save_dir, version=None):
|
||||
# set up logger object without actually saving logs
|
||||
logger = TestTubeLogger(save_dir, name='lightning_logs', debug=debug, version=version)
|
||||
logger = TensorBoardLogger(save_dir, name='lightning_logs', version=version)
|
||||
return logger
|
||||
|
||||
|
||||
|
@ -153,7 +150,10 @@ def get_data_path(expt_logger, path_dir=None):
|
|||
return expt.get_data_path(name, version)
|
||||
# the other experiments...
|
||||
if not path_dir:
|
||||
path_dir = ROOT_PATH
|
||||
if hasattr(expt_logger, 'save_dir') and expt_logger.save_dir:
|
||||
path_dir = expt_logger.save_dir
|
||||
else:
|
||||
path_dir = TEMP_PATH
|
||||
path_expt = os.path.join(path_dir, name, 'version_%s' % version)
|
||||
# try if the new sub-folder exists, typical case for test-tube
|
||||
if not os.path.isdir(path_expt):
|
||||
|
@ -161,9 +161,9 @@ def get_data_path(expt_logger, path_dir=None):
|
|||
return path_expt
|
||||
|
||||
|
||||
def load_model(exp, root_weights_dir, module_class=LightningTestModel, path_expt=None):
|
||||
def load_model(logger, root_weights_dir, module_class=LightningTestModel, path_expt=None):
|
||||
# load trained model
|
||||
path_expt_dir = get_data_path(exp, path_dir=path_expt)
|
||||
path_expt_dir = get_data_path(logger, path_dir=path_expt)
|
||||
tags_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_CSV_TAGS)
|
||||
|
||||
checkpoints = [x for x in os.listdir(root_weights_dir) if '.ckpt' in x]
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
from functools import wraps
|
||||
|
||||
import pytest
|
||||
|
||||
import torch.multiprocessing as mp
|
||||
|
@ -7,10 +9,6 @@ def pytest_configure(config):
|
|||
config.addinivalue_line("markers", "spawn: spawn test in a separate process using torch.multiprocessing.spawn")
|
||||
|
||||
|
||||
def wrap(i, fn, args):
|
||||
return fn(*args)
|
||||
|
||||
|
||||
@pytest.mark.tryfirst
|
||||
def pytest_pyfunc_call(pyfuncitem):
|
||||
if pyfuncitem.get_closest_marker("spawn"):
|
||||
|
@ -18,5 +16,5 @@ def pytest_pyfunc_call(pyfuncitem):
|
|||
funcargs = pyfuncitem.funcargs
|
||||
testargs = tuple([funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames])
|
||||
|
||||
mp.spawn(wrap, (testfunction, testargs))
|
||||
mp.spawn(wraps, (testfunction, testargs))
|
||||
return True
|
||||
|
|
|
@ -7,7 +7,6 @@ import tests.base.utils as tutils
|
|||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.loggers import (
|
||||
TensorBoardLogger, MLFlowLogger, NeptuneLogger, TestTubeLogger, CometLogger)
|
||||
from tests.base import LightningTestModel
|
||||
|
||||
|
||||
def _get_logger_args(logger_class, save_dir):
|
||||
|
|
|
@ -11,66 +11,54 @@ from tests.base import (
|
|||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
|
||||
def test_amp_single_gpu(tmpdir):
|
||||
"""Make sure DDP + AMP work."""
|
||||
tutils.reset_seed()
|
||||
|
||||
hparams = tutils.get_default_hparams()
|
||||
model = LightningTestModel(hparams)
|
||||
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
max_epochs=1,
|
||||
gpus=1,
|
||||
distributed_backend='ddp',
|
||||
precision=16
|
||||
)
|
||||
|
||||
tutils.run_model_test(trainer_options, model)
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
@pytest.mark.parametrize("backend", ['dp', 'ddp'])
|
||||
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
|
||||
def test_no_amp_single_gpu(tmpdir):
|
||||
"""Make sure DDP + AMP work."""
|
||||
def test_amp_single_gpu(tmpdir, backend):
|
||||
"""Make sure DP/DDP + AMP work."""
|
||||
tutils.reset_seed()
|
||||
|
||||
hparams = tutils.get_default_hparams()
|
||||
model = LightningTestModel(hparams)
|
||||
model, hparams = tutils.get_default_model()
|
||||
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
max_epochs=1,
|
||||
gpus=1,
|
||||
distributed_backend='dp',
|
||||
distributed_backend=backend,
|
||||
precision=16
|
||||
)
|
||||
|
||||
# tutils.run_model_test(trainer_options, model)
|
||||
|
||||
trainer = Trainer(**trainer_options)
|
||||
result = trainer.fit(model)
|
||||
|
||||
assert result == 1
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
@pytest.mark.parametrize("backend", ['dp', 'ddp'])
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_amp_gpu_ddp(tmpdir):
|
||||
"""Make sure DDP + AMP work."""
|
||||
def test_amp_multi_gpu(tmpdir, backend):
|
||||
"""Make sure DP/DDP + AMP work."""
|
||||
tutils.reset_seed()
|
||||
tutils.set_random_master_port()
|
||||
|
||||
hparams = tutils.get_default_hparams()
|
||||
model = LightningTestModel(hparams)
|
||||
model, hparams = tutils.get_default_model()
|
||||
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
max_epochs=1,
|
||||
gpus=2,
|
||||
distributed_backend='ddp',
|
||||
# gpus=2,
|
||||
gpus='0, 1', # test init with gpu string
|
||||
distributed_backend=backend,
|
||||
precision=16
|
||||
)
|
||||
|
||||
tutils.run_model_test(trainer_options, model)
|
||||
# tutils.run_model_test(trainer_options, model)
|
||||
trainer = Trainer(**trainer_options)
|
||||
result = trainer.fit(model)
|
||||
assert result
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
|
@ -94,7 +82,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
|
|||
)
|
||||
|
||||
# exp file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
# exp file to get weights
|
||||
checkpoint = tutils.init_checkpoint_callback(logger)
|
||||
|
@ -125,7 +113,6 @@ def test_cpu_model_with_amp(tmpdir):
|
|||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
progress_bar_refresh_rate=0,
|
||||
logger=tutils.get_default_testtube_logger(tmpdir),
|
||||
max_epochs=1,
|
||||
train_percent_check=0.4,
|
||||
val_percent_check=0.4,
|
||||
|
@ -136,28 +123,3 @@ def test_cpu_model_with_amp(tmpdir):
|
|||
|
||||
with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
|
||||
tutils.run_model_test(trainer_options, model, on_gpu=False)
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_amp_gpu_dp(tmpdir):
|
||||
"""Make sure DP + AMP work."""
|
||||
tutils.reset_seed()
|
||||
|
||||
model, hparams = tutils.get_default_model()
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
max_epochs=1,
|
||||
gpus='0, 1', # test init with gpu string
|
||||
distributed_backend='dp',
|
||||
precision=16
|
||||
)
|
||||
|
||||
trainer = Trainer(**trainer_options)
|
||||
result = trainer.fit(model)
|
||||
|
||||
assert result == 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import platform
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
@ -29,7 +28,6 @@ def test_early_stopping_cpu_model(tmpdir):
|
|||
gradient_clip_val=1.0,
|
||||
overfit_pct=0.20,
|
||||
track_grad_norm=2,
|
||||
logger=tutils.get_default_testtube_logger(tmpdir),
|
||||
train_percent_check=0.1,
|
||||
val_percent_check=0.1,
|
||||
)
|
||||
|
@ -42,6 +40,7 @@ def test_early_stopping_cpu_model(tmpdir):
|
|||
model.unfreeze()
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
@pytest.mark.skipif(platform.system() == "Windows",
|
||||
reason="Distributed training is not supported on Windows")
|
||||
@pytest.mark.skipif((platform.system() == "Darwin" and
|
||||
|
@ -81,7 +80,7 @@ def test_lbfgs_cpu_model(tmpdir):
|
|||
)
|
||||
|
||||
model, hparams = tutils.get_default_model(lbfgs=True)
|
||||
tutils.run_model_test_no_loggers(trainer_options, model, min_acc=0.5)
|
||||
tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5)
|
||||
|
||||
|
||||
def test_default_logger_callbacks_cpu_model(tmpdir):
|
||||
|
@ -99,7 +98,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
|
|||
)
|
||||
|
||||
model, hparams = tutils.get_default_model()
|
||||
tutils.run_model_test_no_loggers(trainer_options, model)
|
||||
tutils.run_model_test_without_loggers(trainer_options, model)
|
||||
|
||||
# test freeze on cpu
|
||||
model.freeze()
|
||||
|
@ -114,7 +113,7 @@ def test_running_test_after_fitting(tmpdir):
|
|||
model = LightningTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
# logger file to get weights
|
||||
checkpoint = tutils.init_checkpoint_callback(logger)
|
||||
|
@ -142,7 +141,7 @@ def test_running_test_after_fitting(tmpdir):
|
|||
tutils.assert_ok_model_acc(trainer, thr=0.5)
|
||||
|
||||
|
||||
def test_running_test_without_val(tmpdir):
|
||||
def test_running_test_no_val(tmpdir):
|
||||
"""Verify `test()` works on a model with no `val_loader`."""
|
||||
tutils.reset_seed()
|
||||
|
||||
|
@ -153,7 +152,7 @@ def test_running_test_without_val(tmpdir):
|
|||
model = CurrentTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
# logger file to get weights
|
||||
checkpoint = tutils.init_checkpoint_callback(logger)
|
||||
|
@ -253,7 +252,6 @@ def test_cpu_model(tmpdir):
|
|||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
progress_bar_refresh_rate=0,
|
||||
logger=tutils.get_default_testtube_logger(tmpdir),
|
||||
max_epochs=1,
|
||||
train_percent_check=0.4,
|
||||
val_percent_check=0.4
|
||||
|
@ -274,7 +272,6 @@ def test_all_features_cpu_model(tmpdir):
|
|||
overfit_pct=0.20,
|
||||
track_grad_norm=2,
|
||||
progress_bar_refresh_rate=0,
|
||||
logger=tutils.get_default_testtube_logger(tmpdir),
|
||||
accumulate_grad_batches=2,
|
||||
max_epochs=1,
|
||||
train_percent_check=0.4,
|
||||
|
|
|
@ -7,39 +7,17 @@ import tests.base.utils as tutils
|
|||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||
from pytorch_lightning.core import memory
|
||||
from pytorch_lightning.trainer.distrib_parts import (
|
||||
parse_gpu_ids,
|
||||
determine_root_gpu_device,
|
||||
)
|
||||
from pytorch_lightning.trainer.distrib_parts import parse_gpu_ids, determine_root_gpu_device
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from tests.base import LightningTestModel
|
||||
|
||||
PRETEND_N_OF_GPUS = 16
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
@pytest.mark.parametrize("backend", ['dp', 'ddp', 'ddp2'])
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_multi_gpu_model_ddp2(tmpdir):
|
||||
"""Make sure DDP2 works."""
|
||||
|
||||
tutils.reset_seed()
|
||||
tutils.set_random_master_port()
|
||||
|
||||
model, hparams = tutils.get_default_model()
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
max_epochs=1,
|
||||
train_percent_check=0.4,
|
||||
val_percent_check=0.2,
|
||||
gpus=2,
|
||||
weights_summary=None,
|
||||
distributed_backend='ddp2'
|
||||
)
|
||||
|
||||
tutils.run_model_test(trainer_options, model)
|
||||
|
||||
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_multi_gpu_model_ddp(tmpdir):
|
||||
def test_multi_gpu_model(tmpdir, backend):
|
||||
"""Make sure DDP works."""
|
||||
|
||||
tutils.reset_seed()
|
||||
|
@ -48,15 +26,20 @@ def test_multi_gpu_model_ddp(tmpdir):
|
|||
model, hparams = tutils.get_default_model()
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
progress_bar_refresh_rate=0,
|
||||
max_epochs=1,
|
||||
train_percent_check=0.4,
|
||||
val_percent_check=0.2,
|
||||
gpus=[0, 1],
|
||||
distributed_backend='ddp'
|
||||
distributed_backend=backend,
|
||||
)
|
||||
|
||||
tutils.run_model_test(trainer_options, model)
|
||||
# tutils.run_model_test(trainer_options, model)
|
||||
trainer = Trainer(**trainer_options)
|
||||
result = trainer.fit(model)
|
||||
assert result
|
||||
|
||||
# test memory helper functions
|
||||
memory.get_memory_profile('min_max')
|
||||
|
||||
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
|
@ -91,7 +74,7 @@ def test_cpu_slurm_save_load(tmpdir):
|
|||
model = LightningTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
version = logger.version
|
||||
|
||||
trainer_options = dict(
|
||||
|
@ -106,7 +89,7 @@ def test_cpu_slurm_save_load(tmpdir):
|
|||
real_global_step = trainer.global_step
|
||||
|
||||
# training complete
|
||||
assert result == 1, 'amp + ddp model failed to complete'
|
||||
assert result == 1, 'cpu model failed to complete'
|
||||
|
||||
# predict with trained model before saving
|
||||
# make a prediction
|
||||
|
@ -130,7 +113,7 @@ def test_cpu_slurm_save_load(tmpdir):
|
|||
assert os.path.exists(saved_filepath)
|
||||
|
||||
# new logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False, version=version)
|
||||
logger = tutils.get_default_logger(tmpdir, version=version)
|
||||
|
||||
trainer_options = dict(
|
||||
max_epochs=1,
|
||||
|
@ -175,28 +158,6 @@ def test_multi_gpu_none_backend(tmpdir):
|
|||
tutils.run_model_test(trainer_options, model)
|
||||
|
||||
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_multi_gpu_model_dp(tmpdir):
|
||||
"""Make sure DP works."""
|
||||
tutils.reset_seed()
|
||||
|
||||
model, hparams = tutils.get_default_model()
|
||||
trainer_options = dict(
|
||||
default_root_dir=tmpdir,
|
||||
progress_bar_refresh_rate=0,
|
||||
distributed_backend='dp',
|
||||
max_epochs=1,
|
||||
train_percent_check=0.1,
|
||||
val_percent_check=0.1,
|
||||
gpus='-1'
|
||||
)
|
||||
|
||||
tutils.run_model_test(trainer_options, model)
|
||||
|
||||
# test memory helper functions
|
||||
memory.get_memory_profile('min_max')
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mocked_device_count(monkeypatch):
|
||||
def device_count():
|
||||
|
@ -249,21 +210,18 @@ def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distrib
|
|||
|
||||
|
||||
@pytest.mark.gpus_param_tests
|
||||
@pytest.mark.parametrize([
|
||||
'gpus', 'expected_root_gpu', "distributed_backend"], [
|
||||
@pytest.mark.parametrize(['gpus', 'expected_root_gpu', "distributed_backend"], [
|
||||
pytest.param(None, None, None, id="None is None"),
|
||||
pytest.param(None, None, "ddp", id="None is None"),
|
||||
pytest.param(0, None, "ddp", id="None is None"),
|
||||
])
|
||||
def test_root_gpu_property_0_passing(
|
||||
mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
|
||||
def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
|
||||
assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu
|
||||
|
||||
|
||||
# Asking for a gpu when none are available will result in a MisconfigurationException
|
||||
@pytest.mark.gpus_param_tests
|
||||
@pytest.mark.parametrize([
|
||||
'gpus', 'expected_root_gpu', "distributed_backend"], [
|
||||
@pytest.mark.parametrize(['gpus', 'expected_root_gpu', "distributed_backend"], [
|
||||
pytest.param(1, None, "ddp"),
|
||||
pytest.param(3, None, "ddp"),
|
||||
pytest.param(3, None, "ddp"),
|
||||
|
@ -272,8 +230,7 @@ def test_root_gpu_property_0_passing(
|
|||
pytest.param(-1, None, "ddp"),
|
||||
pytest.param('-1', None, "ddp")
|
||||
])
|
||||
def test_root_gpu_property_0_raising(
|
||||
mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
|
||||
def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
|
||||
with pytest.raises(MisconfigurationException):
|
||||
Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu
|
||||
|
||||
|
@ -325,11 +282,10 @@ def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
|
|||
|
||||
|
||||
@pytest.mark.gpus_param_tests
|
||||
@pytest.mark.parametrize("gpus", [''])
|
||||
def test_parse_gpu_fail_on_empty_string(mocked_device_count, gpus):
|
||||
def test_parse_gpu_fail_on_empty_string(mocked_device_count):
|
||||
# This currently results in a ValueError instead of MisconfigurationException
|
||||
with pytest.raises(ValueError):
|
||||
parse_gpu_ids(gpus)
|
||||
parse_gpu_ids('')
|
||||
|
||||
|
||||
@pytest.mark.gpus_param_tests
|
||||
|
@ -350,7 +306,3 @@ def test_parse_gpu_fail_on_non_existant_id_2(mocked_device_count):
|
|||
def test_parse_gpu_returns_None_when_no_devices_are_available(mocked_device_count_0, gpus):
|
||||
with pytest.raises(MisconfigurationException):
|
||||
parse_gpu_ids(gpus)
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# pytest.main([__file__])
|
||||
|
|
|
@ -16,8 +16,10 @@ from tests.base import (
|
|||
)
|
||||
|
||||
|
||||
@pytest.mark.spawn
|
||||
@pytest.mark.parametrize("backend", ['dp', 'ddp'])
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_running_test_pretrained_model_ddp(tmpdir):
|
||||
def test_running_test_pretrained_model_distrib(tmpdir, backend):
|
||||
"""Verify `test()` on pretrained model."""
|
||||
|
||||
tutils.reset_seed()
|
||||
|
@ -27,20 +29,20 @@ def test_running_test_pretrained_model_ddp(tmpdir):
|
|||
model = LightningTestModel(hparams)
|
||||
|
||||
# exp file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
# exp file to get weights
|
||||
checkpoint = tutils.init_checkpoint_callback(logger)
|
||||
|
||||
trainer_options = dict(
|
||||
progress_bar_refresh_rate=0,
|
||||
max_epochs=1,
|
||||
max_epochs=2,
|
||||
train_percent_check=0.4,
|
||||
val_percent_check=0.2,
|
||||
checkpoint_callback=checkpoint,
|
||||
logger=logger,
|
||||
gpus=[0, 1],
|
||||
distributed_backend='ddp'
|
||||
distributed_backend=backend,
|
||||
)
|
||||
|
||||
# fit model
|
||||
|
@ -59,6 +61,9 @@ def test_running_test_pretrained_model_ddp(tmpdir):
|
|||
new_trainer = Trainer(**trainer_options)
|
||||
new_trainer.test(pretrained_model)
|
||||
|
||||
# test we have good test accuracy
|
||||
tutils.assert_ok_model_acc(new_trainer)
|
||||
|
||||
dataloaders = model.test_dataloader()
|
||||
if not isinstance(dataloaders, list):
|
||||
dataloaders = [dataloaders]
|
||||
|
@ -67,7 +72,7 @@ def test_running_test_pretrained_model_ddp(tmpdir):
|
|||
tutils.run_prediction(dataloader, pretrained_model)
|
||||
|
||||
|
||||
def test_running_test_pretrained_model(tmpdir):
|
||||
def test_running_test_pretrained_model_cpu(tmpdir):
|
||||
"""Verify test() on pretrained model."""
|
||||
tutils.reset_seed()
|
||||
|
||||
|
@ -75,7 +80,7 @@ def test_running_test_pretrained_model(tmpdir):
|
|||
model = LightningTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
# logger file to get weights
|
||||
checkpoint = tutils.init_checkpoint_callback(logger)
|
||||
|
@ -119,7 +124,6 @@ def test_load_model_from_checkpoint(tmpdir):
|
|||
train_percent_check=0.4,
|
||||
val_percent_check=0.2,
|
||||
checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
|
||||
logger=False,
|
||||
default_root_dir=tmpdir,
|
||||
)
|
||||
|
||||
|
@ -150,47 +154,6 @@ def test_load_model_from_checkpoint(tmpdir):
|
|||
tutils.assert_ok_model_acc(new_trainer)
|
||||
|
||||
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_running_test_pretrained_model_dp(tmpdir):
|
||||
"""Verify test() on pretrained model."""
|
||||
tutils.reset_seed()
|
||||
|
||||
hparams = tutils.get_default_hparams()
|
||||
model = LightningTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
|
||||
# logger file to get weights
|
||||
checkpoint = tutils.init_checkpoint_callback(logger)
|
||||
|
||||
trainer_options = dict(
|
||||
max_epochs=2,
|
||||
train_percent_check=0.4,
|
||||
val_percent_check=0.2,
|
||||
checkpoint_callback=checkpoint,
|
||||
logger=logger,
|
||||
gpus=[0, 1],
|
||||
distributed_backend='dp'
|
||||
)
|
||||
|
||||
# fit model
|
||||
trainer = Trainer(**trainer_options)
|
||||
result = trainer.fit(model)
|
||||
|
||||
# correct result and ok accuracy
|
||||
assert result == 1, 'training failed to complete'
|
||||
pretrained_model = tutils.load_model(logger,
|
||||
trainer.checkpoint_callback.dirpath,
|
||||
module_class=LightningTestModel)
|
||||
|
||||
new_trainer = Trainer(**trainer_options)
|
||||
new_trainer.test(pretrained_model)
|
||||
|
||||
# test we have good test accuracy
|
||||
tutils.assert_ok_model_acc(new_trainer)
|
||||
|
||||
|
||||
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
|
||||
def test_dp_resume(tmpdir):
|
||||
"""Make sure DP continues training correctly."""
|
||||
|
@ -207,7 +170,7 @@ def test_dp_resume(tmpdir):
|
|||
)
|
||||
|
||||
# get logger
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, debug=False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
# exp file to get weights
|
||||
# logger file to get weights
|
||||
|
@ -235,7 +198,7 @@ def test_dp_resume(tmpdir):
|
|||
trainer.hpc_save(tmpdir, logger)
|
||||
|
||||
# init new trainer
|
||||
new_logger = tutils.get_default_testtube_logger(tmpdir, version=logger.version)
|
||||
new_logger = tutils.get_default_logger(tmpdir, version=logger.version)
|
||||
trainer_options['logger'] = new_logger
|
||||
trainer_options['checkpoint_callback'] = ModelCheckpoint(tmpdir)
|
||||
trainer_options['train_percent_check'] = 0.5
|
||||
|
@ -275,7 +238,7 @@ def test_model_saving_loading(tmpdir):
|
|||
model = LightningTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
trainer_options = dict(
|
||||
max_epochs=1,
|
||||
|
@ -356,7 +319,3 @@ def test_load_model_with_missing_hparams(tmpdir):
|
|||
# warn if user's model has hparams argument
|
||||
with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."):
|
||||
LightningTestModelWithUnusedHyperparametersArg.load_from_checkpoint(last_checkpoint)
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# pytest.main([__file__])
|
||||
|
|
|
@ -63,7 +63,7 @@ def test_simple_profiler_iterable_durations(simple_profiler, action, expected):
|
|||
"""Ensure the reported durations are reasonably accurate."""
|
||||
iterable = _sleep_generator(expected)
|
||||
|
||||
for duration in simple_profiler.profile_iterable(iterable, action):
|
||||
for _ in simple_profiler.profile_iterable(iterable, action):
|
||||
pass
|
||||
|
||||
# we exclude the last item in the recorded durations since that's when StopIteration is raised
|
||||
|
@ -135,7 +135,7 @@ def test_advanced_profiler_iterable_durations(advanced_profiler, action, expecte
|
|||
"""Ensure the reported durations are reasonably accurate."""
|
||||
iterable = _sleep_generator(expected)
|
||||
|
||||
for duration in advanced_profiler.profile_iterable(iterable, action):
|
||||
for _ in advanced_profiler.profile_iterable(iterable, action):
|
||||
pass
|
||||
|
||||
recored_total_duration = _get_python_cprofile_total_duration(
|
||||
|
|
|
@ -153,7 +153,7 @@ def test_trainer_callback_system(tmpdir):
|
|||
assert test_callback.on_test_end_called
|
||||
|
||||
|
||||
def test_early_stopping_without_val_step(tmpdir):
|
||||
def test_early_stopping_no_val_step(tmpdir):
|
||||
"""Test that early stopping callback falls back to training metrics when no validation defined."""
|
||||
tutils.reset_seed()
|
||||
|
||||
|
|
|
@ -7,12 +7,9 @@ from tests.base import EvalModelTemplate
|
|||
from tests.base import (
|
||||
TestModelBase,
|
||||
LightValidationDataloader,
|
||||
LightTestDataloader,
|
||||
LightValidationStepMixin,
|
||||
LightValStepFitSingleDataloaderMixin,
|
||||
LightTrainDataloader,
|
||||
LightTestStepMixin,
|
||||
LightTestFitMultipleTestDataloadersMixin,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -17,7 +17,6 @@ from tests.base import (
|
|||
LightValStepFitMultipleDataloadersMixin,
|
||||
LightValStepFitSingleDataloaderMixin,
|
||||
LightTrainDataloader,
|
||||
LightValidationDataloader,
|
||||
LightInfTrainDataloader,
|
||||
LightInfValDataloader,
|
||||
LightInfTestDataloader,
|
||||
|
@ -540,23 +539,17 @@ def test_dataloader_reinit_for_subclass():
|
|||
batch_sampler=None, num_workers=0, collate_fn=None,
|
||||
pin_memory=False, drop_last=False, timeout=0,
|
||||
worker_init_fn=None, dummy_kwarg=None):
|
||||
super().__init__(dataset,
|
||||
batch_size,
|
||||
shuffle,
|
||||
sampler,
|
||||
batch_sampler,
|
||||
num_workers,
|
||||
collate_fn,
|
||||
pin_memory,
|
||||
drop_last,
|
||||
timeout,
|
||||
super().__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
|
||||
num_workers, collate_fn, pin_memory, drop_last, timeout,
|
||||
worker_init_fn)
|
||||
|
||||
self.dummy_kwarg = dummy_kwarg
|
||||
|
||||
trainer = Trainer(gpus=[0, 1],
|
||||
num_nodes=1,
|
||||
distributed_backend='ddp')
|
||||
trainer = Trainer(
|
||||
gpus=[0, 1],
|
||||
num_nodes=1,
|
||||
distributed_backend='ddp',
|
||||
)
|
||||
|
||||
class CustomDummyObj:
|
||||
sampler = None
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
|
||||
import torch
|
||||
|
||||
import tests.base.utils as tutils
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
|
|
|
@ -3,7 +3,6 @@ import torch
|
|||
|
||||
import tests.base.utils as tutils
|
||||
from pytorch_lightning import Trainer
|
||||
|
||||
from tests.base import (
|
||||
TestModelBase,
|
||||
LightTrainDataloader,
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
import glob
|
||||
import math
|
||||
import os
|
||||
from argparse import Namespace, ArgumentParser
|
||||
from argparse import Namespace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import tests.base.utils as tutils
|
||||
from pytorch_lightning import Callback
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
|
||||
from pytorch_lightning import Callback
|
||||
from pytorch_lightning.core.lightning import load_hparams_from_tags_csv
|
||||
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
|
@ -60,7 +60,7 @@ def test_no_val_module(tmpdir):
|
|||
model = CurrentTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
trainer_options = dict(
|
||||
max_epochs=1,
|
||||
|
@ -100,7 +100,7 @@ def test_no_val_end_module(tmpdir):
|
|||
model = CurrentTestModel(hparams)
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
trainer_options = dict(
|
||||
max_epochs=1,
|
||||
|
@ -211,7 +211,7 @@ def test_loading_meta_tags(tmpdir):
|
|||
hparams = tutils.get_default_hparams()
|
||||
|
||||
# save tags
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
|
||||
logger.log_hyperparams(hparams)
|
||||
logger.save()
|
||||
|
@ -335,7 +335,6 @@ def test_resume_from_checkpoint_epoch_restored(tmpdir):
|
|||
train_percent_check=0.65,
|
||||
val_percent_check=1,
|
||||
checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
|
||||
logger=False,
|
||||
default_root_dir=tmpdir,
|
||||
early_stop_callback=False,
|
||||
val_check_interval=1.,
|
||||
|
|
|
@ -15,7 +15,7 @@ def test_default_args(tmpdir):
|
|||
tutils.reset_seed()
|
||||
|
||||
# logger file to get meta
|
||||
logger = tutils.get_default_testtube_logger(tmpdir, False)
|
||||
logger = tutils.get_default_logger(tmpdir)
|
||||
|
||||
parser = ArgumentParser(add_help=False)
|
||||
args = parser.parse_args()
|
||||
|
|
Loading…
Reference in New Issue