added test seeds (#306)

* added test seeds

* added test seeds

* updated docs
William Falcon, 2019-10-05 10:56:52 -04:00, committed by GitHub
parent 2ac9f1aea7
commit c9786cdef1
2 changed files with 102 additions and 8 deletions

Changed file 1 of 2

@@ -3,15 +3,20 @@ import pickle
 import shutil
 import numpy as np
 import torch
 from pytorch_lightning import Trainer
 from pytorch_lightning.testing import LightningTestModel
 from .test_models import get_hparams, get_test_tube_logger, init_save_dir, clear_save_dir
+RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
 def test_testtube_logger():
-    """verify that basic functionality of test tube logger works"""
+    """
+    verify that basic functionality of test tube logger works
+    """
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -35,7 +40,11 @@ def test_testtube_logger():
 def test_testtube_pickle():
-    """Verify that pickling a trainer containing a test tube logger works"""
+    """
+    Verify that pickling a trainer containing a test tube logger works
+    """
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -58,7 +67,11 @@ def test_testtube_pickle():
 def test_mlflow_logger():
-    """verify that basic functionality of mlflow logger works"""
+    """
+    verify that basic functionality of mlflow logger works
+    """
+    reset_seed()
     try:
         from pytorch_lightning.logging import MLFlowLogger
     except ModuleNotFoundError:
@@ -90,7 +103,11 @@ def test_mlflow_logger():
 def test_mlflow_pickle():
-    """verify that pickling trainer with mlflow logger works"""
+    """
+    verify that pickling trainer with mlflow logger works
+    """
+    reset_seed()
     try:
         from pytorch_lightning.logging import MLFlowLogger
     except ModuleNotFoundError:
@@ -115,3 +132,9 @@ def test_mlflow_pickle():
     pkl_bytes = pickle.dumps(trainer)
     trainer2 = pickle.loads(pkl_bytes)
     trainer2.logger.log_metrics({"acc": 1.0})
+def reset_seed():
+    SEED = RANDOM_SEEDS.pop()
+    torch.manual_seed(SEED)
+    np.random.seed(SEED)
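The hunk above is the core of the change: every test now calls reset_seed(), which pops a seed from the module-level RANDOM_SEEDS pool and seeds both torch and numpy. A minimal, self-contained sketch of the pattern follows; the test name test_example, its body, and the assert are illustrative only and not part of the diff.

import numpy as np
import torch

# Pool of seeds drawn once at import time; each call to reset_seed() consumes one.
RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))

def reset_seed():
    seed = int(RANDOM_SEEDS.pop())
    torch.manual_seed(seed)
    np.random.seed(seed)

def test_example():
    # Draw a fresh seed so this test does not inherit RNG state from earlier tests.
    reset_seed()
    assert torch.randn(4).shape == (4,)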

Changed file 2 of 2

@@ -31,9 +31,7 @@ from pytorch_lightning.trainer import trainer_io
 from pytorch_lightning.logging import TestTubeLogger
 from examples import LightningTemplateModel
-SEED = 2334
-torch.manual_seed(SEED)
-np.random.seed(SEED)
+RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))
 # ------------------------------------------------------------------------
@@ -44,6 +42,9 @@ def test_default_logger_callbacks_cpu_model():
     Test each of the trainer options
     :return:
     """
+    reset_seed()
     trainer_options = dict(
         max_nb_epochs=1,
         gradient_clip_val=1.0,
@@ -93,6 +94,9 @@ def test_multi_gpu_model_ddp2():
         return
     os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0])
+    reset_seed()
     model, hparams = get_model()
     trainer_options = dict(
         show_progress_bar=True,
@@ -115,6 +119,8 @@ def test_dp_resume():
     if not can_run_gpu_test():
         return
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -195,6 +201,8 @@ def test_running_test_pretrained_model_ddp():
     if not can_run_gpu_test():
         return
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -240,6 +248,8 @@ def test_running_test_pretrained_model_ddp():
 def test_running_test_after_fitting():
     """Verify test() on fitted model"""
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -278,6 +288,8 @@ def test_running_test_after_fitting():
 def test_running_test_without_val():
+    reset_seed()
     """Verify test() works on a model with no val_loader"""
     class CurrentTestModel(LightningTestMixin, LightningTestModelBase):
         pass
@@ -319,6 +331,8 @@ def test_running_test_without_val():
 def test_running_test_pretrained_model():
+    reset_seed()
     """Verify test() on pretrained model"""
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -361,6 +375,8 @@ def test_running_test_pretrained_model():
 def test_running_test_pretrained_model_dp():
+    reset_seed()
     """Verify test() on pretrained model"""
     if not can_run_gpu_test():
         return
@@ -407,6 +423,8 @@ def test_running_test_pretrained_model_dp():
 def test_gradient_accumulation_scheduling():
+    reset_seed()
     """
     Test grad accumulation by the freq of optimizer updates
     """
@@ -487,6 +505,8 @@ def test_multi_gpu_model_ddp():
         return
     os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0])
+    reset_seed()
     model, hparams = get_model()
     trainer_options = dict(
         show_progress_bar=False,
@@ -501,6 +521,7 @@ def test_multi_gpu_model_ddp():
 def test_optimizer_return_options():
+    reset_seed()
     trainer = Trainer()
     model, hparams = get_model()
@@ -531,6 +552,8 @@ def test_optimizer_return_options():
 def test_single_gpu_batch_parse():
+    reset_seed()
     if not can_run_gpu_test():
         return
@@ -577,6 +600,7 @@ def test_early_stopping_cpu_model():
     Test each of the trainer options
     :return:
     """
+    reset_seed()
     stopping = EarlyStopping(monitor='val_loss')
     trainer_options = dict(
@@ -604,6 +628,8 @@ def test_no_val_module():
     Tests use case where trainer saves the model, and user loads it from tags independently
     :return:
     """
+    reset_seed()
     hparams = get_hparams()
     class CurrentTestModel(LightningTestModelBase):
@@ -650,6 +676,7 @@ def test_no_val_end_module():
     Tests use case where trainer saves the model, and user loads it from tags independently
     :return:
     """
+    reset_seed()
     class CurrentTestModel(LightningValidationStepMixin, LightningTestModelBase):
         pass
@@ -696,6 +723,8 @@ def test_simple_cpu():
     Verify continue training session on CPU
     :return:
     """
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -723,6 +752,8 @@ def test_amp_single_gpu():
     Make sure DDP + AMP work
     :return:
     """
+    reset_seed()
     if not torch.cuda.is_available():
         warnings.warn('test_amp_gpu_ddp cannot run.'
                       'Rerun on a GPU node to run this test')
@@ -751,6 +782,8 @@ def test_no_amp_single_gpu():
     Make sure DDP + AMP work
     :return:
     """
+    reset_seed()
     if not torch.cuda.is_available():
         warnings.warn('test_amp_gpu_ddp cannot run.'
                       'Rerun on a GPU node to run this test')
@@ -780,6 +813,8 @@ def test_cpu_restore_training():
     Verify continue training session on CPU
     :return:
     """
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -852,6 +887,8 @@ def test_amp_gpu_ddp():
     os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0])
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -871,6 +908,8 @@ def test_cpu_slurm_save_load():
     Verify model save/load/checkpoint on CPU
     :return:
     """
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -946,6 +985,8 @@ def test_cpu_slurm_save_load():
 def test_loading_meta_tags():
+    reset_seed()
     from argparse import Namespace
     hparams = get_hparams()
@@ -967,6 +1008,7 @@ def test_loading_meta_tags():
 def test_dp_output_reduce():
+    reset_seed()
     # test identity when we have a single gpu
     out = torch.rand(3, 1)
@@ -992,6 +1034,8 @@ def test_model_saving_loading():
     Tests use case where trainer saves the model, and user loads it from tags independently
     :return:
     """
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -1047,6 +1091,8 @@ def test_model_saving_loading():
 def test_model_freeze_unfreeze():
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -1066,6 +1112,8 @@ def test_amp_gpu_ddp_slurm_managed():
     os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0])
     os.environ['SLURM_LOCALID'] = str(0)
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams)
@@ -1132,6 +1180,7 @@ def test_cpu_model_with_amp():
     Make sure model trains on CPU
     :return:
     """
+    reset_seed()
     trainer_options = dict(
         show_progress_bar=False,
@@ -1153,6 +1202,7 @@ def test_cpu_model():
     Make sure model trains on CPU
     :return:
     """
+    reset_seed()
     trainer_options = dict(
         show_progress_bar=False,
@@ -1172,6 +1222,7 @@ def test_all_features_cpu_model():
     Test each of the trainer options
     :return:
     """
+    reset_seed()
     trainer_options = dict(
         gradient_clip_val=1.0,
@@ -1195,6 +1246,8 @@ def test_single_gpu_model():
     Make sure single GPU works (DP mode)
     :return:
     """
+    reset_seed()
     if not torch.cuda.is_available():
         warnings.warn('test_single_gpu_model cannot run.'
                       ' Rerun on a GPU node to run this test')
@@ -1218,6 +1271,8 @@ def test_multi_gpu_none_backend():
     distributed_backend = None
     :return:
     """
+    reset_seed()
     if not can_run_gpu_test():
         return
@@ -1239,6 +1294,8 @@ def test_multi_gpu_model_dp():
     Make sure DP works
     :return:
     """
+    reset_seed()
     if not can_run_gpu_test():
         return
@@ -1263,6 +1320,8 @@ def test_amp_gpu_dp():
     Make sure DP + AMP work
     :return:
     """
+    reset_seed()
     if not can_run_gpu_test():
         return
@@ -1287,6 +1346,8 @@ def test_ddp_sampler_error():
     os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0])
+    reset_seed()
     hparams = get_hparams()
     model = LightningTestModel(hparams, force_remove_distributed_sampler=True)
@@ -1313,6 +1374,8 @@ def test_multiple_val_dataloader():
     Verify multiple val_dataloader
     :return:
     """
+    reset_seed()
     class CurrentTestModel(
         LightningValidationMultipleDataloadersMixin,
         LightningTestModelBase
@@ -1348,6 +1411,8 @@ def test_multiple_test_dataloader():
     Verify multiple test_dataloader
     :return:
     """
+    reset_seed()
     class CurrentTestModel(
         LightningTestMultipleDataloadersMixin,
         LightningTestModelBase
@@ -1577,5 +1642,11 @@ def can_run_gpu_test():
     return True
+def reset_seed():
+    SEED = RANDOM_SEEDS.pop()
+    torch.manual_seed(SEED)
+    np.random.seed(SEED)
 if __name__ == '__main__':
     pytest.main([__file__])
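For comparison, the same per-test seeding could be expressed as an autouse pytest fixture in a conftest.py instead of an explicit reset_seed() call at the top of every test. This is a hypothetical alternative sketch, not something this commit does; the fixture name seeded_rng is made up.

import numpy as np
import pytest
import torch

RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))

@pytest.fixture(autouse=True)
def seeded_rng():
    # Hypothetical: seed torch and numpy before every test automatically,
    # instead of calling reset_seed() by hand inside each test function.
    seed = int(RANDOM_SEEDS.pop())
    torch.manual_seed(seed)
    np.random.seed(seed)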