From c9786cdef1bcb4f574f3f221e7b950f1902a7a0a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 5 Oct 2019 10:56:52 -0400 Subject: [PATCH] added test seeds (#306) * added test seeds * added test seeds * updated docs --- tests/test_logging.py | 33 ++++++++++++++++--- tests/test_models.py | 77 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 8 deletions(-) diff --git a/tests/test_logging.py b/tests/test_logging.py index 54f76e1d4f..b50f87f240 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -3,15 +3,20 @@ import pickle import shutil import numpy as np +import torch from pytorch_lightning import Trainer from pytorch_lightning.testing import LightningTestModel - from .test_models import get_hparams, get_test_tube_logger, init_save_dir, clear_save_dir +RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000)) + def test_testtube_logger(): - """verify that basic functionality of test tube logger works""" + """ + verify that basic functionality of test tube logger works + """ + reset_seed() hparams = get_hparams() model = LightningTestModel(hparams) @@ -35,7 +40,11 @@ def test_testtube_logger(): def test_testtube_pickle(): - """Verify that pickling a trainer containing a test tube logger works""" + """ + Verify that pickling a trainer containing a test tube logger works + """ + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -58,7 +67,11 @@ def test_testtube_pickle(): def test_mlflow_logger(): - """verify that basic functionality of mlflow logger works""" + """ + verify that basic functionality of mlflow logger works + """ + reset_seed() + try: from pytorch_lightning.logging import MLFlowLogger except ModuleNotFoundError: @@ -90,7 +103,11 @@ def test_mlflow_logger(): def test_mlflow_pickle(): - """verify that pickling trainer with mlflow logger works""" + """ + verify that pickling trainer with mlflow logger works + """ + reset_seed() + try: from pytorch_lightning.logging import MLFlowLogger except ModuleNotFoundError: @@ -115,3 +132,9 @@ def test_mlflow_pickle(): pkl_bytes = pickle.dumps(trainer) trainer2 = pickle.loads(pkl_bytes) trainer2.logger.log_metrics({"acc": 1.0}) + + +def reset_seed(): + SEED = RANDOM_SEEDS.pop() + torch.manual_seed(SEED) + np.random.seed(SEED) diff --git a/tests/test_models.py b/tests/test_models.py index 5cc290c2c2..b2aa539182 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -31,9 +31,7 @@ from pytorch_lightning.trainer import trainer_io from pytorch_lightning.logging import TestTubeLogger from examples import LightningTemplateModel -SEED = 2334 -torch.manual_seed(SEED) -np.random.seed(SEED) +RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000)) # ------------------------------------------------------------------------ @@ -44,6 +42,9 @@ def test_default_logger_callbacks_cpu_model(): Test each of the trainer options :return: """ + + reset_seed() + trainer_options = dict( max_nb_epochs=1, gradient_clip_val=1.0, @@ -93,6 +94,9 @@ def test_multi_gpu_model_ddp2(): return os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0]) + + reset_seed() + model, hparams = get_model() trainer_options = dict( show_progress_bar=True, @@ -115,6 +119,8 @@ def test_dp_resume(): if not can_run_gpu_test(): return + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -195,6 +201,8 @@ def test_running_test_pretrained_model_ddp(): if not can_run_gpu_test(): return + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -240,6 +248,8 
@@ def test_running_test_pretrained_model_ddp(): def test_running_test_after_fitting(): """Verify test() on fitted model""" + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -278,6 +288,8 @@ def test_running_test_after_fitting(): def test_running_test_without_val(): + reset_seed() + """Verify test() works on a model with no val_loader""" class CurrentTestModel(LightningTestMixin, LightningTestModelBase): pass @@ -319,6 +331,8 @@ def test_running_test_without_val(): def test_running_test_pretrained_model(): + reset_seed() + """Verify test() on pretrained model""" hparams = get_hparams() model = LightningTestModel(hparams) @@ -361,6 +375,8 @@ def test_running_test_pretrained_model(): def test_running_test_pretrained_model_dp(): + reset_seed() + """Verify test() on pretrained model""" if not can_run_gpu_test(): return @@ -407,6 +423,8 @@ def test_running_test_pretrained_model_dp(): def test_gradient_accumulation_scheduling(): + reset_seed() + """ Test grad accumulation by the freq of optimizer updates """ @@ -487,6 +505,8 @@ def test_multi_gpu_model_ddp(): return os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0]) + reset_seed() + model, hparams = get_model() trainer_options = dict( show_progress_bar=False, @@ -501,6 +521,7 @@ def test_multi_gpu_model_ddp(): def test_optimizer_return_options(): + reset_seed() trainer = Trainer() model, hparams = get_model() @@ -531,6 +552,8 @@ def test_optimizer_return_options(): def test_single_gpu_batch_parse(): + reset_seed() + if not can_run_gpu_test(): return @@ -577,6 +600,7 @@ def test_early_stopping_cpu_model(): Test each of the trainer options :return: """ + reset_seed() stopping = EarlyStopping(monitor='val_loss') trainer_options = dict( @@ -604,6 +628,8 @@ def test_no_val_module(): Tests use case where trainer saves the model, and user loads it from tags independently :return: """ + reset_seed() + hparams = get_hparams() class CurrentTestModel(LightningTestModelBase): @@ -650,6 +676,7 @@ def test_no_val_end_module(): Tests use case where trainer saves the model, and user loads it from tags independently :return: """ + reset_seed() class CurrentTestModel(LightningValidationStepMixin, LightningTestModelBase): pass @@ -696,6 +723,8 @@ def test_simple_cpu(): Verify continue training session on CPU :return: """ + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -723,6 +752,8 @@ def test_amp_single_gpu(): Make sure DDP + AMP work :return: """ + reset_seed() + if not torch.cuda.is_available(): warnings.warn('test_amp_gpu_ddp cannot run.' 'Rerun on a GPU node to run this test') @@ -751,6 +782,8 @@ def test_no_amp_single_gpu(): Make sure DDP + AMP work :return: """ + reset_seed() + if not torch.cuda.is_available(): warnings.warn('test_amp_gpu_ddp cannot run.' 
'Rerun on a GPU node to run this test') @@ -780,6 +813,8 @@ def test_cpu_restore_training(): Verify continue training session on CPU :return: """ + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -852,6 +887,8 @@ def test_amp_gpu_ddp(): os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0]) + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -871,6 +908,8 @@ def test_cpu_slurm_save_load(): Verify model save/load/checkpoint on CPU :return: """ + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -946,6 +985,8 @@ def test_cpu_slurm_save_load(): def test_loading_meta_tags(): + reset_seed() + from argparse import Namespace hparams = get_hparams() @@ -967,6 +1008,7 @@ def test_loading_meta_tags(): def test_dp_output_reduce(): + reset_seed() # test identity when we have a single gpu out = torch.rand(3, 1) @@ -992,6 +1034,8 @@ def test_model_saving_loading(): Tests use case where trainer saves the model, and user loads it from tags independently :return: """ + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -1047,6 +1091,8 @@ def test_model_saving_loading(): def test_model_freeze_unfreeze(): + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -1066,6 +1112,8 @@ def test_amp_gpu_ddp_slurm_managed(): os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0]) os.environ['SLURM_LOCALID'] = str(0) + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams) @@ -1132,6 +1180,7 @@ def test_cpu_model_with_amp(): Make sure model trains on CPU :return: """ + reset_seed() trainer_options = dict( show_progress_bar=False, @@ -1153,6 +1202,7 @@ def test_cpu_model(): Make sure model trains on CPU :return: """ + reset_seed() trainer_options = dict( show_progress_bar=False, @@ -1172,6 +1222,7 @@ def test_all_features_cpu_model(): Test each of the trainer options :return: """ + reset_seed() trainer_options = dict( gradient_clip_val=1.0, @@ -1195,6 +1246,8 @@ def test_single_gpu_model(): Make sure single GPU works (DP mode) :return: """ + reset_seed() + if not torch.cuda.is_available(): warnings.warn('test_single_gpu_model cannot run.' 
' Rerun on a GPU node to run this test') @@ -1218,6 +1271,8 @@ def test_multi_gpu_none_backend(): distributed_backend = None :return: """ + reset_seed() + if not can_run_gpu_test(): return @@ -1239,6 +1294,8 @@ def test_multi_gpu_model_dp(): Make sure DP works :return: """ + reset_seed() + if not can_run_gpu_test(): return @@ -1263,6 +1320,8 @@ def test_amp_gpu_dp(): Make sure DP + AMP work :return: """ + reset_seed() + if not can_run_gpu_test(): return @@ -1287,6 +1346,8 @@ def test_ddp_sampler_error(): os.environ['MASTER_PORT'] = str(np.random.randint(12000, 19000, 1)[0]) + reset_seed() + hparams = get_hparams() model = LightningTestModel(hparams, force_remove_distributed_sampler=True) @@ -1313,6 +1374,8 @@ def test_multiple_val_dataloader(): Verify multiple val_dataloader :return: """ + reset_seed() + class CurrentTestModel( LightningValidationMultipleDataloadersMixin, LightningTestModelBase @@ -1348,6 +1411,8 @@ def test_multiple_test_dataloader(): Verify multiple test_dataloader :return: """ + reset_seed() + class CurrentTestModel( LightningTestMultipleDataloadersMixin, LightningTestModelBase @@ -1577,5 +1642,11 @@ def can_run_gpu_test(): return True +def reset_seed(): + SEED = RANDOM_SEEDS.pop() + torch.manual_seed(SEED) + np.random.seed(SEED) + + if __name__ == '__main__': pytest.main([__file__])
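
For reference, a minimal standalone sketch of the seeding pattern this patch introduces. `RANDOM_SEEDS` and `reset_seed` mirror the names used in the patch; `test_something` is a hypothetical placeholder test, not part of the diff:

    import numpy as np
    import torch

    # Pool of pre-generated seeds shared by all tests in a module.
    # Each test pops a fresh seed, so tests don't all reuse one fixed seed
    # but each individual test still runs with a known, reportable value.
    RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000))


    def reset_seed():
        # Seed torch and numpy from the same value so weight init and
        # data shuffling are reproducible together within a test.
        SEED = RANDOM_SEEDS.pop()
        torch.manual_seed(SEED)
        np.random.seed(SEED)


    def test_something():      # hypothetical example test
        reset_seed()            # call first, before building models or loaders
        ...

Note that the pool itself is generated with np.random.randint at import time, so the popped seeds differ from session to session unless numpy's global state is seeded before the test module is imported; within a single session, each test simply consumes the next seed from the pool.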