From 9fc01e3fd32c63a42c4a5848a576a0a0b7ae24a1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 5 Oct 2019 14:13:32 -0400 Subject: [PATCH] cleaned up demos --- examples/__init__.py | 2 +- .../__init__.py | 0 .../cpu_template.py} | 32 ++----- examples/basic_examples/gpu_template.py | 78 +++++++++++++++ .../lightning_module_template.py | 0 .../__init__.py | 0 .../{templates => domain_templates}/gan.py | 0 .../multi_node_examples/README.md | 0 .../__init__.py | 0 .../multi_node_examples/demo_script.sh | 0 .../minimal_multi_node_demo.py | 0 .../minimal_multi_node_demo_script.sh | 0 .../multi_node_cluster_auto_slurm.py | 2 +- .../multi_node_own_slurm_script.py | 2 +- .../single_cpu_template.py | 42 --------- .../single_gpu_node_ddp_template.py | 69 -------------- .../single_gpu_node_dp_template.py | 94 ------------------- .../trainer_cpu_template.py | 53 ----------- 18 files changed, 91 insertions(+), 283 deletions(-) rename examples/{new_project_templates => basic_examples}/__init__.py (100%) rename examples/{new_project_templates/single_gpu_node_16bit_template.py => basic_examples/cpu_template.py} (51%) create mode 100644 examples/basic_examples/gpu_template.py rename examples/{new_project_templates => basic_examples}/lightning_module_template.py (100%) rename examples/{new_project_templates/multi_node_examples => domain_templates}/__init__.py (100%) rename examples/{templates => domain_templates}/gan.py (100%) rename examples/{new_project_templates => }/multi_node_examples/README.md (100%) rename examples/{templates => multi_node_examples}/__init__.py (100%) rename examples/{new_project_templates => }/multi_node_examples/demo_script.sh (100%) rename examples/{new_project_templates => }/multi_node_examples/minimal_multi_node_demo.py (100%) rename examples/{new_project_templates => }/multi_node_examples/minimal_multi_node_demo_script.sh (100%) rename examples/{new_project_templates => }/multi_node_examples/multi_node_cluster_auto_slurm.py (97%) rename examples/{new_project_templates => }/multi_node_examples/multi_node_own_slurm_script.py (94%) delete mode 100644 examples/new_project_templates/single_cpu_template.py delete mode 100644 examples/new_project_templates/single_gpu_node_ddp_template.py delete mode 100644 examples/new_project_templates/single_gpu_node_dp_template.py delete mode 100644 examples/new_project_templates/trainer_cpu_template.py diff --git a/examples/__init__.py b/examples/__init__.py index 0d456dacb6..71f9d6f6fb 100644 --- a/examples/__init__.py +++ b/examples/__init__.py @@ -1,4 +1,4 @@ -from .new_project_templates.lightning_module_template import LightningTemplateModel +from .basic_examples.lightning_module_template import LightningTemplateModel __all__ = [ 'LightningTemplateModel' diff --git a/examples/new_project_templates/__init__.py b/examples/basic_examples/__init__.py similarity index 100% rename from examples/new_project_templates/__init__.py rename to examples/basic_examples/__init__.py diff --git a/examples/new_project_templates/single_gpu_node_16bit_template.py b/examples/basic_examples/cpu_template.py similarity index 51% rename from examples/new_project_templates/single_gpu_node_16bit_template.py rename to examples/basic_examples/cpu_template.py index cf51b2c7f2..51a36f87c3 100644 --- a/examples/new_project_templates/single_gpu_node_16bit_template.py +++ b/examples/basic_examples/cpu_template.py @@ -1,15 +1,13 @@ """ -16-bit single node, CPU example +Runs a model on a single node across N-gpus. """ import os import numpy as np import torch -from test_tube import HyperOptArgumentParser, Experiment +from argparse import ArgumentParser from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint - -from examples.new_project_templates.lightning_module_template import LightningTemplateModel +from examples.basic_examples.lightning_module_template import LightningTemplateModel SEED = 2334 torch.manual_seed(SEED) @@ -20,7 +18,6 @@ def main(hparams): """ Main training routine specific for this project :param hparams: - :return: """ # ------------------------ # 1 INIT LIGHTNING MODEL @@ -30,11 +27,7 @@ def main(hparams): # ------------------------ # 2 INIT TRAINER # ------------------------ - trainer = Trainer( - gpus=hparams.gpus, - use_amp=True, - distributed_backend='dp' - ) + trainer = Trainer() # ------------------------ # 3 START TRAINING @@ -43,19 +36,14 @@ def main(hparams): if __name__ == '__main__': - - # dirs + # ------------------------ + # TRAINING ARGUMENTS + # ------------------------ + # these are project-wide arguments root_dir = os.path.dirname(os.path.realpath(__file__)) + parent_parser = ArgumentParser(add_help=False) - # although we user hyperOptParser, we are using it only as argparse right now - parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False) - - # gpu args - parent_parser.add_argument('--gpus', type=str, default='-1', - help='how many gpus to use in the node.' - 'value -1 uses all the gpus on the node') - - # allow model to overwrite or extend args + # each LightningModule defines arguments relevant to it parser = LightningTemplateModel.add_model_specific_args(parent_parser, root_dir) hyperparams = parser.parse_args() diff --git a/examples/basic_examples/gpu_template.py b/examples/basic_examples/gpu_template.py new file mode 100644 index 0000000000..b3f5360934 --- /dev/null +++ b/examples/basic_examples/gpu_template.py @@ -0,0 +1,78 @@ +""" +Runs a model on a single node across N-gpus. +""" +import os +import numpy as np +import torch + +from argparse import ArgumentParser +from pytorch_lightning import Trainer +from examples.basic_examples.lightning_module_template import LightningTemplateModel + +SEED = 2334 +torch.manual_seed(SEED) +np.random.seed(SEED) + + +def main(hparams): + """ + Main training routine specific for this project + :param hparams: + """ + # ------------------------ + # 1 INIT LIGHTNING MODEL + # ------------------------ + model = LightningTemplateModel(hparams) + + # ------------------------ + # 2 INIT TRAINER + # ------------------------ + trainer = Trainer( + gpus=hparams.gpus, + distributed_backend=hparams.distributed_backend, + use_amp=hparams.use_16bit + ) + + # ------------------------ + # 3 START TRAINING + # ------------------------ + trainer.fit(model) + + +if __name__ == '__main__': + # ------------------------ + # TRAINING ARGUMENTS + # ------------------------ + # these are project-wide arguments + + root_dir = os.path.dirname(os.path.realpath(__file__)) + parent_parser = ArgumentParser(add_help=False) + + # gpu args + parent_parser.add_argument( + '--gpus', + type=str, + default='-1', + help='any integer (number of GPUs to use) or -1 for all' + ) + parent_parser.add_argument( + '--distributed_backend', + type=str, + default=None, + help='supports three options dp, ddp, ddp2' + ) + parent_parser.add_argument( + '--use_16bit', + dest='use_16bit', + action='store_true', + help='if true uses 16 bit precision' + ) + + # each LightningModule defines arguments relevant to it + parser = LightningTemplateModel.add_model_specific_args(parent_parser, root_dir) + hyperparams = parser.parse_args() + + # --------------------- + # RUN TRAINING + # --------------------- + main(hyperparams) diff --git a/examples/new_project_templates/lightning_module_template.py b/examples/basic_examples/lightning_module_template.py similarity index 100% rename from examples/new_project_templates/lightning_module_template.py rename to examples/basic_examples/lightning_module_template.py diff --git a/examples/new_project_templates/multi_node_examples/__init__.py b/examples/domain_templates/__init__.py similarity index 100% rename from examples/new_project_templates/multi_node_examples/__init__.py rename to examples/domain_templates/__init__.py diff --git a/examples/templates/gan.py b/examples/domain_templates/gan.py similarity index 100% rename from examples/templates/gan.py rename to examples/domain_templates/gan.py diff --git a/examples/new_project_templates/multi_node_examples/README.md b/examples/multi_node_examples/README.md similarity index 100% rename from examples/new_project_templates/multi_node_examples/README.md rename to examples/multi_node_examples/README.md diff --git a/examples/templates/__init__.py b/examples/multi_node_examples/__init__.py similarity index 100% rename from examples/templates/__init__.py rename to examples/multi_node_examples/__init__.py diff --git a/examples/new_project_templates/multi_node_examples/demo_script.sh b/examples/multi_node_examples/demo_script.sh similarity index 100% rename from examples/new_project_templates/multi_node_examples/demo_script.sh rename to examples/multi_node_examples/demo_script.sh diff --git a/examples/new_project_templates/multi_node_examples/minimal_multi_node_demo.py b/examples/multi_node_examples/minimal_multi_node_demo.py similarity index 100% rename from examples/new_project_templates/multi_node_examples/minimal_multi_node_demo.py rename to examples/multi_node_examples/minimal_multi_node_demo.py diff --git a/examples/new_project_templates/multi_node_examples/minimal_multi_node_demo_script.sh b/examples/multi_node_examples/minimal_multi_node_demo_script.sh similarity index 100% rename from examples/new_project_templates/multi_node_examples/minimal_multi_node_demo_script.sh rename to examples/multi_node_examples/minimal_multi_node_demo_script.sh diff --git a/examples/new_project_templates/multi_node_examples/multi_node_cluster_auto_slurm.py b/examples/multi_node_examples/multi_node_cluster_auto_slurm.py similarity index 97% rename from examples/new_project_templates/multi_node_examples/multi_node_cluster_auto_slurm.py rename to examples/multi_node_examples/multi_node_cluster_auto_slurm.py index 90b759227b..352547d710 100644 --- a/examples/new_project_templates/multi_node_examples/multi_node_cluster_auto_slurm.py +++ b/examples/multi_node_examples/multi_node_cluster_auto_slurm.py @@ -10,7 +10,7 @@ from test_tube import HyperOptArgumentParser, Experiment, SlurmCluster from pytorch_lightning import Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint -from examples.new_project_templates.lightning_module_template import LightningTemplateModel +from examples.basic_examples.lightning_module_template import LightningTemplateModel PORT = np.random.randint(12000, 20000, 1)[0] SEED = 2334 diff --git a/examples/new_project_templates/multi_node_examples/multi_node_own_slurm_script.py b/examples/multi_node_examples/multi_node_own_slurm_script.py similarity index 94% rename from examples/new_project_templates/multi_node_examples/multi_node_own_slurm_script.py rename to examples/multi_node_examples/multi_node_own_slurm_script.py index d8a383a90b..43662d3f61 100644 --- a/examples/new_project_templates/multi_node_examples/multi_node_own_slurm_script.py +++ b/examples/multi_node_examples/multi_node_own_slurm_script.py @@ -7,7 +7,7 @@ import torch from test_tube import HyperOptArgumentParser, Experiment from pytorch_lightning import Trainer -from examples.new_project_templates.lightning_module_template import LightningTemplateModel +from examples.basic_examples.lightning_module_template import LightningTemplateModel SEED = 2334 torch.manual_seed(SEED) diff --git a/examples/new_project_templates/single_cpu_template.py b/examples/new_project_templates/single_cpu_template.py deleted file mode 100644 index e1bf1a815c..0000000000 --- a/examples/new_project_templates/single_cpu_template.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Runs a model on a single node on CPU only.. -""" -import os -import numpy as np -import torch - -from test_tube import HyperOptArgumentParser, Experiment -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint - -from examples.new_project_templates.lightning_module_template import LightningTemplateModel - -SEED = 2334 -torch.manual_seed(SEED) -np.random.seed(SEED) - - -def main(hparams): - """ - Main training routine specific for this project - :param hparams: - :return: - """ - # ------------------------ - # 1 INIT LIGHTNING MODEL - # ------------------------ - model = LightningTemplateModel(hparams) - - # ------------------------ - # 2 INIT TRAINER - # ------------------------ - trainer = Trainer() - - # ------------------------ - # 3 START TRAINING - # ------------------------ - trainer.fit(model) - - -if __name__ == '__main__': - main(hyperparams) diff --git a/examples/new_project_templates/single_gpu_node_ddp_template.py b/examples/new_project_templates/single_gpu_node_ddp_template.py deleted file mode 100644 index b027cc1c99..0000000000 --- a/examples/new_project_templates/single_gpu_node_ddp_template.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Runs a model on a single node across N-gpus. -""" -import os -import numpy as np -import torch - -from test_tube import HyperOptArgumentParser, Experiment -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint - -from examples.new_project_templates.lightning_module_template import LightningTemplateModel - -SEED = 2334 -torch.manual_seed(SEED) -np.random.seed(SEED) - - -def main(hparams): - """ - Main training routine specific for this project - :param hparams: - :return: - """ - # ------------------------ - # 1 INIT LIGHTNING MODEL - # ------------------------ - model = LightningTemplateModel(hparams) - - # ------------------------ - # 2 INIT TRAINER - # ------------------------ - trainer = Trainer( - gpus=hparams.gpus, - distributed_backend=hparams.dist_backend - ) - - # ------------------------ - # 3 START TRAINING - # ------------------------ - trainer.fit(model) - - -if __name__ == '__main__': - - # dirs - root_dir = os.path.dirname(os.path.realpath(__file__)) - demo_log_dir = os.path.join(root_dir, 'pt_lightning_demo_logs') - checkpoint_dir = os.path.join(demo_log_dir, 'model_weights') - test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data') - - # although we user hyperOptParser, we are using it only as argparse right now - parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False) - - # gpu args - parent_parser.add_argument('--gpus', type=str, default='-1', - help='how many gpus to use in the node.' - ' value -1 uses all the gpus on the node') - parent_parser.add_argument('--dist_backend', type=str, default='ddp', - help='When using multiple GPUs set Trainer(distributed_backend=dp) (or ddp)') - - # allow model to overwrite or extend args - parser = LightningTemplateModel.add_model_specific_args(parent_parser, root_dir) - hyperparams = parser.parse_args() - - # --------------------- - # RUN TRAINING - # --------------------- - main(hyperparams) diff --git a/examples/new_project_templates/single_gpu_node_dp_template.py b/examples/new_project_templates/single_gpu_node_dp_template.py deleted file mode 100644 index c6941009cc..0000000000 --- a/examples/new_project_templates/single_gpu_node_dp_template.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Runs a model on a single node across N-gpus using dataParallel -""" -import os -import numpy as np -import torch - -from test_tube import HyperOptArgumentParser, Experiment -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint - -from examples.new_project_templates.lightning_module_template import LightningTemplateModel - -SEED = 2334 -torch.manual_seed(SEED) -np.random.seed(SEED) - - -def main(hparams): - """ - Main training routine specific for this project - :param hparams: - :return: - """ - # ------------------------ - # 1 INIT LIGHTNING MODEL - # ------------------------ - print('loading model...') - model = LightningTemplateModel(hparams) - print('model built') - - # ------------------------ - # 2 INIT Logger - # ------------------------ - # init experiment - exp = Experiment( - name=hyperparams.experiment_name, - save_dir=hyperparams.test_tube_save_path, - autosave=False, - description='test demo' - ) - - exp.argparse(hparams) - exp.save() - - # ------------------------ - # 3 INIT TRAINER - # ------------------------ - trainer = Trainer( - experiment=exp, - gpus=hparams.gpus, - distributed_backend=hparams.dist_backend, - ) - - # ------------------------ - # 4 START TRAINING - # ------------------------ - trainer.fit(model) - - -if __name__ == '__main__': - - # dirs - root_dir = os.path.dirname(os.path.realpath(__file__)) - demo_log_dir = os.path.join(root_dir, 'pt_lightning_demo_logs') - checkpoint_dir = os.path.join(demo_log_dir, 'model_weights') - test_tube_dir = os.path.join(demo_log_dir, 'test_tube_data') - - # although we user hyperOptParser, we are using it only as argparse right now - parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False) - - # gpu args - parent_parser.add_argument('--gpus', type=str, default='-1', - help='how many gpus to use in the node.' - ' value -1 uses all the gpus on the node') - parent_parser.add_argument('--dist_backend', type=str, default='dp', - help='When using multiple GPUs set Trainer(distributed_backend=dp) (or ddp)') - parent_parser.add_argument('--test_tube_save_path', type=str, default=test_tube_dir, - help='where to save logs') - parent_parser.add_argument('--model_save_path', type=str, default=checkpoint_dir, - help='where to save model') - parent_parser.add_argument('--experiment_name', type=str, default='pt_lightning_exp_a', - help='test tube exp name') - - # allow model to overwrite or extend args - parser = LightningTemplateModel.add_model_specific_args(parent_parser, root_dir) - hyperparams = parser.parse_args() - - # --------------------- - # RUN TRAINING - # --------------------- - # run on HPC cluster - print(f'RUNNING INTERACTIVE MODE ON GPUS. gpu ids: {hyperparams.gpus}') - main(hyperparams) diff --git a/examples/new_project_templates/trainer_cpu_template.py b/examples/new_project_templates/trainer_cpu_template.py deleted file mode 100644 index f361161503..0000000000 --- a/examples/new_project_templates/trainer_cpu_template.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -import sys - -from test_tube import HyperOptArgumentParser, Experiment -from pytorch_lightning import Trainer -from pytorch_lightning.utilities.arg_parse import add_default_args -from pytorch_lightning.callbacks.pt_callbacks import EarlyStopping, ModelCheckpoint - -from examples.new_project_templates.lightning_module_template import LightningTemplateModel - - -def main(hparams): - """ - Main training routine specific for this project - :param hparams: - :return: - """ - # init experiment - exp = Experiment( - name=hparams.tt_name, - debug=hparams.debug, - save_dir=hparams.tt_save_path, - version=hparams.hpc_exp_number, - autosave=False, - description=hparams.tt_description - ) - - exp.argparse(hparams) - exp.save() - - # build model - model = LightningTemplateModel(hparams) - - # configure trainer - trainer = Trainer(experiment=exp) - - # train model - trainer.fit(model) - - -if __name__ == '__main__': - - # use default args given by lightning - root_dir = os.path.split(os.path.dirname(sys.modules['__main__'].__file__))[0] - parent_parser = HyperOptArgumentParser(strategy='random_search', add_help=False) - add_default_args(parent_parser, root_dir) - - # allow model to overwrite or extend args - parser = LightningTemplateModel.add_model_specific_args(parent_parser) - hyperparams = parser.parse_args() - - # train model - main(hyperparams)