From 97d730216ed6619ab60800799dcd3733c9a8483a Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:21:10 -0400
Subject: [PATCH 01/12] Update README.md

---
 README.md | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/README.md b/README.md
index 4622bc9732..6a55575e92 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,116 @@
 Seed for ML research
 
 ## Usage
+To use lightning, define a model that implements these 10 functions:
+
+#### Model definition
+| Name | Description | Input | Return |
+|---|---|---|---|
+| training_step | Called with a batch of data during training | data from your dataloaders | tuple: scalar, dict |
+| validation_step | Called with a batch of data during validation | data from your dataloaders | tuple: scalar, dict |
+| validation_end | Collate metrics from all validation steps | outputs: array where each item is the output of a validation step | dict: for logging |
+| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
+| load_model_specific | | | |
+
+#### Model training
+| Name | Description | Input | Return |
+|---|---|---|---|
+| configure_optimizers | called during training setup | None | list: optimizers you want to use |
+| tng_dataloader | called during training | None | pytorch dataloader |
+| val_dataloader | called during validation | None | pytorch dataloader |
+| test_dataloader | called during testing | None | pytorch dataloader |
+| add_model_specific_args | called with args you defined in your main. This lets you tailor args for each model and keep main the same | argparse | argparse |
+
+#### Model Saving/Loading
+| Name | Description | Input | Return |
+|---|---|---|---|
+| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
+| load_model_specific | called when loading a model | checkpoint: dict you created in get_save_dict | dict: modified in whatever way you want |
+
+
+## Example
+```python
+import torch.nn as nn
+
+class ExampleModel(RootModule):
+    def __init__(self):
+        self.l1 = nn.Linear(100, 20)
+
+    # TRAINING
+    def training_step(self, data_batch):
+        # your dataloader decides what each batch looks like
+        x, y = data_batch
+        y_hat = self.l1(x)
+        loss = some_loss(y_hat, y)
+
+        tqdm_dic = {'train_loss': loss}
+
+        # must return scalar, dict for logging
+        return loss, tqdm_dic
+
+    def validation_step(self, data_batch):
+        # same as training...
+        x, y = data_batch
+        y_hat = self.l1(x)
+        loss = some_loss(y_hat, y)
+
+        # val specific
+        acc = calculate_acc(y_hat, y)
+
+        tqdm_dic = {'val_loss': loss, 'val_acc': acc, 'whatever_you_want': 'a'}
+        return loss, tqdm_dic
+
+    def validation_end(self, outputs):
+        total_accs = []
+
+        # given to you by the framework with all validation outputs.
+        # chance to collate
+        for output in outputs:
+            total_accs.append(output['val_acc'].item())
+
+        # return a dict
+        return {'total_acc': np.mean(total_accs)}
+
+    # SAVING
+    def get_save_dict(self):
+        # lightning saves for you. Here's your chance to say what you want to save
+        checkpoint = {'state_dict': self.state_dict()}
+
+        return checkpoint
+
+    def load_model_specific(self, checkpoint):
+        # lightning loads for you. Here's your chance to say what you want to load
+        self.load_state_dict(checkpoint['state_dict'])
+        pass
+
+    # TRAINING CONFIG
+    def configure_optimizers(self):
+        # give lightning the list of optimizers you want to use.
+        # lightning will call automatically
+        optimizer = self.choose_optimizer(self.hparams.optimizer_name, self.parameters(), {'lr': self.hparams.learning_rate}, 'optimizer')
+        self.optimizers = [optimizer]
+        return self.optimizers
+
+    # LIGHTNING WILL USE THE LOADERS YOU DEFINE HERE
+    @property
+    def tng_dataloader(self):
+        return pytorch_dataloader('train')
+
+    @property
+    def val_dataloader(self):
+        return pytorch_dataloader('val')
+
+    @property
+    def test_dataloader(self):
+        return pytorch_dataloader('test')
+
+    # MODIFY YOUR COMMAND LINE ARGS
+    @staticmethod
+    def add_model_specific_args(parent_parser):
+        parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser])
+        parser.add_argument('--out_features', default=20)
+        return parser
+```
 
 ### Add new model
 1. Create a new model under /models.
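The example added in patch 01 leans on names it never defines: `RootModule`, `np`, `some_loss`, `calculate_acc`, and `pytorch_dataloader` all come from elsewhere, and `__init__` would also need a `super().__init__()` call before registering `nn.Linear` for the module to work. A minimal sketch of stand-ins that would make the snippet self-contained — illustrative only, these helpers are not part of the patch:

```python
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

def some_loss(y_hat, y):
    # any differentiable criterion works; cross-entropy is a common default
    return F.cross_entropy(y_hat, y)

def calculate_acc(y_hat, y):
    # fraction of argmax predictions that match the labels
    return (y_hat.argmax(dim=1) == y).float().mean()

def pytorch_dataloader(split):
    # toy random data shaped for the nn.Linear(100, 20) example model
    x = torch.randn(256, 100)
    y = torch.randint(0, 20, (256,))
    return DataLoader(TensorDataset(x, y), batch_size=32, shuffle=(split == 'train'))
```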
From 985af56892b623a4e25cd89bc1efc53e82bc6585 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:22:38 -0400
Subject: [PATCH 02/12] Update README.md

---
 README.md | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 6a55575e92..03cee59505 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,7 @@ To use lightning, define a model that implements these 10 functions:
 | training_step | Called with a batch of data during training | data from your dataloaders | tuple: scalar, dict |
 | validation_step | Called with a batch of data during validation | data from your dataloaders | tuple: scalar, dict |
 | validation_end | Collate metrics from all validation steps | outputs: array where each item is the output of a validation step | dict: for logging |
-| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
-| load_model_specific | | | |
+| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
 
 #### Model training
 | Name | Description | Input | Return |
@@ -37,6 +36,7 @@ class ExampleModel(RootModule):
     def __init__(self):
         self.l1 = nn.Linear(100, 20)
 
+    # ---------------
     # TRAINING
     def training_step(self, data_batch):
         # your dataloader decides what each batch looks like
@@ -71,7 +71,8 @@ class ExampleModel(RootModule):
 
         # return a dict
         return {'total_acc': np.mean(total_accs)}
-
+
+    # ---------------
     # SAVING
     def get_save_dict(self):
         # lightning saves for you. Here's your chance to say what you want to save
@@ -84,6 +85,8 @@ class ExampleModel(RootModule):
         self.load_state_dict(checkpoint['state_dict'])
         pass
 
+
+    # ---------------
     # TRAINING CONFIG
     def configure_optimizers(self):
         # give lightning the list of optimizers you want to use.
         # lightning will call automatically
@@ -104,7 +107,8 @@ class ExampleModel(RootModule):
     @property
     def test_dataloader(self):
         return pytorch_dataloader('test')
-
+
+    # ---------------
     # MODIFY YOUR COMMAND LINE ARGS
     @staticmethod
     def add_model_specific_args(parent_parser):
@@ -104,7 +107,8 @@ class ExampleModel(RootModule): @property def test_dataloader(self): return pytorch_dataloader('test') - + + # --------------- # MODIFY YOUR COMMAND LINE ARGS @staticmethod def add_model_specific_args(parent_parser): From 649f4d5f4dbfb8e7e7278935774c3ad920e9d2d6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 30 Mar 2019 21:24:46 -0400 Subject: [PATCH 03/12] Update README.md --- README.md | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 03cee59505..c0fe260869 100644 --- a/README.md +++ b/README.md @@ -34,38 +34,27 @@ import torch.nn as nn class ExampleModel(RootModule): def __init__(self): - self.l1 = nn.Linear(100, 20) + # define model # --------------- # TRAINING def training_step(self, data_batch): - # your dataloader decides what each batch looks like x, y = data_batch y_hat = self.l1(x) loss = some_loss(y_hat) - tqdm_dic = {'train_loss': loss} - - # must return scalar, dict for logging - return loss_val, tqdm_dic + return loss_val, {'train_loss': loss} def validation_step(self, data_batch): - # same as training... x, y = data_batch y_hat = self.l1(x) loss = some_loss(y_hat) - # val specific - acc = calculate_acc(y_hat, y) - - tqdm_dic = {'train_loss': loss, 'val_acc': acc, 'whatever_you_want': 'a'} - return loss_val, tqdm_dic + return loss_val, {'val_loss': loss} def validation_end(self, outputs): total_accs = [] - # given to you by the framework with all validation outputs. - # chance to collate for output in outputs: total_accs.append(output['val_acc'].item()) @@ -83,17 +72,14 @@ class ExampleModel(RootModule): def load_model_specific(self, checkpoint): # lightning loads for you. Here's your chance to say what you want to load self.load_state_dict(checkpoint['state_dict']) - pass - # --------------- # TRAINING CONFIG def configure_optimizers(self): # give lightning the list of optimizers you want to use. # lightning will call automatically - optimizer = self.choose_optimizer(self.hparams.optimizer_name, self.parameters(), {'lr': self.hparams.learning_rate}, 'optimizer') - self.optimizers = [optimizer] - return self.optimizers + optimizer = self.choose_optimizer('adam', self.parameters(), {'lr': self.hparams.learning_rate}, 'optimizer') + return [optimizer] # LIGHTING WILL USE THE LOADERS YOU DEFINE HERE @property From 24255a9eabbf64d1bb3616a278c7763100b358c3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 30 Mar 2019 21:25:43 -0400 Subject: [PATCH 04/12] Update README.md --- README.md | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index c0fe260869..29d4d4fcb7 100644 --- a/README.md +++ b/README.md @@ -2,33 +2,8 @@ Seed for ML research ## Usage -To use lightning, define a model that implements these 10 functions: +To use lightning, define a model that implements these 10 functions: -#### Model definition -| Name | Description | Input | Return | -|---|---|---|---| -| training_step | Called with a batch of data during training | data from your dataloaders | tuple: scalar, dict | -| validation_step | Called with a batch of data during validation | data from your dataloaders | tuple: scalar, dict | -| validation_end | Collate metrics from all validation steps | outputs: array where each item is the output of a validation step | dict: for logging | -| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) 
From 24255a9eabbf64d1bb3616a278c7763100b358c3 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:25:43 -0400
Subject: [PATCH 04/12] Update README.md

---
 README.md | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index c0fe260869..29d4d4fcb7 100644
--- a/README.md
+++ b/README.md
@@ -2,33 +2,8 @@
 Seed for ML research
 
 ## Usage
-To use lightning, define a model that implements these 10 functions: 
+To use lightning, define a model that implements these 10 functions:
 
-#### Model definition
-| Name | Description | Input | Return |
-|---|---|---|---|
-| training_step | Called with a batch of data during training | data from your dataloaders | tuple: scalar, dict |
-| validation_step | Called with a batch of data during validation | data from your dataloaders | tuple: scalar, dict |
-| validation_end | Collate metrics from all validation steps | outputs: array where each item is the output of a validation step | dict: for logging |
-| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
-
-#### Model training
-| Name | Description | Input | Return |
-|---|---|---|---|
-| configure_optimizers | called during training setup | None | list: optimizers you want to use |
-| tng_dataloader | called during training | None | pytorch dataloader |
-| val_dataloader | called during validation | None | pytorch dataloader |
-| test_dataloader | called during testing | None | pytorch dataloader |
-| add_model_specific_args | called with args you defined in your main. This lets you tailor args for each model and keep main the same | argparse | argparse |
-
-#### Model Saving/Loading
-| Name | Description | Input | Return |
-|---|---|---|---|
-| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
-| load_model_specific | called when loading a model | checkpoint: dict you created in get_save_dict | dict: modified in whatever way you want |
-
-
-## Example
 ```python
 import torch.nn as nn
@@ -102,7 +77,32 @@ class ExampleModel(RootModule):
         parser.add_argument('--out_features', default=20)
         return parser
 ```
+### Details
+#### Model definition
+| Name | Description | Input | Return |
+|---|---|---|---|
+| training_step | Called with a batch of data during training | data from your dataloaders | tuple: scalar, dict |
+| validation_step | Called with a batch of data during validation | data from your dataloaders | tuple: scalar, dict |
+| validation_end | Collate metrics from all validation steps | outputs: array where each item is the output of a validation step | dict: for logging |
+| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
+
+#### Model training
+| Name | Description | Input | Return |
+|---|---|---|---|
+| configure_optimizers | called during training setup | None | list: optimizers you want to use |
+| tng_dataloader | called during training | None | pytorch dataloader |
+| val_dataloader | called during validation | None | pytorch dataloader |
+| test_dataloader | called during testing | None | pytorch dataloader |
+| add_model_specific_args | called with args you defined in your main. This lets you tailor args for each model and keep main the same | argparse | argparse |
+
+#### Model Saving/Loading
+| Name | Description | Input | Return |
+|---|---|---|---|
+| get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
+| load_model_specific | called when loading a model | checkpoint: dict you created in get_save_dict | dict: modified in whatever way you want |
+
+
 ### Add new model
 1. Create a new model under /models.
 2. Add model name to trainer_main
From 97f82912420b1a44f5e43d8ebbcae79e42a9f1e4 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:26:11 -0400
Subject: [PATCH 05/12] Update README.md

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 29d4d4fcb7..51027b8962 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,6 @@ class ExampleModel(RootModule):
         optimizer = self.choose_optimizer('adam', self.parameters(), {'lr': self.hparams.learning_rate}, 'optimizer')
         return [optimizer]
 
-    # LIGHTNING WILL USE THE LOADERS YOU DEFINE HERE
     @property
     def tng_dataloader(self):
         return pytorch_dataloader('train')

From 33841681115c948bd3aa1648eb91a3aedeb51fe6 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:27:11 -0400
Subject: [PATCH 06/12] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 51027b8962..0e31bd72b6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Pytorch-lightning
-Seed for ML research
+The Keras for ML-research in Pytorch. Simple to use, but not as abstracted out.
 
 ## Usage
 To use lightning, define a model that implements these 10 functions:

From 959965d587cbf29febd2e1eeb788ff4a2ada5fee Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:32:37 -0400
Subject: [PATCH 07/12] Update README.md

---
 README.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0e31bd72b6..de99d41e42 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,61 @@
 The Keras for ML-research in Pytorch. Simple to use, but not as abstracted out.
 
 ## Usage
-To use lightning, define a model that implements these 10 functions:
+To use lightning, first define a trainer function:
+
+```python
+# trainer.py
+
+from pytorch_lightning.models.trainer import Trainer
+from pytorch_lightning.utils.pt_callbacks import EarlyStopping, ModelCheckpoint
+from my_project import My_Model
+
+# --------------------
+# CALLBACKS
+early_stop = EarlyStopping(
+    monitor='val_loss',
+    patience=3,
+    verbose=True,
+    mode='min'
+)
+
+model_save_path = 'PATH/TO/SAVE'
+checkpoint = ModelCheckpoint(
+    filepath=model_save_path,
+    save_function=None,
+    save_best_only=True,
+    verbose=True,
+    monitor='val_acc',
+    mode='max'
+)
+
+# configure trainer
+trainer = Trainer(
+    on_gpu=False,
+    enable_tqdm=True,
+    overfit_pct=None,
+    track_grad_norm=-1,
+    fast_dev_run=False,
+    check_val_every_n_epoch=1,
+    accumulate_grad_batches=2,
+    process_position=0,
+    current_gpu_name=0,
+    checkpoint_callback=checkpoint,
+    early_stop_callback=early_stop,
+    enable_early_stop=True,
+    max_nb_epochs=12,
+    min_nb_epochs=2,
+    train_percent_check=1.0,
+    val_percent_check=0.5,
+    test_percent_check=0.5,
+    val_check_interval=0.95,
+    log_save_interval=0.95,
+    add_log_row_interval=20,
+    lr_scheduler_milestones=None
+)
+```
+
+next define a model that implements these 10 functions:
 
 ```python
 import torch.nn as nn

From 86e72938fd2bdadd76f8bb0d5dafd243afd7c995 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:33:39 -0400
Subject: [PATCH 08/12] Update README.md

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index de99d41e42..8afd823ce7 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,11 @@ trainer = Trainer(
     log_save_interval=0.95,
     add_log_row_interval=20,
     lr_scheduler_milestones=None
-)
+)
+
+# init model
+model = My_Model()
+trainer.fit(model)
 ```
 
 next define a model that implements these 10 functions:
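Most `Trainer` flags in patch 07 are self-describing; `accumulate_grad_batches=2` is the exception. In plain PyTorch it corresponds to stepping the optimizer once per two batches, roughly like this illustrative loop (not lightning's actual internals):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Plain-PyTorch illustration of accumulate_grad_batches=2.
model = nn.Linear(100, 20)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
batches = [(torch.randn(32, 100), torch.randint(0, 20, (32,))) for _ in range(8)]

accumulate_grad_batches = 2
optimizer.zero_grad()
for i, (x, y) in enumerate(batches):
    loss = F.cross_entropy(model(x), y)
    (loss / accumulate_grad_batches).backward()  # scale so gradients match one 2x-sized batch
    if (i + 1) % accumulate_grad_batches == 0:
        optimizer.step()       # one parameter update per two batches
        optimizer.zero_grad()
```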
From d8640764d90c56cd21ce00cb3b93814b6d4929e4 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:33:49 -0400
Subject: [PATCH 09/12] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8afd823ce7..783fd34256 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ trainer = Trainer(
     lr_scheduler_milestones=None
 )
 
-# init model
+# init model and train
 model = My_Model()
 trainer.fit(model)
 ```

From 01efa4d78122931a8b00bdb9aadc63b210ba37de Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:38:53 -0400
Subject: [PATCH 10/12] Update README.md

---
 README.md | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 783fd34256..cb463dd11c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,17 @@ To use lightning, first define a trainer function:
 from pytorch_lightning.models.trainer import Trainer
 from pytorch_lightning.utils.pt_callbacks import EarlyStopping, ModelCheckpoint
 from my_project import My_Model
+from test_tube import HyperOptArgumentParser, Experiment, SlurmCluster
+
+# --------------
+# TEST TUBE INIT
+exp = Experiment(
+    name='my_exp',
+    debug=True,
+    save_dir='/some/path',
+    autosave=False,
+    description='my desc'
+)
 
 # --------------------
 # CALLBACKS
@@ -32,27 +43,10 @@ checkpoint = ModelCheckpoint(
 
 # configure trainer
 trainer = Trainer(
-    on_gpu=False,
-    enable_tqdm=True,
-    overfit_pct=None,
-    track_grad_norm=-1,
-    fast_dev_run=False,
-    check_val_every_n_epoch=1,
-    accumulate_grad_batches=2,
-    process_position=0,
-    current_gpu_name=0,
+    experiment=exp,
+    cluster=None,  # assumed optional; no SlurmCluster is built in this snippet
     checkpoint_callback=checkpoint,
-    early_stop_callback=early_stop,
-    enable_early_stop=True,
-    max_nb_epochs=12,
-    min_nb_epochs=2,
-    train_percent_check=1.0,
-    val_percent_check=0.5,
-    test_percent_check=0.5,
-    val_check_interval=0.95,
-    log_save_interval=0.95,
-    add_log_row_interval=20,
-    lr_scheduler_milestones=None
+    early_stop_callback=early_stop
 )

From a7406bb75228d3fb64591977174e6f3b83a7d3b2 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:41:24 -0400
Subject: [PATCH 11/12] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cb463dd11c..259f63cae6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Pytorch-lightning
-The Keras for ML-research in Pytorch. Simple to use, but not as abstracted out.
+The Keras for ML-researchers in PyTorch.
 
 ## Usage
 To use lightning, first define a trainer function:
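Patch 10 hands a test_tube `Experiment` to the `Trainer` but never shows it used directly. For reference, a test_tube experiment can also record tags and metrics on its own; this usage sketch follows test_tube's README-style API (treat the exact method names as assumptions here):

```python
from test_tube import Experiment

exp = Experiment(name='my_exp', save_dir='/some/path')
exp.tag({'learning_rate': 0.02, 'layers': 2})   # record hyperparameters
exp.log({'val_loss': 0.23, 'epoch': 1})         # record one row of metrics
exp.save()                                      # flush everything to save_dir
```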
From 49de7499458162e4ff0f3f3089dcd4968b6b09de Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Sat, 30 Mar 2019 21:42:33 -0400
Subject: [PATCH 12/12] Update README.md

---
 README.md | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 259f63cae6..9d6e01eb97 100644
--- a/README.md
+++ b/README.md
@@ -152,26 +152,9 @@ class ExampleModel(RootModule):
 | get_save_dict | called when your model needs to be saved (checkpoints, hpc save, etc...) | None | dict to be saved |
 | load_model_specific | called when loading a model | checkpoint: dict you created in get_save_dict | dict: modified in whatever way you want |
 
-
-### Add new model
-1. Create a new model under /models.
-2. Add model name to trainer_main
-```python
-AVAILABLE_MODELS = {
-    'model_1': ExampleModel1
-}
-```
+## Optional model hooks.
+Add these to the model whenever you want to configure training behavior.
 
-### Model methods that can be implemented
-
-| Method | Purpose | Input | Output | Required |
-|---|---|---|---|---|
-| forward() | Forward pass | model_in tuple with your data | model_out tuple to be passed to loss | Y |
-| loss() | calculate model loss | model_out tuple from forward() | A scalar | Y |
-| check_performance() | run a full loop through val data to check for metrics | dataloader, nb_tests | metrics tuple to be tracked | Y |
-| tng_dataloader | Computed option, used to feed tng data | - | Pytorch DataLoader subclass | Y |
-| val_dataloader | Computed option, used to feed tng data | - | Pytorch DataLoader subclass | Y |
-| test_dataloader | Computed option, used to feed tng data | - | Pytorch DataLoader subclass | Y |
 
 ### Model lifecycle hooks
 Use these hooks to customize functionality
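The series ends mid-hunk: patch 12 introduces a "### Model lifecycle hooks" heading but is cut off before listing the hooks themselves. As orientation only, optional hooks on the model would plausibly take the shape below; every hook name here is an assumption, since the patches never show them:

```python
class ExampleModel(RootModule):  # RootModule as in the README example above
    def on_batch_start(self, data_batch):
        # runs before each training batch; hook name is an assumption
        pass

    def on_batch_end(self):
        # runs after each training batch
        pass

    def on_epoch_start(self):
        pass

    def on_epoch_end(self):
        # e.g., compute and log anything you want once per epoch
        pass
```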