From 532604f0561019f15b89c0d82043bc297218f9ce Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 27 Jul 2019 14:26:08 -0400 Subject: [PATCH 1/5] Update README.md --- README.md | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 85b27c204d..315de45f83 100644 --- a/README.md +++ b/README.md @@ -40,21 +40,24 @@ With lightning, you guarantee those parts of your code work so you can focus on To use lightning do 2 things: 1. [Define a LightningModel](https://williamfalcon.github.io/pytorch-lightning/LightningModule/RequiredTrainerInterface/) ```python -import pytorch_lightning as ptl +import os import torch from torch.nn import functional as F from torch.utils.data import DataLoader from torchvision.datasets import MNIST +import torchvision.transforms as transforms + +import pytorch_lightning as ptl class CoolModel(ptl.LightningModule): - def __init(self): + def __init__(self): super(CoolModel, self).__init__() # not the best model... 
self.l1 = torch.nn.Linear(28 * 28, 10) def forward(self, x): - return torch.relu(self.l1(x)) + return torch.relu(self.l1(x.view(x.size(0), -1))) def my_loss(self, y_hat, y): return F.cross_entropy(y_hat, y) @@ -62,7 +65,7 @@ class CoolModel(ptl.LightningModule): def training_step(self, batch, batch_nb): x, y = batch y_hat = self.forward(x) - return {'tng_loss': self.my_loss(y_hat, y)} + return {'loss': self.my_loss(y_hat, y)} def validation_step(self, batch, batch_nb): x, y = batch @@ -70,23 +73,25 @@ class CoolModel(ptl.LightningModule): return {'val_loss': self.my_loss(y_hat, y)} def validation_end(self, outputs): - avg_loss = torch.stack([x for x in outputs['val_loss']]).mean() - return avg_loss + avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() + return {'avg_val_loss': avg_loss} def configure_optimizers(self): - return [torch.optim.Adam(self.parameters(), lr=0.02)] + optim = torch.optim.Adam(self.parameters(), lr=0.02) + self.optimizers = [optim] + return self.optimizers @ptl.data_loader def tng_dataloader(self): - return DataLoader(MNIST('path/to/save', train=True), batch_size=32) + return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=32) @ptl.data_loader def val_dataloader(self): - return DataLoader(MNIST('path/to/save', train=False), batch_size=32) + return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=32) @ptl.data_loader def test_dataloader(self): - return DataLoader(MNIST('path/to/save', train=False), batch_size=32) + return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=32) ``` 2. 
Fit with a [trainer](https://williamfalcon.github.io/pytorch-lightning/Trainer/) @@ -97,13 +102,15 @@ from test_tube import Experiment model = CoolModel() # fit on 32 gpus across 4 nodes -exp = Experiment(save_dir='some/dir') -trainer = Trainer(experiment=exp, nb_gpu_nodes=4, gpus=[0,1,2,3,4,5,6,7]) +model = CoolModel() +exp = Experiment(save_dir=os.getcwd()) +trainer = Trainer(experiment=exp, max_nb_epochs=1) +# train (1 epoch only here for demo) trainer.fit(model) -# see all experiment metrics here -# tensorboard --log_dir some/dir +# view tensorboard logs +print(f'View tensorboard logs by running\ntensorboard --logdir {os.getcwd()}') ``` @@ -305,4 +312,4 @@ python multi_node_cluster_template.py --nb_gpu_nodes 4 --gpus '0,1,2,3,4,5,6,7' If you can't wait for the next release, install the most up to date code with: ```bash pip install git+https://github.com/williamFalcon/pytorch-lightning.git@master --upgrade -``` \ No newline at end of file +``` From d6b5f37a7b553ab4b1254dad7ecae1eefb4a6cd6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 27 Jul 2019 14:28:44 -0400 Subject: [PATCH 2/5] Update README.md --- README.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 315de45f83..d2488324c9 100644 --- a/README.md +++ b/README.md @@ -99,13 +99,18 @@ class CoolModel(ptl.LightningModule): from pytorch_lightning import Trainer from test_tube import Experiment -model = CoolModel() - -# fit on 32 gpus across 4 nodes model = CoolModel() exp = Experiment(save_dir=os.getcwd()) + +# train on cpu trainer = Trainer(experiment=exp, max_nb_epochs=1) +# train on 4 gpus +# trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 2, 3]) + +# train on 32 gpus across 4 nodes (make sure to submit appropriate SLURM job) +# trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 2, 3, 4, 5, 6, 7], nb_gpu_nodes=4) + # train (1 epoch only here for demo) trainer.fit(model) From 
90b14977a494e8fe971027d2c8381a68a2ed6d7b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 27 Jul 2019 14:31:22 -0400 Subject: [PATCH 3/5] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d2488324c9..9b3bb98502 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,8 @@ trainer = Trainer(experiment=exp, max_nb_epochs=1) trainer.fit(model) # view tensorboard logs -print(f'View tensorboard logs by running\ntensorboard --logdir {os.getcwd()}') +print(f'View tensorboard logs by running\ntensorboard --logdir {os.getcwd()}') +print('and going to http://localhost:6006 on your browser') ``` From 66188209b545a8a98dc2d49a916eb4b11bf6a29f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 27 Jul 2019 14:33:48 -0400 Subject: [PATCH 4/5] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9b3bb98502..40c2f0da84 100644 --- a/README.md +++ b/README.md @@ -102,8 +102,8 @@ from test_tube import Experiment model = CoolModel() exp = Experiment(save_dir=os.getcwd()) -# train on cpu -trainer = Trainer(experiment=exp, max_nb_epochs=1) +# train on cpu using only 10% of the data (for demo purposes) +trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1) # train on 4 gpus # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 2, 3]) From a48cccdc68cae5a4ba3fe465c936b812770c3ad8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 27 Jul 2019 14:38:33 -0400 Subject: [PATCH 5/5] Update README.md --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 40c2f0da84..fadb2ec1ba 100644 --- a/README.md +++ b/README.md @@ -77,9 +77,7 @@ class CoolModel(ptl.LightningModule): return {'avg_val_loss': avg_loss} def configure_optimizers(self): - optim = torch.optim.Adam(self.parameters(), lr=0.02) - self.optimizers = [optim] - return self.optimizers + 
return [torch.optim.Adam(self.parameters(), lr=0.02)] @ptl.data_loader def tng_dataloader(self):