From f78223041283450c98fb34bb23bea6706247d937 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 26 Jan 2021 10:44:54 +0100 Subject: [PATCH] docs cleaning - testcode (#5595) * testcode - python * revert * simple * testcode @rst * pl * fix * pip * update * conf * conf * nn. * typo --- .circleci/config.yml | 5 +- docs/source/conf.py | 8 +- docs/source/converting.rst | 4 +- docs/source/early_stopping.rst | 2 +- docs/source/hyperparameters.rst | 14 +- docs/source/introduction_guide.rst | 16 +- docs/source/lightning_module.rst | 2 +- docs/source/metrics.rst | 19 +- docs/source/multi_gpu.rst | 2 +- docs/source/new-project.rst | 76 ++++--- docs/source/slurm.rst | 2 +- docs/source/style_guide.rst | 17 +- docs/source/trainer.rst | 8 +- docs/source/transfer_learning.rst | 2 +- docs/source/weights_loading.rst | 8 +- pytorch_lightning/core/decorators.py | 28 ++- pytorch_lightning/core/hooks.py | 105 +++++---- pytorch_lightning/core/lightning.py | 325 +++++++++++++-------------- pytorch_lightning/core/saving.py | 49 ++-- pytorch_lightning/loggers/neptune.py | 4 +- pytorch_lightning/plugins/apex.py | 15 +- 21 files changed, 355 insertions(+), 356 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dc9d220f41..296b4c3df6 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,7 +18,10 @@ references: pyenv global 3.7.3 python --version pip install -r requirements/docs.txt - cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W" + pip list + cd docs + make clean + make html --jobs 2 SPHINXOPTS="-W" checkout_ml_testing: &checkout_ml_testing run: diff --git a/docs/source/conf.py b/docs/source/conf.py index f608f960d1..aeb63cbf15 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -25,9 +25,9 @@ PATH_HERE = os.path.abspath(os.path.dirname(__file__)) PATH_ROOT = os.path.join(PATH_HERE, '..', '..') sys.path.insert(0, os.path.abspath(PATH_ROOT)) -builtins.__LIGHTNING_SETUP__ = True - SPHINX_MOCK_REQUIREMENTS = int(os.environ.get('SPHINX_MOCK_REQUIREMENTS', True)) +if SPHINX_MOCK_REQUIREMENTS: + builtins.__LIGHTNING_SETUP__ = True import pytorch_lightning # noqa: E402 @@ -360,7 +360,10 @@ doctest_global_setup = """ import importlib import os import torch +from torch import nn +import pytorch_lightning as pl +from pytorch_lightning import LightningDataModule, LightningModule, Trainer from pytorch_lightning.utilities import ( _NATIVE_AMP_AVAILABLE, _APEX_AVAILABLE, @@ -369,6 +372,5 @@ from pytorch_lightning.utilities import ( ) _TORCHVISION_AVAILABLE = importlib.util.find_spec("torchvision") is not None - """ coverage_skip_undoc_in_source = True diff --git a/docs/source/converting.rst b/docs/source/converting.rst index b292197588..6b281031e1 100644 --- a/docs/source/converting.rst +++ b/docs/source/converting.rst @@ -24,8 +24,8 @@ Move the model architecture and forward pass to your :ref:`lightning_module`. def __init__(self): super().__init__() - self.layer_1 = torch.nn.Linear(28 * 28, 128) - self.layer_2 = torch.nn.Linear(128, 10) + self.layer_1 = nn.Linear(28 * 28, 128) + self.layer_2 = nn.Linear(128, 10) def forward(self, x): x = x.view(x.size(0), -1) diff --git a/docs/source/early_stopping.rst b/docs/source/early_stopping.rst index e286c0e3e1..53bafbf116 100644 --- a/docs/source/early_stopping.rst +++ b/docs/source/early_stopping.rst @@ -49,7 +49,7 @@ To enable it: - You can customize the callbacks behaviour by changing its parameters. -.. code-block:: python +.. 
testcode:: early_stop_callback = EarlyStopping( monitor='val_accuracy', diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index fb27636b9b..4f8ca71af5 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -161,9 +161,9 @@ improve readability and reproducibility. def __init__(self, hparams, *args, **kwargs): super().__init__() self.hparams = hparams - self.layer_1 = torch.nn.Linear(28 * 28, self.hparams.layer_1_dim) - self.layer_2 = torch.nn.Linear(self.hparams.layer_1_dim, self.hparams.layer_2_dim) - self.layer_3 = torch.nn.Linear(self.hparams.layer_2_dim, 10) + self.layer_1 = nn.Linear(28 * 28, self.hparams.layer_1_dim) + self.layer_2 = nn.Linear(self.hparams.layer_1_dim, self.hparams.layer_2_dim) + self.layer_3 = nn.Linear(self.hparams.layer_2_dim, 10) def train_dataloader(self): return DataLoader(mnist_train, batch_size=self.hparams.batch_size) @@ -182,9 +182,9 @@ improve readability and reproducibility. super().__init__() self.save_hyperparameters(conf) - self.layer_1 = torch.nn.Linear(28 * 28, self.hparams.layer_1_dim) - self.layer_2 = torch.nn.Linear(self.hparams.layer_1_dim, self.hparams.layer_2_dim) - self.layer_3 = torch.nn.Linear(self.hparams.layer_2_dim, 10) + self.layer_1 = nn.Linear(28 * 28, self.hparams.layer_1_dim) + self.layer_2 = nn.Linear(self.hparams.layer_1_dim, self.hparams.layer_2_dim) + self.layer_3 = nn.Linear(self.hparams.layer_2_dim, 10) conf = OmegaConf.create(...) model = LitMNIST(conf) @@ -225,7 +225,7 @@ polluting the ``main.py`` file, the ``LightningModule`` lets you define argument def __init__(self, layer_1_dim, **kwargs): super().__init__() - self.layer_1 = torch.nn.Linear(28 * 28, layer_1_dim) + self.layer_1 = nn.Linear(28 * 28, layer_1_dim) @staticmethod def add_model_specific_args(parent_parser): diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst index d306c3b3bb..a2a8340b34 100644 --- a/docs/source/introduction_guide.rst +++ b/docs/source/introduction_guide.rst @@ -80,9 +80,9 @@ Let's first start with the model. In this case, we'll design a 3-layer neural ne super().__init__() # mnist images are (1, 28, 28) (channels, width, height) - self.layer_1 = torch.nn.Linear(28 * 28, 128) - self.layer_2 = torch.nn.Linear(128, 256) - self.layer_3 = torch.nn.Linear(256, 10) + self.layer_1 = nn.Linear(28 * 28, 128) + self.layer_2 = nn.Linear(128, 256) + self.layer_3 = nn.Linear(256, 10) def forward(self, x): batch_size, channels, width, height = x.size() @@ -118,7 +118,7 @@ equivalent to a pure PyTorch Module except it has added functionality. However, Now we add the training_step which has all our training loop logic -.. testcode:: python +.. testcode:: class LitMNIST(LightningModule): @@ -225,7 +225,7 @@ In this case, it's better to group the full definition of a dataset into a `Data - Val dataloader(s) - Test dataloader(s) -.. testcode:: python +.. testcode:: class MyDataModule(LightningDataModule): @@ -420,9 +420,9 @@ For clarity, we'll recall that the full LightningModule now looks like this. 
class LitMNIST(LightningModule): def __init__(self): super().__init__() - self.layer_1 = torch.nn.Linear(28 * 28, 128) - self.layer_2 = torch.nn.Linear(128, 256) - self.layer_3 = torch.nn.Linear(256, 10) + self.layer_1 = nn.Linear(28 * 28, 128) + self.layer_2 = nn.Linear(128, 256) + self.layer_3 = nn.Linear(256, 10) def forward(self, x): batch_size, channels, width, height = x.size() diff --git a/docs/source/lightning_module.rst b/docs/source/lightning_module.rst index b6010409d6..30842deba8 100644 --- a/docs/source/lightning_module.rst +++ b/docs/source/lightning_module.rst @@ -96,7 +96,7 @@ Here are the only required methods. ... ... def __init__(self): ... super().__init__() - ... self.l1 = torch.nn.Linear(28 * 28, 10) + ... self.l1 = nn.Linear(28 * 28, 10) ... ... def forward(self, x): ... return torch.relu(self.l1(x.view(x.size(0), -1))) diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst index ad9604e1ce..12f26b5726 100644 --- a/docs/source/metrics.rst +++ b/docs/source/metrics.rst @@ -141,9 +141,11 @@ This metrics API is independent of PyTorch Lightning. Metrics can directly be us .. testcode:: + from pytorch_lightning.metrics import Accuracy + def __init__(self): ... - metric = pl.metrics.Accuracy() + metric = Accuracy() self.train_acc = metric.clone() self.val_acc = metric.clone() self.test_acc = metric.clone() @@ -164,7 +166,6 @@ be moved to the same device as the input of the metric: .. code-block:: python - import torch from pytorch_lightning.metrics import Accuracy target = torch.tensor([1, 1, 0, 0], device=torch.device("cuda", 0)) @@ -186,13 +187,15 @@ as child modules. Instead of ``list`` use :class:`~torch.nn.ModuleList` and inst .. testcode:: + from pytorch_lightning.metrics import Accuracy + class MyModule(LightningModule): def __init__(self): ... # valid ways metrics will be identified as child modules - self.metric1 = pl.metrics.Accuracy() - self.metric2 = torch.nn.ModuleList(pl.metrics.Accuracy()) - self.metric3 = torch.nn.ModuleDict({'accuracy': Accuracy()}) + self.metric1 = Accuracy() + self.metric2 = nn.ModuleList(Accuracy()) + self.metric3 = nn.ModuleDict({'accuracy': Accuracy()}) def training_step(self, batch, batch_idx): # all metrics will be on the same device as the input batch @@ -222,7 +225,7 @@ from the base ``Metric`` class. Example implementation: -.. code-block:: python +.. testcode:: from pytorch_lightning.metrics import Metric @@ -281,8 +284,8 @@ Example: .. testoutput:: :options: +NORMALIZE_WHITESPACE - {'Accuracy': tensor(0.1250), - 'Precision': tensor(0.0667), + {'Accuracy': tensor(0.1250), + 'Precision': tensor(0.0667), 'Recall': tensor(0.1111)} Similarly it can also reduce the amount of code required to log multiple metrics diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst index 9d868406e2..b822d25d6b 100644 --- a/docs/source/multi_gpu.rst +++ b/docs/source/multi_gpu.rst @@ -698,7 +698,7 @@ This should be kept within the ``sequential_module`` variable within your ``Ligh class MyModel(LightningModule): def __init__(self): ... - self.sequential_module = torch.nn.Sequential(my_layers) + self.sequential_module = nn.Sequential(my_layers) # Split my module across 4 gpus, one layer each model = MyModel() diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 30e06f76ae..6586be4141 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -65,7 +65,8 @@ You could also use conda environments Import the following: -.. code-block:: python +.. 
testcode:: + :skipif: not _TORCHVISION_AVAILABLE import os import torch @@ -80,9 +81,9 @@ Import the following: Step 1: Define LightningModule ****************************** -.. code-block:: +.. testcode:: - class LitAutoEncoder(pl.LightningModule): + class LitAutoEncoder(LightningModule): def __init__(self): super().__init__() @@ -147,9 +148,9 @@ Under the hood a LightningModule is still just a :class:`torch.nn.Module` that g You can customize any part of training (such as the backward pass) by overriding any of the 20+ hooks found in :ref:`hooks` -.. code-block:: python +.. testcode:: - class LitAutoEncoder(pl.LightningModule): + class LitAutoEncoder(LightningModule): def backward(self, loss, optimizer, optimizer_idx): loss.backward() @@ -259,7 +260,7 @@ or an inner loop, you can turn off automatic optimization and fully control the First, turn off automatic optimization: -.. code-block:: python +.. testcode:: trainer = Trainer(automatic_optimization=False) @@ -310,17 +311,21 @@ Option 2: Forward ----------------- You can also add a forward method to do predictions however you want. -.. code-block:: python +.. testcode:: # ---------------------------------- # using the AE to extract embeddings # ---------------------------------- - class LitAutoEncoder(pl.LightningModule): + class LitAutoEncoder(LightningModule): + def __init__(self): + super().__init__() + self.encoder = nn.Sequential() + def forward(self, x): embedding = self.encoder(x) return embedding - autoencoder = LitAutoencoder() + autoencoder = LitAutoEncoder() autoencoder = autoencoder(torch.rand(1, 28 * 28)) @@ -329,14 +334,18 @@ You can also add a forward method to do predictions however you want. # ---------------------------------- # or using the AE to generate images # ---------------------------------- - class LitAutoEncoder(pl.LightningModule): + class LitAutoEncoder(LightningModule): + def __init__(self): + super().__init__() + self.decoder = nn.Sequential() + def forward(self): z = torch.rand(1, 3) image = self.decoder(z) image = image.view(1, 1, 28, 28) return image - autoencoder = LitAutoencoder() + autoencoder = LitAutoEncoder() image_sample = autoencoder() Option 3: Production @@ -370,15 +379,15 @@ Using CPUs/GPUs/TPUs ==================== It's trivial to use CPUs, GPUs or TPUs in Lightning. There's **NO NEED** to change your code, simply change the :class:`~pytorch_lightning.trainer.Trainer` options. -.. code-block:: python +.. testcode:: # train on CPU - trainer = pl.Trainer() + trainer = Trainer() -.. code-block:: python +.. testcode:: # train on 8 CPUs - trainer = pl.Trainer(num_processes=8) + trainer = Trainer(num_processes=8) .. code-block:: python @@ -583,7 +592,9 @@ Here's an example adding a not-so-fancy learning rate decay rule: .. testcode:: - class DecayLearningRate(pl.callbacks.Callback): + from pytorch_lightning.callbacks import Callback + + class DecayLearningRate(Callback): def __init__(self): self.old_lrs = [] @@ -605,10 +616,7 @@ Here's an example adding a not-so-fancy learning rate decay rule: param_group['lr'] = new_lr self.old_lrs[opt_idx] = new_lr_group -And pass the callback to the Trainer - -.. code-block:: python - + # And pass the callback to the Trainer decay_callback = DecayLearningRate() trainer = Trainer(callbacks=[decay_callback]) @@ -629,9 +637,9 @@ LightningDataModules DataLoaders and data processing code tends to end up scattered around. Make your data code reusable by organizing it into a :class:`~pytorch_lightning.core.datamodule.LightningDataModule`. -.. code-block:: python +.. 
testcode:: - class MNISTDataModule(pl.LightningDataModule): + class MNISTDataModule(LightningDataModule): def __init__(self, batch_size=32): super().__init__() @@ -679,7 +687,7 @@ tokenizing, processing etc. Now you can simply pass your :class:`~pytorch_lightning.core.datamodule.LightningDataModule` to the :class:`~pytorch_lightning.trainer.Trainer`: -.. code-block:: +.. code-block:: python # init model model = LitModel() @@ -702,33 +710,33 @@ Debugging ========= Lightning has many tools for debugging. Here is an example of just a few of them: -.. code-block:: python +.. testcode:: # use only 10 train batches and 3 val batches - trainer = pl.Trainer(limit_train_batches=10, limit_val_batches=3) + trainer = Trainer(limit_train_batches=10, limit_val_batches=3) -.. code-block:: python +.. testcode:: # Automatically overfit the sane batch of your model for a sanity test - trainer = pl.Trainer(overfit_batches=1) + trainer = Trainer(overfit_batches=1) -.. code-block:: python +.. testcode:: # unit test all the code- hits every line of your code once to see if you have bugs, # instead of waiting hours to crash on validation - trainer = pl.Trainer(fast_dev_run=True) + trainer = Trainer(fast_dev_run=True) -.. code-block:: python +.. testcode:: # train only 20% of an epoch - trainer = pl.Trainer(limit_train_batches=0.2) + trainer = Trainer(limit_train_batches=0.2) -.. code-block:: python +.. testcode:: # run validation every 25% of a training epoch - trainer = pl.Trainer(val_check_interval=0.25) + trainer = Trainer(val_check_interval=0.25) -.. code-block:: python +.. testcode:: # Profile your code to find speed/memory bottlenecks Trainer(profiler=True) diff --git a/docs/source/slurm.rst b/docs/source/slurm.rst index be40810c3f..d9cb508df1 100644 --- a/docs/source/slurm.rst +++ b/docs/source/slurm.rst @@ -34,7 +34,7 @@ To train a model using multiple nodes, do the following: def main(hparams): model = LightningTemplateModel(hparams) - trainer = pl.Trainer( + trainer = Trainer( gpus=8, num_nodes=4, accelerator='ddp' diff --git a/docs/source/style_guide.rst b/docs/source/style_guide.rst index c6e06395f8..54611a1d48 100644 --- a/docs/source/style_guide.rst +++ b/docs/source/style_guide.rst @@ -46,10 +46,10 @@ Here's a LightningModule that defines a model: Here's a lightningModule that defines a system: -.. code-block:: python +.. testcode:: - class LitModel(pl.LightningModule): - def __init__(self, encoder: nn.Module = None, decoder: nn.Module = None) + class LitModel(LightningModule): + def __init__(self, encoder: nn.Module = None, decoder: nn.Module = None): super().__init__() self.encoder = encoder self.decoder = decoder @@ -74,9 +74,9 @@ sensible defaults in the init so that the user doesn't have to guess. Here's an example where a user will have to go hunt through files to figure out how to init this LightningModule. -.. code-block:: python +.. testcode:: - class LitModel(pl.LightningModule): + class LitModel(LightningModule): def __init__(self, params): self.lr = params.lr self.coef_x = params.coef_x @@ -85,10 +85,11 @@ Models defined as such leave you with many questions; what is coef_x? is it a st Instead, be explicit in your init -.. code-block:: python +.. testcode:: - class LitModel(pl.LightningModule): - def __init__(self, encoder: nn.Module, coeff_x: float = 0.2, lr: float = 1e-3) + class LitModel(LightningModule): + def __init__(self, encoder: nn.Module, coeff_x: float = 0.2, lr: float = 1e-3): + ... Now the user doesn't have to guess. 
Instead they know the value type and the model has a sensible default where the user can see the value immediately. diff --git a/docs/source/trainer.rst b/docs/source/trainer.rst index c2c972b730..ecbe241f9f 100644 --- a/docs/source/trainer.rst +++ b/docs/source/trainer.rst @@ -1501,10 +1501,10 @@ override :meth:`pytorch_lightning.core.LightningModule.tbptt_split_batch`: .. testcode:: - class LitMNIST(LightningModule): - def tbptt_split_batch(self, batch, split_size): - # do your own splitting on the batch - return splits + class LitMNIST(LightningModule): + def tbptt_split_batch(self, batch, split_size): + # do your own splitting on the batch + return splits val_check_interval ^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/transfer_learning.rst b/docs/source/transfer_learning.rst index 5e885dbf3e..e35220764c 100644 --- a/docs/source/transfer_learning.rst +++ b/docs/source/transfer_learning.rst @@ -58,7 +58,7 @@ Example: Imagenet (computer Vision) backbone = models.resnet50(pretrained=True) num_filters = backbone.fc.in_features layers = list(backbone.children())[:-1] - self.feature_extractor = torch.nn.Sequential(*layers) + self.feature_extractor = nn.Sequential(*layers) # use the pretrained model to classify cifar-10 (10 image classes) num_target_classes = 10 diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index f22e355a09..77570260fe 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -48,11 +48,11 @@ You can customize the checkpointing behavior to monitor any quantity of your tra 3. Initializing the :class:`~pytorch_lightning.callbacks.ModelCheckpoint` callback, and set `monitor` to be the key of your quantity. 4. Pass the callback to the `callbacks` :class:`~pytorch_lightning.trainer.Trainer` flag. -.. code-block:: python +.. testcode:: from pytorch_lightning.callbacks import ModelCheckpoint - class LitAutoEncoder(pl.LightningModule): + class LitAutoEncoder(LightningModule): def validation_step(self, batch, batch_idx): x, y = batch y_hat = self.backbone(x) @@ -71,11 +71,11 @@ You can customize the checkpointing behavior to monitor any quantity of your tra You can also control more advanced options, like `save_top_k`, to save the best k models and the `mode` of the monitored quantity (min/max), `save_weights_only` or `period` to set the interval of epochs between checkpoints, to avoid slowdowns. -.. code-block:: python +.. testcode:: from pytorch_lightning.callbacks import ModelCheckpoint - class LitAutoEncoder(pl.LightningModule): + class LitAutoEncoder(LightningModule): def validation_step(self, batch, batch_idx): x, y = batch y_hat = self.backbone(x) diff --git a/pytorch_lightning/core/decorators.py b/pytorch_lightning/core/decorators.py index 938db9cc20..47643c6f32 100644 --- a/pytorch_lightning/core/decorators.py +++ b/pytorch_lightning/core/decorators.py @@ -32,26 +32,24 @@ def auto_move_data(fn: Callable) -> Callable: fn: A LightningModule method for which the arguments should be moved to the device the parameters are on. - Example: + Example:: - .. 
code-block:: python + # directly in the source code + class LitModel(LightningModule): - # directly in the source code - class LitModel(LightningModule): + @auto_move_data + def forward(self, x): + return x - @auto_move_data - def forward(self, x): - return x + # or outside + LitModel.forward = auto_move_data(LitModel.forward) - # or outside - LitModel.forward = auto_move_data(LitModel.forward) + model = LitModel() + model = model.to('cuda') + model(torch.zeros(1, 3)) - model = LitModel() - model = model.to('cuda') - model(torch.zeros(1, 3)) - - # input gets moved to device - # tensor([[0., 0., 0.]], device='cuda:0') + # input gets moved to device + # tensor([[0., 0., 0.]], device='cuda:0') """ @wraps(fn) diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index f27c185138..e6e29ce9ea 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -391,20 +391,19 @@ class DataHooks: Lightning adds the correct sampler for distributed and arbitrary hardware. There is no need to set it yourself. - Example: - .. code-block:: python + Example:: - def train_dataloader(self): - transform = transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.5,), (1.0,))]) - dataset = MNIST(root='/path/to/mnist/', train=True, transform=transform, - download=True) - loader = torch.utils.data.DataLoader( - dataset=dataset, - batch_size=self.batch_size, - shuffle=True - ) - return loader + def train_dataloader(self): + transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5,), (1.0,))]) + dataset = MNIST(root='/path/to/mnist/', train=True, transform=transform, + download=True) + loader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=self.batch_size, + shuffle=True + ) + return loader """ rank_zero_warn( @@ -443,25 +442,24 @@ class DataHooks: Return: Single or multiple PyTorch DataLoaders. - Example: - .. code-block:: python + Example:: - def test_dataloader(self): - transform = transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.5,), (1.0,))]) - dataset = MNIST(root='/path/to/mnist/', train=False, transform=transform, - download=True) - loader = torch.utils.data.DataLoader( - dataset=dataset, - batch_size=self.batch_size, - shuffle=False - ) + def test_dataloader(self): + transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5,), (1.0,))]) + dataset = MNIST(root='/path/to/mnist/', train=False, transform=transform, + download=True) + loader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=self.batch_size, + shuffle=False + ) - return loader + return loader - # can also return multiple dataloaders - def test_dataloader(self): - return [loader_a, loader_b, ..., loader_n] + # can also return multiple dataloaders + def test_dataloader(self): + return [loader_a, loader_b, ..., loader_n] Note: If you don't need a test dataset and a :meth:`test_step`, you don't need to implement @@ -495,25 +493,24 @@ class DataHooks: Return: Single or multiple PyTorch DataLoaders. - Examples: - .. 
code-block:: python + Examples:: - def val_dataloader(self): - transform = transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.5,), (1.0,))]) - dataset = MNIST(root='/path/to/mnist/', train=False, - transform=transform, download=True) - loader = torch.utils.data.DataLoader( - dataset=dataset, - batch_size=self.batch_size, - shuffle=False - ) + def val_dataloader(self): + transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5,), (1.0,))]) + dataset = MNIST(root='/path/to/mnist/', train=False, + transform=transform, download=True) + loader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=self.batch_size, + shuffle=False + ) - return loader + return loader - # can also return multiple dataloaders - def val_dataloader(self): - return [loader_a, loader_b, ..., loader_n] + # can also return multiple dataloaders + def val_dataloader(self): + return [loader_a, loader_b, ..., loader_n] Note: If you don't need a validation dataset and a :meth:`validation_step`, you don't need to @@ -586,12 +583,11 @@ class CheckpointHooks: checkpoint: Loaded checkpoint - Example: - .. code-block:: python + Example:: - def on_load_checkpoint(self, checkpoint): - # 99% of the time you don't need to implement this method - self.something_cool_i_want_to_save = checkpoint['something_cool_i_want_to_save'] + def on_load_checkpoint(self, checkpoint): + # 99% of the time you don't need to implement this method + self.something_cool_i_want_to_save = checkpoint['something_cool_i_want_to_save'] Note: Lightning auto-restores global step, epoch, and train state including amp scaling. @@ -606,12 +602,11 @@ class CheckpointHooks: Args: checkpoint: Checkpoint to be saved - Example: - .. code-block:: python + Example:: - def on_save_checkpoint(self, checkpoint): - # 99% of use cases you don't need to implement this method - checkpoint['something_cool_i_want_to_save'] = my_cool_pickable_object + def on_save_checkpoint(self, checkpoint): + # 99% of use cases you don't need to implement this method + checkpoint['something_cool_i_want_to_save'] = my_cool_pickable_object Note: Lightning saves all aspects of training (epoch, global step, etc...) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index dd5691d6e4..5e8407f79a 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -161,12 +161,10 @@ class LightningModule( *args: The thing to print. Will be passed to Python's built-in print function. **kwargs: Will be passed to Python's built-in print function. - Example: + Example:: - .. code-block:: python - - def forward(self, x): - self.print(x, 'in forward') + def forward(self, x): + self.print(x, 'in forward') """ if self.trainer.is_global_zero: @@ -409,36 +407,35 @@ class LightningModule( Return: Predicted output - Examples: - .. code-block:: python + Examples:: - # example if we were using this model as a feature extractor - def forward(self, x): - feature_maps = self.convnet(x) - return feature_maps + # example if we were using this model as a feature extractor + def forward(self, x): + feature_maps = self.convnet(x) + return feature_maps - def training_step(self, batch, batch_idx): - x, y = batch - feature_maps = self(x) - logits = self.classifier(feature_maps) + def training_step(self, batch, batch_idx): + x, y = batch + feature_maps = self(x) + logits = self.classifier(feature_maps) - # ... - return loss + # ... 
+ return loss - # splitting it this way allows model to be used a feature extractor - model = MyModelAbove() + # splitting it this way allows model to be used a feature extractor + model = MyModelAbove() - inputs = server.get_request() - results = model(inputs) - server.write_results(results) + inputs = server.get_request() + results = model(inputs) + server.write_results(results) - # ------------- - # This is in stark contrast to torch.nn.Module where normally you would have this: - def forward(self, batch): - x, y = batch - feature_maps = self.convnet(x) - logits = self.classifier(feature_maps) - return logits + # ------------- + # This is in stark contrast to torch.nn.Module where normally you would have this: + def forward(self, batch): + x, y = batch + feature_maps = self.convnet(x) + logits = self.classifier(feature_maps) + return logits """ return super().forward(*args, **kwargs) @@ -655,37 +652,36 @@ class LightningModule( # if you have multiple val dataloaders: def validation_step(self, batch, batch_idx, dataloader_idx) - Examples: - .. code-block:: python + Examples:: - # CASE 1: A single validation dataset - def validation_step(self, batch, batch_idx): - x, y = batch + # CASE 1: A single validation dataset + def validation_step(self, batch, batch_idx): + x, y = batch - # implement your own - out = self(x) - loss = self.loss(out, y) + # implement your own + out = self(x) + loss = self.loss(out, y) - # log 6 example images - # or generated text... or whatever - sample_imgs = x[:6] - grid = torchvision.utils.make_grid(sample_imgs) - self.logger.experiment.add_image('example_images', grid, 0) + # log 6 example images + # or generated text... or whatever + sample_imgs = x[:6] + grid = torchvision.utils.make_grid(sample_imgs) + self.logger.experiment.add_image('example_images', grid, 0) - # calculate acc - labels_hat = torch.argmax(out, dim=1) - val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) + # calculate acc + labels_hat = torch.argmax(out, dim=1) + val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - # log the outputs! - self.log_dict({'val_loss': loss, 'val_acc': val_acc}) + # log the outputs! + self.log_dict({'val_loss': loss, 'val_acc': val_acc}) - If you pass in multiple val datasets, validation_step will have an additional argument. + If you pass in multiple val datasets, validation_step will have an additional argument. - .. code-block:: python + .. code-block:: python - # CASE 2: multiple validation datasets - def validation_step(self, batch, batch_idx, dataloader_idx): - # dataloader_idx tells you which dataset this is. + # CASE 2: multiple validation datasets + def validation_step(self, batch, batch_idx, dataloader_idx): + # dataloader_idx tells you which dataset this is. Note: If you don't need to validate you don't need to implement this method. @@ -831,38 +827,37 @@ class LightningModule( # if you have multiple test dataloaders: def test_step(self, batch, batch_idx, dataloader_idx) - Examples: - .. code-block:: python + Examples:: - # CASE 1: A single test dataset - def test_step(self, batch, batch_idx): - x, y = batch + # CASE 1: A single test dataset + def test_step(self, batch, batch_idx): + x, y = batch - # implement your own - out = self(x) - loss = self.loss(out, y) + # implement your own + out = self(x) + loss = self.loss(out, y) - # log 6 example images - # or generated text... 
or whatever - sample_imgs = x[:6] - grid = torchvision.utils.make_grid(sample_imgs) - self.logger.experiment.add_image('example_images', grid, 0) + # log 6 example images + # or generated text... or whatever + sample_imgs = x[:6] + grid = torchvision.utils.make_grid(sample_imgs) + self.logger.experiment.add_image('example_images', grid, 0) - # calculate acc - labels_hat = torch.argmax(out, dim=1) - test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) + # calculate acc + labels_hat = torch.argmax(out, dim=1) + test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - # log the outputs! - self.log_dict({'test_loss': loss, 'test_acc': test_acc}) + # log the outputs! + self.log_dict({'test_loss': loss, 'test_acc': test_acc}) - If you pass in multiple validation datasets, :meth:`test_step` will have an additional - argument. + If you pass in multiple validation datasets, :meth:`test_step` will have an additional + argument. - .. code-block:: python + .. code-block:: python - # CASE 2: multiple test datasets - def test_step(self, batch, batch_idx, dataloader_idx): - # dataloader_idx tells you which dataset this is. + # CASE 2: multiple test datasets + def test_step(self, batch, batch_idx, dataloader_idx): + # dataloader_idx tells you which dataset this is. Note: If you don't need to validate you don't need to implement this method. @@ -1023,47 +1018,46 @@ class LightningModule( Only the ``scheduler`` key is required, the rest will be set to the defaults above. - Examples: - .. code-block:: python + Examples:: - # most cases - def configure_optimizers(self): - opt = Adam(self.parameters(), lr=1e-3) - return opt + # most cases + def configure_optimizers(self): + opt = Adam(self.parameters(), lr=1e-3) + return opt - # multiple optimizer case (e.g.: GAN) - def configure_optimizers(self): - generator_opt = Adam(self.model_gen.parameters(), lr=0.01) - disriminator_opt = Adam(self.model_disc.parameters(), lr=0.02) - return generator_opt, disriminator_opt + # multiple optimizer case (e.g.: GAN) + def configure_optimizers(self): + generator_opt = Adam(self.model_gen.parameters(), lr=0.01) + disriminator_opt = Adam(self.model_disc.parameters(), lr=0.02) + return generator_opt, disriminator_opt - # example with learning rate schedulers - def configure_optimizers(self): - generator_opt = Adam(self.model_gen.parameters(), lr=0.01) - disriminator_opt = Adam(self.model_disc.parameters(), lr=0.02) - discriminator_sched = CosineAnnealing(discriminator_opt, T_max=10) - return [generator_opt, disriminator_opt], [discriminator_sched] + # example with learning rate schedulers + def configure_optimizers(self): + generator_opt = Adam(self.model_gen.parameters(), lr=0.01) + disriminator_opt = Adam(self.model_disc.parameters(), lr=0.02) + discriminator_sched = CosineAnnealing(discriminator_opt, T_max=10) + return [generator_opt, disriminator_opt], [discriminator_sched] - # example with step-based learning rate schedulers - def configure_optimizers(self): - gen_opt = Adam(self.model_gen.parameters(), lr=0.01) - dis_opt = Adam(self.model_disc.parameters(), lr=0.02) - gen_sched = {'scheduler': ExponentialLR(gen_opt, 0.99), - 'interval': 'step'} # called after each training step - dis_sched = CosineAnnealing(discriminator_opt, T_max=10) # called every epoch - return [gen_opt, dis_opt], [gen_sched, dis_sched] + # example with step-based learning rate schedulers + def configure_optimizers(self): + gen_opt = Adam(self.model_gen.parameters(), lr=0.01) + dis_opt = Adam(self.model_disc.parameters(), lr=0.02) + 
gen_sched = {'scheduler': ExponentialLR(gen_opt, 0.99), + 'interval': 'step'} # called after each training step + dis_sched = CosineAnnealing(discriminator_opt, T_max=10) # called every epoch + return [gen_opt, dis_opt], [gen_sched, dis_sched] - # example with optimizer frequencies - # see training procedure in `Improved Training of Wasserstein GANs`, Algorithm 1 - # https://arxiv.org/abs/1704.00028 - def configure_optimizers(self): - gen_opt = Adam(self.model_gen.parameters(), lr=0.01) - dis_opt = Adam(self.model_disc.parameters(), lr=0.02) - n_critic = 5 - return ( - {'optimizer': dis_opt, 'frequency': n_critic}, - {'optimizer': gen_opt, 'frequency': 1} - ) + # example with optimizer frequencies + # see training procedure in `Improved Training of Wasserstein GANs`, Algorithm 1 + # https://arxiv.org/abs/1704.00028 + def configure_optimizers(self): + gen_opt = Adam(self.model_gen.parameters(), lr=0.01) + dis_opt = Adam(self.model_disc.parameters(), lr=0.02) + n_critic = 5 + return ( + {'optimizer': dis_opt, 'frequency': n_critic}, + {'optimizer': gen_opt, 'frequency': 1} + ) Note: @@ -1211,50 +1205,49 @@ class LightningModule( using_native_amp: True if using native amp using_lbfgs: True if the matching optimizer is lbfgs - Examples: - .. code-block:: python + Examples:: - # DEFAULT - def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): - optimizer.step(closure=optimizer_closure) + # DEFAULT + def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, + optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + optimizer.step(closure=optimizer_closure) - # Alternating schedule for optimizer steps (i.e.: GANs) - def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): - # update generator opt every 2 steps - if optimizer_idx == 0: - if batch_idx % 2 == 0 : - optimizer.step(closure=optimizer_closure) - optimizer.zero_grad() + # Alternating schedule for optimizer steps (i.e.: GANs) + def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, + optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + # update generator opt every 2 steps + if optimizer_idx == 0: + if batch_idx % 2 == 0 : + optimizer.step(closure=optimizer_closure) + optimizer.zero_grad() - # update discriminator opt every 4 steps - if optimizer_idx == 1: - if batch_idx % 4 == 0 : - optimizer.step(closure=optimizer_closure) - optimizer.zero_grad() + # update discriminator opt every 4 steps + if optimizer_idx == 1: + if batch_idx % 4 == 0 : + optimizer.step(closure=optimizer_closure) + optimizer.zero_grad() - # ... - # add as many optimizers as you want + # ... + # add as many optimizers as you want - Here's another example showing how to use this for more advanced things such as - learning rate warm-up: + Here's another example showing how to use this for more advanced things such as + learning rate warm-up: - .. code-block:: python + .. code-block:: python - # learning rate warm-up - def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): - # warm up lr - if self.trainer.global_step < 500: - lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) 
- for pg in optimizer.param_groups: - pg['lr'] = lr_scale * self.learning_rate + # learning rate warm-up + def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, + optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + # warm up lr + if self.trainer.global_step < 500: + lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) + for pg in optimizer.param_groups: + pg['lr'] = lr_scale * self.learning_rate - # update params - optimizer.step(closure=optimizer_closure) - optimizer.zero_grad() + # update params + optimizer.step(closure=optimizer_closure) + optimizer.zero_grad() """ if not isinstance(optimizer, LightningOptimizer): @@ -1282,26 +1275,25 @@ class LightningModule( back propagation through time. The default implementation splits root level Tensors and Sequences at dim=1 (i.e. time dim). It assumes that each time dim is the same length. - Examples: - .. code-block:: python + Examples:: - def tbptt_split_batch(self, batch, split_size): - splits = [] - for t in range(0, time_dims[0], split_size): - batch_split = [] - for i, x in enumerate(batch): - if isinstance(x, torch.Tensor): - split_x = x[:, t:t + split_size] - elif isinstance(x, collections.Sequence): - split_x = [None] * len(x) - for batch_idx in range(len(x)): - split_x[batch_idx] = x[batch_idx][t:t + split_size] + def tbptt_split_batch(self, batch, split_size): + splits = [] + for t in range(0, time_dims[0], split_size): + batch_split = [] + for i, x in enumerate(batch): + if isinstance(x, torch.Tensor): + split_x = x[:, t:t + split_size] + elif isinstance(x, collections.Sequence): + split_x = [None] * len(x) + for batch_idx in range(len(x)): + split_x[batch_idx] = x[batch_idx][t:t + split_size] - batch_split.append(split_x) + batch_split.append(split_x) - splits.append(batch_split) + splits.append(batch_split) - return splits + return splits Note: Called in the training loop after @@ -1354,11 +1346,10 @@ class LightningModule( r""" Freeze all params for inference. - Example: - .. code-block:: python + Example:: - model = MyLightningModule(...) - model.freeze() + model = MyLightningModule(...) + model.freeze() """ for param in self.parameters(): diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py index 2637d2c367..1761fc0135 100644 --- a/pytorch_lightning/core/saving.py +++ b/pytorch_lightning/core/saving.py @@ -93,36 +93,35 @@ class ModelIO(object): Return: :class:`LightningModule` with loaded weights and hyperparameters (if available). - Example: - .. code-block:: python + Example:: - # load weights without mapping ... - MyLightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') + # load weights without mapping ... + MyLightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') - # or load weights mapping all weights from GPU 1 to GPU 0 ... - map_location = {'cuda:1':'cuda:0'} - MyLightningModule.load_from_checkpoint( - 'path/to/checkpoint.ckpt', - map_location=map_location - ) + # or load weights mapping all weights from GPU 1 to GPU 0 ... + map_location = {'cuda:1':'cuda:0'} + MyLightningModule.load_from_checkpoint( + 'path/to/checkpoint.ckpt', + map_location=map_location + ) - # or load weights and hyperparameters from separate files. - MyLightningModule.load_from_checkpoint( - 'path/to/checkpoint.ckpt', - hparams_file='/path/to/hparams_file.yaml' - ) + # or load weights and hyperparameters from separate files. 
+ MyLightningModule.load_from_checkpoint( + 'path/to/checkpoint.ckpt', + hparams_file='/path/to/hparams_file.yaml' + ) - # override some of the params with new values - MyLightningModule.load_from_checkpoint( - PATH, - num_layers=128, - pretrained_ckpt_path: NEW_PATH, - ) + # override some of the params with new values + MyLightningModule.load_from_checkpoint( + PATH, + num_layers=128, + pretrained_ckpt_path: NEW_PATH, + ) - # predict - pretrained_model.eval() - pretrained_model.freeze() - y_hat = pretrained_model(x) + # predict + pretrained_model.eval() + pretrained_model.freeze() + y_hat = pretrained_model(x) """ if map_location is not None: checkpoint = pl_load(checkpoint_path, map_location=map_location) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index ae1c619860..803302f875 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -52,7 +52,7 @@ class NeptuneLogger(LightningLoggerBase): **ONLINE MODE** - .. code-block:: python + .. testcode:: from pytorch_lightning import Trainer from pytorch_lightning.loggers import NeptuneLogger @@ -70,7 +70,7 @@ class NeptuneLogger(LightningLoggerBase): **OFFLINE MODE** - .. code-block:: python + .. testcode:: from pytorch_lightning.loggers import NeptuneLogger diff --git a/pytorch_lightning/plugins/apex.py b/pytorch_lightning/plugins/apex.py index eb8dd0bfc0..6b26a8b3a8 100644 --- a/pytorch_lightning/plugins/apex.py +++ b/pytorch_lightning/plugins/apex.py @@ -85,16 +85,15 @@ class ApexPlugin(PrecisionPlugin): Return: Apex wrapped model and optimizers - Examples: - .. code-block:: python + Examples:: - # Default implementation used by Trainer. - def configure_apex(self, amp, model, optimizers, amp_level): - model, optimizers = amp.initialize( - model, optimizers, opt_level=amp_level, - ) + # Default implementation used by Trainer. + def configure_apex(self, amp, model, optimizers, amp_level): + model, optimizers = amp.initialize( + model, optimizers, opt_level=amp_level, + ) - return model, optimizers + return model, optimizers """ model, optimizers = amp.initialize(model, optimizers, opt_level=amp_level) return model, optimizers
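
For reference, a minimal sketch (not part of the commit) of the directive style this patch migrates the docs to: a ``.. testcode::`` block that relies on the names pre-imported by ``doctest_global_setup`` in ``docs/source/conf.py`` (``nn``, ``pl``, ``LightningModule``, ``Trainer``) plus an optional ``:skipif:`` guard for soft dependencies such as torchvision. The class name ``LitExample`` is illustrative only.

.. testcode::

    class LitExample(LightningModule):
        def __init__(self):
            super().__init__()
            # nn and LightningModule come from doctest_global_setup, so no per-block imports are needed
            self.layer = nn.Linear(28 * 28, 10)

    model = LitExample()

.. testcode::
    :skipif: not _TORCHVISION_AVAILABLE

    # guarded block: only executed when torchvision is installed
    from torchvision import transforms

    transform = transforms.ToTensor()

Unlike ``code-block``, blocks written this way are executed by Sphinx's doctest builder, so broken snippets surface as build failures instead of silently going stale.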