fixing docs (#2227)
This commit is contained in:
parent
62029c17ee
commit
a5f7c6368e
|
@ -6,7 +6,7 @@ Debugging
|
|||
=========
|
||||
The following are flags that make debugging much easier.
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
fast_dev_run
|
||||
------------
|
||||
|
@ -21,7 +21,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
|
|||
|
||||
trainer = Trainer(fast_dev_run=True)
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
Inspect gradient norms
|
||||
----------------------
|
||||
|
@ -35,7 +35,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
|
|||
# the 2-norm
|
||||
trainer = Trainer(track_grad_norm=2)
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
Log GPU usage
|
||||
-------------
|
||||
|
@ -48,7 +48,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
|
|||
|
||||
trainer = Trainer(log_gpu_memory=True)
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
Make model overfit on subset of data
|
||||
------------------------------------
|
||||
|
@ -70,7 +70,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
|
|||
With this flag, the train, val, and test sets will all be the same train set. We will also replace the sampler
|
||||
in the training set to turn off shuffle for you.
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
Print a summary of your LightningModule
|
||||
---------------------------------------
|
||||
|
@ -99,7 +99,7 @@ See Also:
|
|||
- :paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_summary` Trainer argument
|
||||
- :class:`~pytorch_lightning.core.memory.ModelSummary`
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
Shorten epochs
|
||||
--------------
|
||||
|
@ -116,7 +116,7 @@ On larger datasets like Imagenet, this can help you debug or test a few things f
|
|||
# use 10 batches of train and 5 batches of val
|
||||
trainer = Trainer(limit_train_batches=10, limit_val_batches=5)
|
||||
|
||||
-----------------
|
||||
---
|
||||
|
||||
Set the number of validation sanity steps
|
||||
-----------------------------------------
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
Experiment Logging
|
||||
==================
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
Comet.ml
|
||||
^^^^^^^^
|
||||
|
@ -49,7 +49,7 @@ The :class:`~pytorch_lightning.loggers.CometLogger` is available anywhere except
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.CometLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
MLflow
|
||||
^^^^^^
|
||||
|
@ -76,7 +76,7 @@ Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.MLFlowLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
Neptune.ai
|
||||
^^^^^^^^^^
|
||||
|
@ -116,7 +116,7 @@ The :class:`~pytorch_lightning.loggers.NeptuneLogger` is available anywhere exce
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.NeptuneLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
allegro.ai TRAINS
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
@ -160,7 +160,7 @@ The :class:`~pytorch_lightning.loggers.TrainsLogger` is available anywhere in yo
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.TrainsLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
Tensorboard
|
||||
^^^^^^^^^^^
|
||||
|
@ -186,7 +186,7 @@ The :class:`~pytorch_lightning.loggers.TensorBoardLogger` is available anywhere
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.TensorBoardLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
Test Tube
|
||||
^^^^^^^^^
|
||||
|
@ -221,7 +221,7 @@ The :class:`~pytorch_lightning.loggers.TestTubeLogger` is available anywhere exc
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.TestTubeLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
Weights and Biases
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
@ -257,7 +257,7 @@ The :class:`~pytorch_lightning.loggers.WandbLogger` is available anywhere except
|
|||
.. seealso::
|
||||
:class:`~pytorch_lightning.loggers.WandbLogger` docs.
|
||||
|
||||
-------------------
|
||||
---
|
||||
|
||||
Multiple Loggers
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
|
|
@ -104,6 +104,7 @@ Here we show the validation loss in the progress bar
|
|||
|
||||
Snapshot hyperparameters
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
When training a model, it's useful to know what hyperparams went into that model.
|
||||
When Lightning creates a checkpoint, it stores a key "hparams" with the hyperparams.
|
||||
|
||||
|
@ -118,6 +119,7 @@ in the `hparams tab <https://pytorch.org/docs/stable/tensorboard.html#torch.util
|
|||
|
||||
Snapshot code
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Loggers also allow you to snapshot a copy of the code used in this experiment.
|
||||
For example, TestTubeLogger does this with a flag:
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ Fast Training
|
|||
There are multiple options to speed up different parts of the training by choosing to train
|
||||
on a subset of data. This could be done for speed or debugging purposes.
|
||||
|
||||
----------------------
|
||||
---
|
||||
|
||||
Check validation every n epochs
|
||||
-------------------------------
|
||||
|
@ -19,7 +19,7 @@ If you have a small dataset you might want to check validation every n epochs
|
|||
# DEFAULT
|
||||
trainer = Trainer(check_val_every_n_epoch=1)
|
||||
|
||||
----------------------
|
||||
---
|
||||
|
||||
Force training for min or max epochs
|
||||
------------------------------------
|
||||
|
@ -33,7 +33,7 @@ It can be useful to force training for a minimum number of epochs or limit to a
|
|||
# DEFAULT
|
||||
trainer = Trainer(min_epochs=1, max_epochs=1000)
|
||||
|
||||
----------------------
|
||||
---
|
||||
|
||||
Set validation check frequency within 1 training epoch
|
||||
------------------------------------------------------
|
||||
|
@ -52,7 +52,7 @@ Must use an int if using an IterableDataset.
|
|||
# check every 100 train batches (ie: for IterableDatasets or fixed frequency)
|
||||
trainer = Trainer(val_check_interval=100)
|
||||
|
||||
----------------------
|
||||
---
|
||||
|
||||
Use data subset for training, validation and test
|
||||
-------------------------------------------------
|
||||
|
|
|
@ -17,7 +17,7 @@ To illustrate, here's the typical PyTorch project structure organized in a Light
|
|||
As your project grows in complexity with things like 16-bit precision, distributed training, etc... the part in blue
|
||||
quickly becomes onerous and starts distracting from the core research code.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Goal of this guide
|
||||
------------------
|
||||
|
@ -32,7 +32,7 @@ to use inheritance to very quickly create an AutoEncoder.
|
|||
.. note:: Any DL/ML PyTorch project fits into the Lightning structure. Here we just focus on 3 types
|
||||
of research to illustrate.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Installing Lightning
|
||||
--------------------
|
||||
|
@ -55,8 +55,7 @@ Or with conda
|
|||
|
||||
conda install pytorch-lightning -c conda-forge
|
||||
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Lightning Philosophy
|
||||
--------------------
|
||||
|
@ -118,7 +117,7 @@ In Lightning this code is abstracted out by `Callbacks`.
|
|||
generated = decoder(z)
|
||||
self.experiment.log('images', generated)
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Elements of a research project
|
||||
------------------------------
|
||||
|
@ -381,7 +380,7 @@ in the LightningModule
|
|||
Again, this is the same PyTorch code except that it has been organized by the LightningModule.
|
||||
This code is not restricted which means it can be as complicated as a full seq-2-seq, RL loop, GAN, etc...
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Training
|
||||
--------
|
||||
|
@ -587,11 +586,11 @@ Notice the epoch is MUCH faster!
|
|||
.. figure:: /_images/mnist_imgs/tpu_fast.png
|
||||
:alt: TPU speed
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
.. include:: hyperparameters.rst
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Validating
|
||||
----------
|
||||
|
@ -670,7 +669,7 @@ in the validation loop, you won't need to potentially wait a full epoch to find
|
|||
|
||||
.. note:: Lightning disables gradients, puts model in eval mode and does everything needed for validation.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
@ -741,7 +740,7 @@ You can also run the test from a saved lightning model
|
|||
|
||||
.. warning:: .test() is not stable yet on TPUs. We're working on getting around the multiprocessing challenges.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Predicting
|
||||
----------
|
||||
|
@ -842,7 +841,7 @@ Or maybe we have a model that we use to do generation
|
|||
How you split up what goes in `forward` vs `training_step` depends on how you want to use this model for
|
||||
prediction.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Extensibility
|
||||
-------------
|
||||
|
@ -903,7 +902,7 @@ you could do your own:
|
|||
Every single part of training is configurable this way.
|
||||
For a full list look at `LightningModule <lightning-module.rst>`_.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
Callbacks
|
||||
---------
|
||||
|
@ -940,10 +939,10 @@ And pass the callbacks into the trainer
|
|||
.. note::
|
||||
See full list of 12+ hooks in the :ref:`callbacks`.
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
.. include:: child_modules.rst
|
||||
|
||||
---------
|
||||
---
|
||||
|
||||
.. include:: transfer_learning.rst
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
.. testsetup:: *
|
||||
|
||||
import torch
|
||||
from torch.nn import Module
|
||||
from pytorch_lightning.core.lightning import LightningModule
|
||||
from pytorch_lightning.metrics import TensorMetric, NumpyMetric
|
||||
|
@ -25,10 +26,6 @@ Example::
|
|||
# calculates accuracy across all GPUs and all Nodes used in training
|
||||
accuracy(pred, target)
|
||||
|
||||
Out::
|
||||
|
||||
tensor(0.7500)
|
||||
|
||||
.. warning::
|
||||
The metrics package is still in development! If we're missing a metric or you find a mistake, please send a PR!
|
||||
to a few metrics. Please feel free to create an issue/PR if you have a proposed
|
||||
|
@ -228,7 +225,7 @@ Functional Metrics
|
|||
------------------
|
||||
Functional metrics can be called anywhere (even used with just plain PyTorch).
|
||||
|
||||
.. testcode::
|
||||
.. code-block:: python
|
||||
|
||||
from pytorch_lightning.metrics.functional import accuracy
|
||||
|
||||
|
@ -238,10 +235,6 @@ Functional metrics can be called anywhere (even used with just plain PyTorch).
|
|||
# calculates accuracy across all GPUs and all Nodes used in training
|
||||
accuracy(pred, target)
|
||||
|
||||
.. testoutput::
|
||||
|
||||
tensor(0.7500)
|
||||
|
||||
These metrics even work when using distributed training:
|
||||
|
||||
.. code-block:: python
|
||||
|
|
|
@ -2,7 +2,7 @@ Optimization
|
|||
===============
|
||||
|
||||
Learning rate scheduling
|
||||
-------------------------------------
|
||||
------------------------
|
||||
Every optimizer you use can be paired with any `LearningRateScheduler <https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate>`_.
|
||||
|
||||
.. testcode::
|
||||
|
@ -41,7 +41,7 @@ Every optimizer you use can be paired with any `LearningRateScheduler <https://p
|
|||
|
||||
|
||||
Use multiple optimizers (like GANs)
|
||||
-------------------------------------
|
||||
-----------------------------------
|
||||
To use multiple optimizers, return more than one optimizer from :meth:`pytorch_lightning.core.LightningModule.configure_optimizers`
|
||||
|
||||
.. testcode::
|
||||
|
@ -73,7 +73,7 @@ Lightning will call each optimizer sequentially:
|
|||
|
||||
|
||||
Step optimizers at arbitrary intervals
|
||||
----------------------------------------
|
||||
--------------------------------------
|
||||
To do more interesting things with your optimizers such as learning rate warm-up or odd scheduling,
|
||||
override the :meth:`optimizer_step` function.
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ Lightning has built in support for dealing with sequential data.
|
|||
|
||||
|
||||
Packed sequences as inputs
|
||||
----------------------------
|
||||
--------------------------
|
||||
When using PackedSequence, do 2 things:
|
||||
|
||||
1. return either a padded tensor in dataset or a list of variable length tensors in the dataloader collate_fn (example above shows the list implementation).
|
||||
|
@ -29,7 +29,7 @@ When using PackedSequence, do 2 things:
|
|||
y = rnn.pack_sequence(batch[1], enforce_sorted=False)
|
||||
|
||||
Truncated Backpropagation Through Time
|
||||
---------------------------------------
|
||||
--------------------------------------
|
||||
There are times when multiple backwards passes are needed for each batch.
|
||||
For example, it may save memory to use Truncated Backpropagation Through Time when training RNNs.
|
||||
|
||||
|
@ -50,7 +50,7 @@ Lightning can handle TBTT automatically via this flag.
|
|||
a `hiddens` arg.
|
||||
|
||||
Iterable Datasets
|
||||
---------------------------------------
|
||||
-----------------
|
||||
Lightning supports using IterableDatasets as well as map-style Datasets. IterableDatasets provide a more natural
|
||||
option when using sequential data.
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
from pytorch_lightning.trainer.trainer import Trainer
|
||||
|
||||
Single GPU Training
|
||||
====================
|
||||
===================
|
||||
Make sure you are running on a machine that has at least one GPU. Lightning handles all the NVIDIA flags for you,
|
||||
there's no need to set them yourself.
|
||||
|
||||
|
|
|
@ -5,13 +5,13 @@ Lightning supports running on TPUs. At this moment, TPUs are available
|
|||
on Google Cloud (GCP), Google Colab and Kaggle Environments. For more information on TPUs
|
||||
`watch this video <https://www.youtube.com/watch?v=kPMpmcl_Pyw>`_.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
Live demo
|
||||
----------
|
||||
Check out this `Google Colab <https://colab.research.google.com/drive/1-_LKx4HwAxl5M6xPJmqAAu444LTDQoa3>`_ to see how to train MNIST on TPUs.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
TPU Terminology
|
||||
---------------
|
||||
|
@ -23,17 +23,17 @@ A TPU pod hosts many TPUs on it. Currently, TPU pod v2 has 2048 cores!
|
|||
You can request a full pod from Google cloud or a "slice" which gives you
|
||||
some subset of those 2048 cores.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
How to access TPUs
|
||||
-------------------
|
||||
------------------
|
||||
To access TPUs there are three main ways.
|
||||
|
||||
1. Using Google Colab.
|
||||
2. Using Google Cloud (GCP).
|
||||
3. Using Kaggle.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
Colab TPUs
|
||||
-----------
|
||||
|
@ -65,7 +65,7 @@ To get a TPU on colab, follow these steps:
|
|||
|
||||
6. Then set up your LightningModule as normal.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
DistributedSamplers
|
||||
-------------------
|
||||
|
@ -122,27 +122,27 @@ To use a full TPU pod skip to the TPU pod section.
|
|||
|
||||
That's it! Your model will train on all 8 TPU cores.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
Single TPU core training
|
||||
----------------------------
|
||||
------------------------
|
||||
Lightning supports training on a single TPU core. Just pass the TPU core ID [1-8] in a list.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
trainer = pl.Trainer(tpu_cores=[1])
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
Distributed Backend with TPU
|
||||
----------------------------
|
||||
The ``distributed_backend`` option used for GPUs does not apply to TPUs.
|
||||
TPUs work in DDP mode by default (distributing over each core).
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
TPU Pod
|
||||
--------
|
||||
-------
|
||||
To train on more than 8 cores, your code actually doesn't change!
|
||||
All you need to do is submit the following command:
|
||||
|
||||
|
@ -153,7 +153,7 @@ All you need to do is submit the following command:
|
|||
--conda-env=torch-xla-nightly
|
||||
-- python /usr/share/torch-xla-0.5/pytorch/xla/test/test_train_imagenet.py --fake_data
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
16 bit precision
|
||||
-----------------
|
||||
|
@ -171,7 +171,7 @@ set the 16-bit flag.
|
|||
|
||||
Under the hood the xla library will use the `bfloat16 type <https://en.wikipedia.org/wiki/Bfloat16_floating-point_format>`_.
|
||||
|
||||
---------------
|
||||
---
|
||||
|
||||
About XLA
|
||||
----------
|
||||
|
|
|
@ -8,7 +8,7 @@ Training Tricks
|
|||
Lightning implements various tricks to help during training
|
||||
|
||||
Accumulate gradients
|
||||
-------------------------------------
|
||||
--------------------
|
||||
Accumulated gradients runs K small batches of size N before doing a backwards pass.
|
||||
The effect is a large effective batch size of size KxN.
|
||||
|
||||
|
@ -21,7 +21,7 @@ The effect is a large effective batch size of size KxN.
|
|||
|
||||
|
||||
Gradient Clipping
|
||||
-------------------------------------
|
||||
-----------------
|
||||
Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient
|
||||
norm <https://pytorch.org/docs/stable/nn.html#torch.nn.utils.clip_grad_norm_>`_ computed over all model parameters together.
|
||||
|
||||
|
|
|
@ -209,14 +209,14 @@ def accuracy(
|
|||
|
||||
Example:
|
||||
|
||||
>>> x = torch.tensor([1, 2, 3])
|
||||
>>> y = torch.tensor([0, 2, 3])
|
||||
>>> x = torch.tensor([0, 1, 2, 3])
|
||||
>>> y = torch.tensor([0, 1, 2, 2])
|
||||
>>> accuracy(x, y)
|
||||
tensor(0.6667)
|
||||
tensor(0.7500)
|
||||
|
||||
"""
|
||||
tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred=pred, target=target,
|
||||
num_classes=num_classes)
|
||||
tps, fps, tns, fns, sups = stat_scores_multiple_classes(
|
||||
pred=pred, target=target, num_classes=num_classes)
|
||||
|
||||
if not (target > 0).any() and num_classes is None:
|
||||
raise RuntimeError("cannot infer num_classes when target is all zero")
|
||||
|
@ -539,7 +539,7 @@ def roc(
|
|||
|
||||
>>> x = torch.tensor([0, 1, 2, 3])
|
||||
>>> y = torch.tensor([0, 1, 2, 2])
|
||||
>>> fpr, tpr, thresholds = roc(x,y)
|
||||
>>> fpr, tpr, thresholds = roc(x, y)
|
||||
>>> fpr
|
||||
tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000])
|
||||
>>> tpr
|
||||
|
|
Loading…
Reference in New Issue