From a5f7c6368ea506bde4c5e64f7e8eb048c13a4dd9 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <Borda@users.noreply.github.com>
Date: Wed, 17 Jun 2020 23:44:11 +0200
Subject: [PATCH] fixing docs (#2227)

---
 docs/source/debugging.rst                     | 14 +++++-----
 docs/source/experiment_logging.rst            | 16 +++++------
 docs/source/experiment_reporting.rst          |  2 ++
 docs/source/fast_training.rst                 |  8 +++---
 docs/source/introduction_guide.rst            | 27 +++++++++----------
 docs/source/metrics.rst                       | 11 ++------
 docs/source/optimizers.rst                    |  6 ++---
 docs/source/sequences.rst                     |  6 ++---
 docs/source/single_gpu.rst                    |  2 +-
 docs/source/tpu.rst                           | 26 +++++++++---------
 docs/source/training_tricks.rst               |  4 +--
 .../metrics/functional/classification.py      | 12 ++++-----
 12 files changed, 64 insertions(+), 70 deletions(-)

diff --git a/docs/source/debugging.rst b/docs/source/debugging.rst
index 96edc8ab81..bad72541f7 100644
--- a/docs/source/debugging.rst
+++ b/docs/source/debugging.rst
@@ -6,7 +6,7 @@ Debugging
 =========
 The following are flags that make debugging much easier.
 
------------------
+----
 
 fast_dev_run
 ------------
@@ -21,7 +21,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
 
     trainer = Trainer(fast_dev_run=True)
 
------------------
+----
 
 Inspect gradient norms
 ----------------------
@@ -35,7 +35,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
     # the 2-norm
     trainer = Trainer(track_grad_norm=2)
 
------------------
+----
 
 Log GPU usage
 -------------
@@ -48,7 +48,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
 
     trainer = Trainer(log_gpu_memory=True)
 
------------------
+----
 
 Make model overfit on subset of data
 ------------------------------------
@@ -70,7 +70,7 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)
 With this flag, the train, val, and test sets will all be the same train set. We will also replace the sampler
 in the training set to turn off shuffle for you.
 
------------------
+----
 
 Print a summary of your LightningModule
 ---------------------------------------
@@ -99,7 +99,7 @@ See Also:
     - :paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_summary` Trainer argument
     - :class:`~pytorch_lightning.core.memory.ModelSummary`
 
------------------
+----
 
 Shorten epochs
 --------------
@@ -116,7 +116,7 @@ On larger datasets like Imagenet, this can help you debug or test a few things f
     # use 10 batches of train and 5 batches of val
     trainer = Trainer(limit_train_batches=10, limit_val_batches=5)
 
------------------
+----
 
 Set the number of validation sanity steps
 -----------------------------------------
diff --git a/docs/source/experiment_logging.rst b/docs/source/experiment_logging.rst
index 199e81c4a0..6d6d96a415 100644
--- a/docs/source/experiment_logging.rst
+++ b/docs/source/experiment_logging.rst
@@ -7,7 +7,7 @@
 Experiment Logging
 ==================
 
--------------------
+----
 
 Comet.ml
 ^^^^^^^^
@@ -49,7 +49,7 @@ The :class:`~pytorch_lightning.loggers.CometLogger` is available anywhere except
 .. seealso::
     :class:`~pytorch_lightning.loggers.CometLogger` docs.
 
--------------------
+----
 
 MLflow
 ^^^^^^
@@ -76,7 +76,7 @@ Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.
 .. seealso::
     :class:`~pytorch_lightning.loggers.MLFlowLogger` docs.
 
--------------------
+----
 
 Neptune.ai
 ^^^^^^^^^^
@@ -116,7 +116,7 @@ The :class:`~pytorch_lightning.loggers.NeptuneLogger` is available anywhere exce
 .. seealso::
     :class:`~pytorch_lightning.loggers.NeptuneLogger` docs.
 
--------------------
+----
 
 allegro.ai TRAINS
 ^^^^^^^^^^^^^^^^^
@@ -160,7 +160,7 @@ The :class:`~pytorch_lightning.loggers.TrainsLogger` is available anywhere in yo
 .. seealso::
     :class:`~pytorch_lightning.loggers.TrainsLogger` docs.
 
--------------------
+----
 
 Tensorboard
 ^^^^^^^^^^^
@@ -186,7 +186,7 @@ The :class:`~pytorch_lightning.loggers.TensorBoardLogger` is available anywhere
 .. seealso::
     :class:`~pytorch_lightning.loggers.TensorBoardLogger` docs.
 
--------------------
+----
 
 Test Tube
 ^^^^^^^^^
@@ -221,7 +221,7 @@ The :class:`~pytorch_lightning.loggers.TestTubeLogger` is available anywhere exc
 .. seealso::
     :class:`~pytorch_lightning.loggers.TestTubeLogger` docs.
 
--------------------
+----
 
 Weights and Biases
 ^^^^^^^^^^^^^^^^^^
@@ -257,7 +257,7 @@ The :class:`~pytorch_lightning.loggers.WandbLogger` is available anywhere except
 .. seealso::
     :class:`~pytorch_lightning.loggers.WandbLogger` docs.
 
--------------------
+----
 
 Multiple Loggers
 ^^^^^^^^^^^^^^^^
diff --git a/docs/source/experiment_reporting.rst b/docs/source/experiment_reporting.rst
index 8e534f4cc6..6ced7bc786 100644
--- a/docs/source/experiment_reporting.rst
+++ b/docs/source/experiment_reporting.rst
@@ -104,6 +104,7 @@ Here we show the validation loss in the progress bar
 
 Snapshot hyperparameters
 ^^^^^^^^^^^^^^^^^^^^^^^^
+
 When training a model, it's useful to know what hyperparams went into that model.
 When Lightning creates a checkpoint, it stores a key "hparams" with the hyperparams.
 
@@ -118,6 +119,7 @@ in the `hparams tab <https://pytorch.org/docs/stable/tensorboard.html#torch.util
 
 Snapshot code
 ^^^^^^^^^^^^^
+
 Loggers  also allow you to snapshot a copy of the code used in this experiment.
 For example, TestTubeLogger does this with a flag:
 
diff --git a/docs/source/fast_training.rst b/docs/source/fast_training.rst
index ae4d4e3aa0..895e8d9662 100644
--- a/docs/source/fast_training.rst
+++ b/docs/source/fast_training.rst
@@ -8,7 +8,7 @@ Fast Training
 There are multiple options to speed up different parts of the training by choosing to train
 on a subset of data. This could be done for speed or debugging purposes.
 
-----------------------
+----
 
 Check validation every n epochs
 -------------------------------
@@ -19,7 +19,7 @@ If you have a small dataset you might want to check validation every n epochs
     # DEFAULT
     trainer = Trainer(check_val_every_n_epoch=1)
 
-----------------------
+----
 
 Force training for min or max epochs
 ------------------------------------
@@ -33,7 +33,7 @@ It can be useful to force training for a minimum number of epochs or limit to a
     # DEFAULT
     trainer = Trainer(min_epochs=1, max_epochs=1000)
 
-----------------------
+----
 
 Set validation check frequency within 1 training epoch
 ------------------------------------------------------
@@ -52,7 +52,7 @@ Must use an int if using an IterableDataset.
     # check every 100 train batches (ie: for IterableDatasets or fixed frequency)
     trainer = Trainer(val_check_interval=100)
 
-----------------------
+----
 
 Use data subset for training, validation and test
 -------------------------------------------------
diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst
index d56ac08dae..9329fd45c6 100644
--- a/docs/source/introduction_guide.rst
+++ b/docs/source/introduction_guide.rst
@@ -17,7 +17,7 @@ To illustrate, here's the typical PyTorch project structure organized in a Light
 As your project grows in complexity with things like 16-bit precision, distributed training, etc... the part in blue
 quickly becomes onerous and starts distracting from the core research code.
 
----------
+----
 
 Goal of this guide
 ------------------
@@ -32,7 +32,7 @@ to use inheritance to very quickly create an AutoEncoder.
 .. note:: Any DL/ML PyTorch project fits into the Lightning structure. Here we just focus on 3 types
     of research to illustrate.
 
----------
+----
 
 Installing Lightning
 --------------------
@@ -55,8 +55,7 @@ Or with conda
 
     conda install pytorch-lightning -c conda-forge
 
-
----------
+----
 
 Lightning Philosophy
 --------------------
@@ -118,7 +117,7 @@ In Lightning this code is abstracted out by `Callbacks`.
     generated = decoder(z)
     self.experiment.log('images', generated)
 
----------
+----
 
 Elements of a research project
 ------------------------------
@@ -381,7 +380,7 @@ in the LightningModule
 Again, this is the same PyTorch code except that it has been organized by the LightningModule.
 This code is not restricted which means it can be as complicated as a full seq-2-seq, RL loop, GAN, etc...
 
----------
+----
 
 Training
 --------
@@ -587,11 +586,11 @@ Notice the epoch is MUCH faster!
 .. figure:: /_images/mnist_imgs/tpu_fast.png
     :alt: TPU speed
 
----------
+----
 
 .. include:: hyperparameters.rst
 
----------
+----
 
 Validating
 ----------
@@ -670,7 +669,7 @@ in the validation loop, you won't need to potentially wait a full epoch to find
 
 .. note:: Lightning disables gradients, puts model in eval mode and does everything needed for validation.
 
----------
+----
 
 Testing
 -------
@@ -741,7 +740,7 @@ You can also run the test from a saved lightning model
 
 .. warning:: .test() is not stable yet on TPUs. We're working on getting around the multiprocessing challenges.
 
----------
+----
 
 Predicting
 ----------
@@ -842,7 +841,7 @@ Or maybe we have a model that we use to do generation
 How you split up what goes in `forward` vs `training_step` depends on how you want to use this model for
 prediction.
 
----------
+----
 
 Extensibility
 -------------
@@ -903,7 +902,7 @@ you could do your own:
 Every single part of training is configurable this way.
 For a full list look at `LightningModule <lightning-module.rst>`_.
 
----------
+----
 
 Callbacks
 ---------
@@ -940,10 +939,10 @@ And pass the callbacks into the trainer
 .. note::
     See full list of 12+ hooks in the :ref:`callbacks`.
 
----------
+----
 
 .. include:: child_modules.rst
 
----------
+----
 
 .. include:: transfer_learning.rst
diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index 38c52dfb85..a712059f40 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -1,5 +1,6 @@
 .. testsetup:: *
 
+    import torch
     from torch.nn import Module
     from pytorch_lightning.core.lightning import LightningModule
     from pytorch_lightning.metrics import TensorMetric, NumpyMetric
@@ -25,10 +26,6 @@ Example::
     # calculates accuracy across all GPUs and all Nodes used in training
     accuracy(pred, target)
 
-Out::
-
-    tensor(0.7500)
-
 .. warning::
     The metrics package is still in development! If we're missing a metric or you find a mistake, please send a PR!
     to a few metrics. Please feel free to create an issue/PR if you have a proposed 
@@ -228,7 +225,7 @@ Functional Metrics
 ------------------
 Functional metrics can be called anywhere (even used with just plain PyTorch).
 
-.. testcode::
+.. code-block:: python
 
     from pytorch_lightning.metrics.functional import accuracy
 
@@ -238,10 +235,6 @@ Functional metrics can be called anywhere (even used with just plain PyTorch).
     # calculates accuracy across all GPUs and all Nodes used in training
     accuracy(pred, target)
 
-.. testoutput::
-
-    tensor(0.7500)
-
 These metrics even work when using distributed training:
 
 .. code-block:: python
diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst
index d4cb6fe5a9..b0d7c5949e 100644
--- a/docs/source/optimizers.rst
+++ b/docs/source/optimizers.rst
@@ -2,7 +2,7 @@ Optimization
 ===============
 
 Learning rate scheduling
--------------------------------------
+------------------------
 Every optimizer you use can be paired with any `LearningRateScheduler <https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate>`_.
 
 .. testcode::
@@ -41,7 +41,7 @@ Every optimizer you use can be paired with any `LearningRateScheduler <https://p
 
 
 Use multiple optimizers (like GANs)
--------------------------------------
+-----------------------------------
 To use multiple optimizers return > 1 optimizers from :meth:`pytorch_lightning.core.LightningModule.configure_optimizers`
 
 .. testcode::
@@ -73,7 +73,7 @@ Lightning will call each optimizer sequentially:
 
 
 Step optimizers at arbitrary intervals
-----------------------------------------
+--------------------------------------
 To do more interesting things with your optimizers such as learning rate warm-up or odd scheduling,
 override the :meth:`optimizer_step` function.
 
diff --git a/docs/source/sequences.rst b/docs/source/sequences.rst
index 857fd08198..6b75f323ed 100644
--- a/docs/source/sequences.rst
+++ b/docs/source/sequences.rst
@@ -9,7 +9,7 @@ Lightning has built in support for dealing with sequential data.
 
 
 Packed sequences as inputs
-----------------------------
+--------------------------
 When using PackedSequence, do 2 things:
 
 1. return either a padded tensor in dataset or a list of variable length tensors in the dataloader collate_fn (example above shows the list implementation).
@@ -29,7 +29,7 @@ When using PackedSequence, do 2 things:
         y = rnn.pack_sequence(batch[1], enforce_sorted=False)
 
 Truncated Backpropagation Through Time
----------------------------------------
+--------------------------------------
 There are times when multiple backwards passes are needed for each batch.
 For example, it may save memory to use Truncated Backpropagation Through Time when training RNNs.
 
@@ -50,7 +50,7 @@ Lightning can handle TBTT automatically via this flag.
     a `hiddens` arg.
 
 Iterable Datasets
----------------------------------------
+-----------------
 Lightning supports using IterableDatasets as well as map-style Datasets. IterableDatasets provide a more natural
 option when using sequential data.
 
diff --git a/docs/source/single_gpu.rst b/docs/source/single_gpu.rst
index c6fa1b9af9..4348197fba 100644
--- a/docs/source/single_gpu.rst
+++ b/docs/source/single_gpu.rst
@@ -3,7 +3,7 @@
     from pytorch_lightning.trainer.trainer import Trainer
 
 Single GPU Training
-====================
+===================
 Make sure you are running on a machine that has at least one GPU. Lightning handles all the NVIDIA flags for you,
 there's no need to set them yourself.
 
diff --git a/docs/source/tpu.rst b/docs/source/tpu.rst
index 774af763c7..ddc633b275 100644
--- a/docs/source/tpu.rst
+++ b/docs/source/tpu.rst
@@ -5,13 +5,13 @@ Lightning supports running on TPUs. At this moment, TPUs are available
 on Google Cloud (GCP), Google Colab and Kaggle Environments. For more information on TPUs
 `watch this video <https://www.youtube.com/watch?v=kPMpmcl_Pyw>`_.
 
----------------
+----
 
 Live demo
 ----------
 Check out this `Google Colab <https://colab.research.google.com/drive/1-_LKx4HwAxl5M6xPJmqAAu444LTDQoa3>`_ to see how to train MNIST on TPUs.
 
----------------
+----
 
 TPU Terminology
 ---------------
@@ -23,17 +23,17 @@ A TPU pod hosts many TPUs on it. Currently, TPU pod v2 has 2048 cores!
 You can request a full pod from Google cloud or a "slice" which gives you
 some subset of those 2048 cores.
 
----------------
+----
 
 How to access TPUs
--------------------
+------------------
 To access TPUs there are two main ways.
 
 1. Using google colab.
 2. Using Google Cloud (GCP).
 3. Using Kaggle.
 
----------------
+----
 
 Colab TPUs
 -----------
@@ -65,7 +65,7 @@ To get a TPU on colab, follow these steps:
 
 6. Then set up your LightningModule as normal.
 
----------------
+----
 
 DistributedSamplers
 -------------------
@@ -122,27 +122,27 @@ To use a full TPU pod skip to the TPU pod section.
 
 That's it! Your model will train on all 8 TPU cores.
 
----------------
+----
 
 Single TPU core training
-----------------------------
+------------------------
 Lightning supports training on a single TPU core. Just pass the TPU core ID [1-8] in a list.
 
 .. code-block:: python
 
     trainer = pl.Trainer(tpu_cores=[1])
 
----------------
+----
 
 Distributed Backend with TPU
 ----------------------------
 The ```distributed_backend``` option used for GPUs does not apply to TPUs.
 TPUs work in DDP mode by default (distributing over each core)
 
----------------
+----
 
 TPU Pod
---------
+-------
 To train on more than 8 cores, your code actually doesn't change!
 All you need to do is submit the following command:
 
@@ -153,7 +153,7 @@ All you need to do is submit the following command:
     --conda-env=torch-xla-nightly
     -- python /usr/share/torch-xla-0.5/pytorch/xla/test/test_train_imagenet.py --fake_data
 
----------------
+----
 
 16 bit precision
 -----------------
@@ -171,7 +171,7 @@ set the 16-bit flag.
 
 Under the hood the xla library will use the `bfloat16 type <https://en.wikipedia.org/wiki/Bfloat16_floating-point_format>`_.
 
----------------
+----
 
 About XLA
 ----------
diff --git a/docs/source/training_tricks.rst b/docs/source/training_tricks.rst
index 9140f52aba..1d15981a09 100644
--- a/docs/source/training_tricks.rst
+++ b/docs/source/training_tricks.rst
@@ -8,7 +8,7 @@ Training Tricks
 Lightning implements various tricks to help during training
 
 Accumulate gradients
--------------------------------------
+--------------------
 Accumulated gradients runs K small batches of size N before doing a backwards pass.
 The effect is a large effective batch size of size KxN.
 
@@ -21,7 +21,7 @@ The effect is a large effective batch size of size KxN.
 
 
 Gradient Clipping
--------------------------------------
+-----------------
 Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient
 norm <https://pytorch.org/docs/stable/nn.html#torch.nn.utils.clip_grad_norm_>`_ computed over all model parameters together.
 
diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py
index 4578851cf6..74d8cf3d62 100644
--- a/pytorch_lightning/metrics/functional/classification.py
+++ b/pytorch_lightning/metrics/functional/classification.py
@@ -209,14 +209,14 @@ def accuracy(
 
     Example:
 
-        >>> x = torch.tensor([1, 2, 3])
-        >>> y = torch.tensor([0, 2, 3])
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
         >>> accuracy(x, y)
-        tensor(0.6667)
+        tensor(0.7500)
 
     """
-    tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred=pred, target=target,
-                                                            num_classes=num_classes)
+    tps, fps, tns, fns, sups = stat_scores_multiple_classes(
+        pred=pred, target=target, num_classes=num_classes)
 
     if not (target > 0).any() and num_classes is None:
         raise RuntimeError("cannot infer num_classes when target is all zero")
@@ -539,7 +539,7 @@ def roc(
 
         >>> x = torch.tensor([0, 1, 2, 3])
         >>> y = torch.tensor([0, 1, 2, 2])
-        >>> fpr, tpr, thresholds = roc(x,y)
+        >>> fpr, tpr, thresholds = roc(x, y)
         >>> fpr
         tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000])
         >>> tpr