diff --git a/docs/source/conf.py b/docs/source/conf.py index 7a44acd730..58258af1fc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -304,12 +304,12 @@ def setup(app): # copy all notebooks to local folder -path_nbs = os.path.join(PATH_HERE, 'notebooks') -if not os.path.isdir(path_nbs): - os.mkdir(path_nbs) -for path_ipynb in glob.glob(os.path.join(PATH_ROOT, 'notebooks', '*.ipynb')): - path_ipynb2 = os.path.join(path_nbs, os.path.basename(path_ipynb)) - shutil.copy(path_ipynb, path_ipynb2) +# path_nbs = os.path.join(PATH_HERE, 'notebooks') +# if not os.path.isdir(path_nbs): +# os.mkdir(path_nbs) +# for path_ipynb in glob.glob(os.path.join(PATH_ROOT, 'notebooks', '*.ipynb')): +# path_ipynb2 = os.path.join(path_nbs, os.path.basename(path_ipynb)) +# shutil.copy(path_ipynb, path_ipynb2) # Ignoring Third-party packages diff --git a/notebooks/01-mnist-hello-world.ipynb b/notebooks/01-mnist-hello-world.ipynb new file mode 100644 index 0000000000..c9e81cc990 --- /dev/null +++ b/notebooks/01-mnist-hello-world.ipynb @@ -0,0 +1,401 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "01-mnist-hello-world.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyOtAKVa5POQ6Xg3UcTQqXDJ", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i7XbLCXGkll9", + "colab_type": "text" + }, + "source": [ + "# Introduction to Pytorch Lightning ⚡\n", + "\n", + "In this notebook, we'll go over the basics of lightning by preparing models to train on the [MNIST Handwritten Digits dataset](https://en.wikipedia.org/wiki/MNIST_database).\n", + "\n", + "---\n", + " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", + " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", + " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2LODD6w9ixlT", + "colab_type": "text" + }, + "source": [ + "### Setup \n", + "Lightning is easy to install. Simply ```pip install pytorch-lightning```" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zK7-Gg69kMnG", + "colab_type": "code", + "colab": {} + }, + "source": [ + "! 
pip install pytorch-lightning --quiet" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "w4_TYnt_keJi", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "\n", + "import torch\n", + "from torch import nn\n", + "from torch.nn import functional as F\n", + "from torch.utils.data import DataLoader, random_split\n", + "from torchvision.datasets import MNIST\n", + "from torchvision import transforms\n", + "import pytorch_lightning as pl\n", + "from pytorch_lightning.metrics.functional import accuracy" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EHpyMPKFkVbZ", + "colab_type": "text" + }, + "source": [ + "## Simplest example\n", + "\n", + "Here's the simplest most minimal example with just a training loop (no validation, no testing).\n", + "\n", + "**Keep in Mind** - A `LightningModule` *is* a PyTorch `nn.Module` - it just has a few more helpful features." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "V7ELesz1kVQo", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class MNISTModel(pl.LightningModule):\n", + "\n", + " def __init__(self):\n", + " super(MNISTModel, self).__init__()\n", + " self.l1 = torch.nn.Linear(28 * 28, 10)\n", + "\n", + " def forward(self, x):\n", + " return torch.relu(self.l1(x.view(x.size(0), -1)))\n", + "\n", + " def training_step(self, batch, batch_nb):\n", + " x, y = batch\n", + " loss = F.cross_entropy(self(x), y)\n", + " return pl.TrainResult(loss)\n", + "\n", + " def configure_optimizers(self):\n", + " return torch.optim.Adam(self.parameters(), lr=0.02)" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hIrtHg-Dv8TJ", + "colab_type": "text" + }, + "source": [ + "By using the `Trainer` you automatically get:\n", + "1. Tensorboard logging\n", + "2. Model checkpointing\n", + "3. Training and validation loop\n", + "4. early-stopping" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4Dk6Ykv8lI7X", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Init our model\n", + "mnist_model = MNISTModel()\n", + "\n", + "# Init DataLoader from MNIST Dataset\n", + "train_ds = MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor())\n", + "train_loader = DataLoader(train_ds, batch_size=32)\n", + "\n", + "# Initialize a trainer\n", + "trainer = pl.Trainer(gpus=1, max_epochs=3, progress_bar_refresh_rate=20)\n", + "\n", + "# Train the model ⚡\n", + "trainer.fit(mnist_model, train_loader)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KNpOoBeIjscS", + "colab_type": "text" + }, + "source": [ + "## A more complete MNIST Lightning Module Example\n", + "\n", + "That wasn't so hard was it?\n", + "\n", + "Now that we've got our feet wet, let's dive in a bit deeper and write a more complete `LightningModule` for MNIST...\n", + "\n", + "This time, we'll bake in all the dataset specific pieces directly in the `LightningModule`. This way, we can avoid writing extra code at the beginning of our script every time we want to run it.\n", + "\n", + "---\n", + "\n", + "### Note what the following built-in functions are doing:\n", + "\n", + "1. [prepare_data()](https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.core.lightning.html#pytorch_lightning.core.lightning.LightningModule.prepare_data) 💾\n", + " - This is where we can download the dataset. 
We point to our desired dataset and ask torchvision's `MNIST` dataset class to download if the dataset isn't found there.\n", + " - **Note we do not make any state assignments in this function** (i.e. `self.something = ...`)\n", + "\n", + "2. [setup(stage)](https://pytorch-lightning.readthedocs.io/en/latest/lightning-module.html#setup) ⚙️\n", + " - Loads in data from file and prepares PyTorch tensor datasets for each split (train, val, test). \n", + " - Setup expects a 'stage' arg which is used to separate logic for 'fit' and 'test'.\n", + " - If you don't mind loading all your datasets at once, you can set up a condition to allow for both 'fit' related setup and 'test' related setup to run whenever `None` is passed to `stage` (or ignore it altogether and exclude any conditionals).\n", + " - **Note this runs across all GPUs and it *is* safe to make state assignments here**\n", + "\n", + "3. [x_dataloader()](https://pytorch-lightning.readthedocs.io/en/latest/lightning-module.html#data-hooks) ♻️\n", + " - `train_dataloader()`, `val_dataloader()`, and `test_dataloader()` all return PyTorch `DataLoader` instances that are created by wrapping their respective datasets that we prepared in `setup()`" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4DNItffri95Q", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class LitMNIST(pl.LightningModule):\n", + " \n", + " def __init__(self, data_dir='./', hidden_size=64, learning_rate=2e-4):\n", + "\n", + " super().__init__()\n", + "\n", + " # Set our init args as class attributes\n", + " self.data_dir = data_dir\n", + " self.hidden_size = hidden_size\n", + " self.learning_rate = learning_rate\n", + "\n", + " # Hardcode some dataset specific attributes\n", + " self.num_classes = 10\n", + " self.dims = (1, 28, 28)\n", + " channels, width, height = self.dims\n", + " self.transform = transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ])\n", + "\n", + " # Define PyTorch model\n", + " self.model = nn.Sequential(\n", + " nn.Flatten(),\n", + " nn.Linear(channels * width * height, hidden_size),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.1),\n", + " nn.Linear(hidden_size, hidden_size),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.1),\n", + " nn.Linear(hidden_size, self.num_classes)\n", + " )\n", + "\n", + " def forward(self, x):\n", + " x = self.model(x)\n", + " return F.log_softmax(x, dim=1)\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " logits = self(x)\n", + " loss = F.nll_loss(logits, y)\n", + " return pl.TrainResult(loss)\n", + "\n", + " def validation_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " logits = self(x)\n", + " loss = F.nll_loss(logits, y)\n", + " preds = torch.argmax(logits, dim=1)\n", + " acc = accuracy(preds, y)\n", + " result = pl.EvalResult(checkpoint_on=loss)\n", + "\n", + " # Calling result.log will surface up scalars for you in TensorBoard\n", + " result.log('val_loss', loss, prog_bar=True)\n", + " result.log('val_acc', acc, prog_bar=True)\n", + " return result\n", + "\n", + " def test_step(self, batch, batch_idx):\n", + " # Here we just reuse the validation_step for testing\n", + " return self.validation_step(batch, batch_idx)\n", + "\n", + " def configure_optimizers(self):\n", + " optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)\n", + " return optimizer\n", + "\n", + " ####################\n", + " # DATA RELATED HOOKS\n", + " ####################\n", + "\n", + " def prepare_data(self):\n", 
+ " # download\n", + " MNIST(self.data_dir, train=True, download=True)\n", + " MNIST(self.data_dir, train=False, download=True)\n", + "\n", + " def setup(self, stage=None):\n", + "\n", + " # Assign train/val datasets for use in dataloaders\n", + " if stage == 'fit' or stage is None:\n", + " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", + " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", + "\n", + " # Assign test dataset for use in dataloader(s)\n", + " if stage == 'test' or stage is None:\n", + " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", + "\n", + " def train_dataloader(self):\n", + " return DataLoader(self.mnist_train, batch_size=32)\n", + "\n", + " def val_dataloader(self):\n", + " return DataLoader(self.mnist_val, batch_size=32)\n", + "\n", + " def test_dataloader(self):\n", + " return DataLoader(self.mnist_test, batch_size=32)" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Mb0U5Rk2kLBy", + "colab_type": "code", + "colab": {} + }, + "source": [ + "model = LitMNIST()\n", + "trainer = pl.Trainer(gpus=1, max_epochs=3, progress_bar_refresh_rate=20)\n", + "trainer.fit(model)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nht8AvMptY6I", + "colab_type": "text" + }, + "source": [ + "### Testing\n", + "\n", + "To test a model, call `trainer.test(model)`.\n", + "\n", + "Or, if you've just trained a model, you can just call `trainer.test()` and Lightning will automatically test using the best saved checkpoint (conditioned on val_loss)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PA151FkLtprO", + "colab_type": "code", + "colab": {} + }, + "source": [ + "trainer.test()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T3-3lbbNtr5T", + "colab_type": "text" + }, + "source": [ + "### Bonus Tip\n", + "\n", + "You can keep calling `trainer.fit(model)` as many times as you'd like to continue training" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IFBwCbLet2r6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "trainer.fit(model)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8TRyS5CCt3n9", + "colab_type": "text" + }, + "source": [ + "In Colab, you can use the TensorBoard magic function to view the logs that Lightning has created for you!" 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wizS-QiLuAYo", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Start tensorboard.\n", + "%load_ext tensorboard\n", + "%tensorboard --logdir lightning_logs/" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/notebooks/02-datamodules.ipynb b/notebooks/02-datamodules.ipynb new file mode 100644 index 0000000000..53468d2c72 --- /dev/null +++ b/notebooks/02-datamodules.ipynb @@ -0,0 +1,542 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "02-datamodules.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true, + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2O5r7QvP8-rt", + "colab_type": "text" + }, + "source": [ + "# PyTorch Lightning DataModules ⚡\n", + "\n", + "With the release of `pytorch-lightning` version 0.9.0, we have included a new class called `LightningDataModule` to help you decouple data related hooks from your `LightningModule`.\n", + "\n", + "This notebook will walk you through how to start using Datamodules.\n", + "\n", + "The most up to date documentation on datamodules can be found [here](https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html).\n", + "\n", + "---\n", + "\n", + " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", + " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", + " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6RYMhmfA9ATN", + "colab_type": "text" + }, + "source": [ + "### Setup\n", + "Lightning is easy to install. Simply ```pip install pytorch-lightning```" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lj2zD-wsbvGr", + "colab_type": "code", + "colab": {} + }, + "source": [ + "! 
pip install pytorch-lightning --quiet" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8g2mbvy-9xDI", + "colab_type": "text" + }, + "source": [ + "# Introduction\n", + "\n", + "First, we'll go over a regular `LightningModule` implementation without the use of a `LightningDataModule`" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eg-xDlmDdAwy", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import pytorch_lightning as pl\n", + "from pytorch_lightning.metrics.functional import accuracy\n", + "import torch\n", + "from torch import nn\n", + "import torch.nn.functional as F\n", + "from torch.utils.data import random_split, DataLoader\n", + "\n", + "# Note - you must have torchvision installed for this example\n", + "from torchvision.datasets import MNIST, CIFAR10\n", + "from torchvision import transforms" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DzgY7wi88UuG", + "colab_type": "text" + }, + "source": [ + "## Defining the LitMNISTModel\n", + "\n", + "Below, we reuse a `LightningModule` from our hello world tutorial that classifies MNIST Handwritten Digits.\n", + "\n", + "Unfortunately, we have hardcoded dataset-specific items within the model, forever limiting it to working with MNIST Data. 😢\n", + "\n", + "This is fine if you don't plan on training/evaluating your model on different datasets. However, in many cases, this can become bothersome when you want to try out your architecture with different datasets." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IQkW8_FF5nU2", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class LitMNIST(pl.LightningModule):\n", + " \n", + " def __init__(self, data_dir='./', hidden_size=64, learning_rate=2e-4):\n", + "\n", + " super().__init__()\n", + "\n", + " # We hardcode dataset specific stuff here.\n", + " self.data_dir = data_dir\n", + " self.num_classes = 10\n", + " self.dims = (1, 28, 28)\n", + " channels, width, height = self.dims\n", + " self.transform = transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ])\n", + "\n", + " self.hidden_size = hidden_size\n", + " self.learning_rate = learning_rate\n", + "\n", + " # Build model\n", + " self.model = nn.Sequential(\n", + " nn.Flatten(),\n", + " nn.Linear(channels * width * height, hidden_size),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.1),\n", + " nn.Linear(hidden_size, hidden_size),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.1),\n", + " nn.Linear(hidden_size, self.num_classes)\n", + " )\n", + "\n", + " def forward(self, x):\n", + " x = self.model(x)\n", + " return F.log_softmax(x, dim=1)\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " logits = self(x)\n", + " loss = F.nll_loss(logits, y)\n", + " return pl.TrainResult(loss)\n", + "\n", + " def validation_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " logits = self(x)\n", + " loss = F.nll_loss(logits, y)\n", + " preds = torch.argmax(logits, dim=1)\n", + " acc = accuracy(preds, y)\n", + " result = pl.EvalResult(checkpoint_on=loss)\n", + " result.log('val_loss', loss, prog_bar=True)\n", + " result.log('val_acc', acc, prog_bar=True)\n", + " return result\n", + "\n", + " def configure_optimizers(self):\n", + " optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)\n", + " return optimizer\n", + "\n", + " ####################\n", + " # DATA RELATED HOOKS\n", + " 
####################\n", + "\n", + " def prepare_data(self):\n", + " # download\n", + " MNIST(self.data_dir, train=True, download=True)\n", + " MNIST(self.data_dir, train=False, download=True)\n", + "\n", + " def setup(self, stage=None):\n", + "\n", + " # Assign train/val datasets for use in dataloaders\n", + " if stage == 'fit' or stage is None:\n", + " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", + " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", + "\n", + " # Assign test dataset for use in dataloader(s)\n", + " if stage == 'test' or stage is None:\n", + " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", + "\n", + " def train_dataloader(self):\n", + " return DataLoader(self.mnist_train, batch_size=32)\n", + "\n", + " def val_dataloader(self):\n", + " return DataLoader(self.mnist_val, batch_size=32)\n", + "\n", + " def test_dataloader(self):\n", + " return DataLoader(self.mnist_test, batch_size=32)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K7sg9KQd-QIO", + "colab_type": "text" + }, + "source": [ + "## Training the ListMNIST Model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "QxDNDaus6byD", + "colab_type": "code", + "colab": {} + }, + "source": [ + "model = LitMNIST()\n", + "trainer = pl.Trainer(max_epochs=2, gpus=1, progress_bar_refresh_rate=20)\n", + "trainer.fit(model)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dY8d6GxmB0YU", + "colab_type": "text" + }, + "source": [ + "# Using DataModules\n", + "\n", + "DataModules are a way of decoupling data-related hooks from the `LightningModule` so you can develop dataset agnostic models." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eJeT5bW081wn", + "colab_type": "text" + }, + "source": [ + "## Defining The MNISTDataModule\n", + "\n", + "Let's go over each function in the class below and talk about what they're doing:\n", + "\n", + "1. ```__init__```\n", + " - Takes in a `data_dir` arg that points to where you have downloaded/wish to download the MNIST dataset.\n", + " - Defines a transform that will be applied across train, val, and test dataset splits.\n", + " - Defines default `self.dims`, which is a tuple returned from `datamodule.size()` that can help you initialize models.\n", + "\n", + "\n", + "2. ```prepare_data```\n", + " - This is where we can download the dataset. We point to our desired dataset and ask torchvision's `MNIST` dataset class to download if the dataset isn't found there.\n", + " - **Note we do not make any state assignments in this function** (i.e. `self.something = ...`)\n", + "\n", + "3. ```setup```\n", + " - Loads in data from file and prepares PyTorch tensor datasets for each split (train, val, test). \n", + " - Setup expects a 'stage' arg which is used to separate logic for 'fit' and 'test'.\n", + " - If you don't mind loading all your datasets at once, you can set up a condition to allow for both 'fit' related setup and 'test' related setup to run whenever `None` is passed to `stage`.\n", + " - **Note this runs across all GPUs and it *is* safe to make state assignments here**\n", + "\n", + "\n", + "4. 
```x_dataloader```\n", + " - `train_dataloader()`, `val_dataloader()`, and `test_dataloader()` all return PyTorch `DataLoader` instances that are created by wrapping their respective datasets that we prepared in `setup()`" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DfGKyGwG_X9v", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class MNISTDataModule(pl.LightningDataModule):\n", + "\n", + " def __init__(self, data_dir: str = './'):\n", + " super().__init__()\n", + " self.data_dir = data_dir\n", + " self.transform = transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ])\n", + "\n", + " # self.dims is returned when you call dm.size()\n", + " # Setting default dims here because we know them.\n", + " # Could optionally be assigned dynamically in dm.setup()\n", + " self.dims = (1, 28, 28)\n", + " self.num_classes = 10\n", + "\n", + " def prepare_data(self):\n", + " # download\n", + " MNIST(self.data_dir, train=True, download=True)\n", + " MNIST(self.data_dir, train=False, download=True)\n", + "\n", + " def setup(self, stage=None):\n", + "\n", + " # Assign train/val datasets for use in dataloaders\n", + " if stage == 'fit' or stage is None:\n", + " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", + " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", + "\n", + " # Assign test dataset for use in dataloader(s)\n", + " if stage == 'test' or stage is None:\n", + " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", + "\n", + " def train_dataloader(self):\n", + " return DataLoader(self.mnist_train, batch_size=32)\n", + "\n", + " def val_dataloader(self):\n", + " return DataLoader(self.mnist_val, batch_size=32)\n", + "\n", + " def test_dataloader(self):\n", + " return DataLoader(self.mnist_test, batch_size=32)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H2Yoj-9M9dS7", + "colab_type": "text" + }, + "source": [ + "## Defining the dataset agnostic `LitModel`\n", + "\n", + "Below, we define the same model as the `LitMNIST` model we made earlier. \n", + "\n", + "However, this time our model has the freedom to use any input data that we'd like 🔥." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PM2IISuOBDIu", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class LitModel(pl.LightningModule):\n", + " \n", + " def __init__(self, channels, width, height, num_classes, hidden_size=64, learning_rate=2e-4):\n", + "\n", + " super().__init__()\n", + "\n", + " # We take in input dimensions as parameters and use those to dynamically build model.\n", + " self.channels = channels\n", + " self.width = width\n", + " self.height = height\n", + " self.num_classes = num_classes\n", + " self.hidden_size = hidden_size\n", + " self.learning_rate = learning_rate\n", + "\n", + " self.model = nn.Sequential(\n", + " nn.Flatten(),\n", + " nn.Linear(channels * width * height, hidden_size),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.1),\n", + " nn.Linear(hidden_size, hidden_size),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.1),\n", + " nn.Linear(hidden_size, num_classes)\n", + " )\n", + "\n", + " def forward(self, x):\n", + " x = self.model(x)\n", + " return F.log_softmax(x, dim=1)\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " logits = self(x)\n", + " loss = F.nll_loss(logits, y)\n", + " return pl.TrainResult(loss)\n", + "\n", + " def validation_step(self, batch, batch_idx):\n", + "\n", + " x, y = batch\n", + " logits = self(x)\n", + " loss = F.nll_loss(logits, y)\n", + " preds = torch.argmax(logits, dim=1)\n", + " acc = accuracy(preds, y)\n", + " result = pl.EvalResult(checkpoint_on=loss)\n", + " result.log('val_loss', loss, prog_bar=True)\n", + " result.log('val_acc', acc, prog_bar=True)\n", + " return result\n", + "\n", + " def configure_optimizers(self):\n", + " optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)\n", + " return optimizer" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G4Z5olPe-xEo", + "colab_type": "text" + }, + "source": [ + "## Training the `LitModel` using the `MNISTDataModule`\n", + "\n", + "Now, we initialize and train the `LitModel` using the `MNISTDataModule`'s configuration settings and dataloaders." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kV48vP_9mEli", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Init DataModule\n", + "dm = MNISTDataModule()\n", + "# Init model from datamodule's attributes\n", + "model = LitModel(*dm.size(), dm.num_classes)\n", + "# Init trainer\n", + "trainer = pl.Trainer(max_epochs=3, progress_bar_refresh_rate=20, gpus=1)\n", + "# Pass the datamodule as arg to trainer.fit to override model hooks :)\n", + "trainer.fit(model, dm)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WNxrugIGRRv5", + "colab_type": "text" + }, + "source": [ + "## Defining the CIFAR10 DataModule\n", + "\n", + "Lets prove the `LitModel` we made earlier is dataset agnostic by defining a new datamodule for the CIFAR10 dataset." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1tkaYLU7RT5P", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class CIFAR10DataModule(pl.LightningDataModule):\n", + "\n", + " def __init__(self, data_dir: str = './'):\n", + " super().__init__()\n", + " self.data_dir = data_dir\n", + " self.transform = transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n", + " ])\n", + "\n", + " self.dims = (3, 32, 32)\n", + " self.num_classes = 10\n", + "\n", + " def prepare_data(self):\n", + " # download\n", + " CIFAR10(self.data_dir, train=True, download=True)\n", + " CIFAR10(self.data_dir, train=False, download=True)\n", + "\n", + " def setup(self, stage=None):\n", + "\n", + " # Assign train/val datasets for use in dataloaders\n", + " if stage == 'fit' or stage is None:\n", + " cifar_full = CIFAR10(self.data_dir, train=True, transform=self.transform)\n", + " self.cifar_train, self.cifar_val = random_split(cifar_full, [45000, 5000])\n", + "\n", + " # Assign test dataset for use in dataloader(s)\n", + " if stage == 'test' or stage is None:\n", + " self.cifar_test = CIFAR10(self.data_dir, train=False, transform=self.transform)\n", + "\n", + " def train_dataloader(self):\n", + " return DataLoader(self.cifar_train, batch_size=32)\n", + "\n", + " def val_dataloader(self):\n", + " return DataLoader(self.cifar_val, batch_size=32)\n", + "\n", + " def test_dataloader(self):\n", + " return DataLoader(self.cifar_test, batch_size=32)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BrXxf3oX_gsZ", + "colab_type": "text" + }, + "source": [ + "## Training the `LitModel` using the `CIFAR10DataModule`\n", + "\n", + "Our model isn't very good, so it will perform pretty badly on the CIFAR10 dataset.\n", + "\n", + "The point here is that we can see that our `LitModel` has no problem using a different datamodule as its input data." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "sd-SbWi_krdj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "dm = CIFAR10DataModule()\n", + "model = LitModel(*dm.size(), dm.num_classes, hidden_size=256)\n", + "trainer = pl.Trainer(max_epochs=5, progress_bar_refresh_rate=20, gpus=1)\n", + "trainer.fit(model, dm)" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/notebooks/03-basic-gan.ipynb b/notebooks/03-basic-gan.ipynb new file mode 100644 index 0000000000..d88a524285 --- /dev/null +++ b/notebooks/03-basic-gan.ipynb @@ -0,0 +1,424 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "03-basic-gan.ipynb", + "provenance": [], + "collapsed_sections": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J37PBnE_x7IW", + "colab_type": "text" + }, + "source": [ + "# PyTorch Lightning Basic GAN Tutorial ⚡\n", + "\n", + "How to train a GAN!\n", + "\n", + "Main takeaways:\n", + "1. Generator and discriminator are arbitraty PyTorch modules.\n", + "2. 
training_step does both the generator and discriminator training.\n", + "\n", + "---\n", + "\n", + " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", + " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", + " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kg2MKpRmybht", + "colab_type": "text" + }, + "source": [ + "### Setup\n", + "Lightning is easy to install. Simply `pip install pytorch-lightning`" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LfrJLKPFyhsK", + "colab_type": "code", + "colab": {} + }, + "source": [ + "! pip install pytorch-lightning --quiet" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "BjEPuiVLyanw", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "from argparse import ArgumentParser\n", + "from collections import OrderedDict\n", + "\n", + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, random_split\n", + "from torchvision.datasets import MNIST\n", + "\n", + "import pytorch_lightning as pl" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OuXJzr4G2uHV", + "colab_type": "text" + }, + "source": [ + "### MNIST DataModule\n", + "\n", + "Below, we define a DataModule for the MNIST Dataset. To learn more about DataModules, check out our tutorial on them or see the [latest docs](https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html)." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DOY_nHu328g7", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class MNISTDataModule(pl.LightningDataModule):\n", + "\n", + " def __init__(self, data_dir: str = './', batch_size: int = 64, num_workers: int = 8):\n", + " super().__init__()\n", + " self.data_dir = data_dir\n", + " self.batch_size = batch_size\n", + " self.num_workers = num_workers\n", + "\n", + " self.transform = transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ])\n", + "\n", + " # self.dims is returned when you call dm.size()\n", + " # Setting default dims here because we know them.\n", + " # Could optionally be assigned dynamically in dm.setup()\n", + " self.dims = (1, 28, 28)\n", + " self.num_classes = 10\n", + "\n", + " def prepare_data(self):\n", + " # download\n", + " MNIST(self.data_dir, train=True, download=True)\n", + " MNIST(self.data_dir, train=False, download=True)\n", + "\n", + " def setup(self, stage=None):\n", + "\n", + " # Assign train/val datasets for use in dataloaders\n", + " if stage == 'fit' or stage is None:\n", + " mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)\n", + " self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])\n", + "\n", + " # Assign test dataset for use in dataloader(s)\n", + " if stage == 'test' or stage is None:\n", + " self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)\n", + "\n", + " def train_dataloader(self):\n", + " return DataLoader(self.mnist_train, batch_size=self.batch_size, num_workers=self.num_workers)\n", + "\n", + " def val_dataloader(self):\n", + " return DataLoader(self.mnist_val, batch_size=self.batch_size, num_workers=self.num_workers)\n", + "\n", + " def test_dataloader(self):\n", + " return DataLoader(self.mnist_test, batch_size=self.batch_size, num_workers=self.num_workers)" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tW3c0QrQyF9P", + "colab_type": "text" + }, + "source": [ + "### A. Generator" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0E2QDjl5yWtz", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class Generator(nn.Module):\n", + " def __init__(self, latent_dim, img_shape):\n", + " super().__init__()\n", + " self.img_shape = img_shape\n", + "\n", + " def block(in_feat, out_feat, normalize=True):\n", + " layers = [nn.Linear(in_feat, out_feat)]\n", + " if normalize:\n", + " layers.append(nn.BatchNorm1d(out_feat, 0.8))\n", + " layers.append(nn.LeakyReLU(0.2, inplace=True))\n", + " return layers\n", + "\n", + " self.model = nn.Sequential(\n", + " *block(latent_dim, 128, normalize=False),\n", + " *block(128, 256),\n", + " *block(256, 512),\n", + " *block(512, 1024),\n", + " nn.Linear(1024, int(np.prod(img_shape))),\n", + " nn.Tanh()\n", + " )\n", + "\n", + " def forward(self, z):\n", + " img = self.model(z)\n", + " img = img.view(img.size(0), *self.img_shape)\n", + " return img" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uyrltsGvyaI3", + "colab_type": "text" + }, + "source": [ + "### B. 
Discriminator" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ed3MR3vnyxyW", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class Discriminator(nn.Module):\n", + " def __init__(self, img_shape):\n", + " super().__init__()\n", + "\n", + " self.model = nn.Sequential(\n", + " nn.Linear(int(np.prod(img_shape)), 512),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " nn.Linear(512, 256),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " nn.Linear(256, 1),\n", + " nn.Sigmoid(),\n", + " )\n", + "\n", + " def forward(self, img):\n", + " img_flat = img.view(img.size(0), -1)\n", + " validity = self.model(img_flat)\n", + "\n", + " return validity" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BwUMom3ryySK", + "colab_type": "text" + }, + "source": [ + "### C. GAN\n", + "\n", + "#### A couple of cool features to check out in this example...\n", + "\n", + " - We use `some_tensor.type_as(another_tensor)` to make sure we initialize new tensors on the right device (i.e. GPU, CPU).\n", + " - Lightning will put your dataloader data on the right device automatically\n", + " - In this example, we pull from latent dim on the fly, so we need to dynamically add tensors to the right device.\n", + " - `type_as` is the way we recommend to do this.\n", + " - This example shows how to use multiple dataloaders in your `LightningModule`." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3vKszYf6y1Vv", + "colab_type": "code", + "colab": {} + }, + "source": [ + " class GAN(pl.LightningModule):\n", + "\n", + " def __init__(\n", + " self,\n", + " channels,\n", + " width,\n", + " height,\n", + " latent_dim: int = 100,\n", + " lr: float = 0.0002,\n", + " b1: float = 0.5,\n", + " b2: float = 0.999,\n", + " batch_size: int = 64,\n", + " **kwargs\n", + " ):\n", + " super().__init__()\n", + " self.save_hyperparameters()\n", + "\n", + " # networks\n", + " data_shape = (channels, width, height)\n", + " self.generator = Generator(latent_dim=self.hparams.latent_dim, img_shape=data_shape)\n", + " self.discriminator = Discriminator(img_shape=data_shape)\n", + "\n", + " self.validation_z = torch.randn(8, self.hparams.latent_dim)\n", + "\n", + " self.example_input_array = torch.zeros(2, self.hparams.latent_dim)\n", + "\n", + " def forward(self, z):\n", + " return self.generator(z)\n", + "\n", + " def adversarial_loss(self, y_hat, y):\n", + " return F.binary_cross_entropy(y_hat, y)\n", + "\n", + " def training_step(self, batch, batch_idx, optimizer_idx):\n", + " imgs, _ = batch\n", + "\n", + " # sample noise\n", + " z = torch.randn(imgs.shape[0], self.hparams.latent_dim)\n", + " z = z.type_as(imgs)\n", + "\n", + " # train generator\n", + " if optimizer_idx == 0:\n", + "\n", + " # generate images\n", + " self.generated_imgs = self(z)\n", + "\n", + " # log sampled images\n", + " sample_imgs = self.generated_imgs[:6]\n", + " grid = torchvision.utils.make_grid(sample_imgs)\n", + " self.logger.experiment.add_image('generated_images', grid, 0)\n", + "\n", + " # ground truth result (ie: all fake)\n", + " # put on GPU because we created this tensor inside training_loop\n", + " valid = torch.ones(imgs.size(0), 1)\n", + " valid = valid.type_as(imgs)\n", + "\n", + " # adversarial loss is binary cross-entropy\n", + " g_loss = self.adversarial_loss(self.discriminator(self(z)), valid)\n", + " tqdm_dict = {'g_loss': g_loss}\n", + " output = OrderedDict({\n", + " 'loss': g_loss,\n", + " 'progress_bar': tqdm_dict,\n", + " 'log': tqdm_dict\n", + " })\n", + " 
return output\n", + "\n", + " # train discriminator\n", + " if optimizer_idx == 1:\n", + " # Measure discriminator's ability to classify real from generated samples\n", + "\n", + " # how well can it label as real?\n", + " valid = torch.ones(imgs.size(0), 1)\n", + " valid = valid.type_as(imgs)\n", + "\n", + " real_loss = self.adversarial_loss(self.discriminator(imgs), valid)\n", + "\n", + " # how well can it label as fake?\n", + " fake = torch.zeros(imgs.size(0), 1)\n", + " fake = fake.type_as(imgs)\n", + "\n", + " fake_loss = self.adversarial_loss(\n", + " self.discriminator(self(z).detach()), fake)\n", + "\n", + " # discriminator loss is the average of these\n", + " d_loss = (real_loss + fake_loss) / 2\n", + " tqdm_dict = {'d_loss': d_loss}\n", + " output = OrderedDict({\n", + " 'loss': d_loss,\n", + " 'progress_bar': tqdm_dict,\n", + " 'log': tqdm_dict\n", + " })\n", + " return output\n", + "\n", + " def configure_optimizers(self):\n", + " lr = self.hparams.lr\n", + " b1 = self.hparams.b1\n", + " b2 = self.hparams.b2\n", + "\n", + " opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))\n", + " opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))\n", + " return [opt_g, opt_d], []\n", + "\n", + " def on_epoch_end(self):\n", + " z = self.validation_z.type_as(self.generator.model[0].weight)\n", + "\n", + " # log sampled images\n", + " sample_imgs = self(z)\n", + " grid = torchvision.utils.make_grid(sample_imgs)\n", + " self.logger.experiment.add_image('generated_images', grid, self.current_epoch)" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ey5FmJPnzm_E", + "colab_type": "code", + "colab": {} + }, + "source": [ + "dm = MNISTDataModule()\n", + "model = GAN(*dm.size())\n", + "trainer = pl.Trainer(gpus=1, max_epochs=5, progress_bar_refresh_rate=20)\n", + "trainer.fit(model, dm)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "MlECc7cHzolp", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Start tensorboard.\n", + "%load_ext tensorboard\n", + "%tensorboard --logdir lightning_logs/" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/notebooks/04-transformers-text-classification.ipynb b/notebooks/04-transformers-text-classification.ipynb new file mode 100644 index 0000000000..d2649c1a8d --- /dev/null +++ b/notebooks/04-transformers-text-classification.ipynb @@ -0,0 +1,556 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "04-transformers-text-classification.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true, + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8ag5ANQPJ_j9", + "colab_type": "text" + }, + "source": [ + "# Finetune 🤗 Transformers Models with PyTorch Lightning ⚡\n", + "\n", + "This notebook will use HuggingFace's `datasets` library to get data, which will be wrapped in a `LightningDataModule`. Then, we write a class to perform text classification on any dataset from the[ GLUE Benchmark](https://gluebenchmark.com/). 
(We just show CoLA and MRPC due to constraint on compute/disk)\n", + "\n", + "[HuggingFace's NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=cola) can help you get a feel for the two datasets we will use and what tasks they are solving for.\n", + "\n", + "---\n", + " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", + " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", + " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)\n", + "\n", + " - [HuggingFace nlp](https://github.com/huggingface/nlp)\n", + " - [HuggingFace transformers](https://github.com/huggingface/transformers)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fqlsVTj7McZ3", + "colab_type": "text" + }, + "source": [ + "### Setup" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OIhHrRL-MnKK", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!pip install pytorch-lightning datasets transformers" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "6yuQT_ZQMpCg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from argparse import ArgumentParser\n", + "from datetime import datetime\n", + "from typing import Optional\n", + "\n", + "import nlp\n", + "import numpy as np\n", + "import pytorch_lightning as pl\n", + "import torch\n", + "from torch.utils.data import DataLoader\n", + "from transformers import (\n", + " AdamW,\n", + " AutoModelForSequenceClassification,\n", + " AutoConfig,\n", + " AutoTokenizer,\n", + " get_linear_schedule_with_warmup,\n", + " glue_compute_metrics\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9ORJfiuiNZ_N", + "colab_type": "text" + }, + "source": [ + "## GLUE DataModule" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jW9xQhZxMz1G", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class GLUEDataModule(pl.LightningDataModule):\n", + "\n", + " task_text_field_map = {\n", + " 'cola': ['sentence'],\n", + " 'sst2': ['sentence'],\n", + " 'mrpc': ['sentence1', 'sentence2'],\n", + " 'qqp': ['question1', 'question2'],\n", + " 'stsb': ['sentence1', 'sentence2'],\n", + " 'mnli': ['premise', 'hypothesis'],\n", + " 'qnli': ['question', 'sentence'],\n", + " 'rte': ['sentence1', 'sentence2'],\n", + " 'wnli': ['sentence1', 'sentence2'],\n", + " 'ax': ['premise', 'hypothesis']\n", + " }\n", + "\n", + " glue_task_num_labels = {\n", + " 'cola': 2,\n", + " 'sst2': 2,\n", + " 'mrpc': 2,\n", + " 'qqp': 2,\n", + " 'stsb': 1,\n", + " 'mnli': 3,\n", + " 'qnli': 2,\n", + " 'rte': 2,\n", + " 'wnli': 2,\n", + " 'ax': 3\n", + " }\n", + "\n", + " loader_columns = [\n", + " 'nlp_idx',\n", + " 'input_ids',\n", + " 'token_type_ids',\n", + " 'attention_mask',\n", + " 'start_positions',\n", + " 'end_positions',\n", + " 'labels'\n", + " ]\n", + "\n", + " def __init__(\n", + " self,\n", + " model_name_or_path: str,\n", + " task_name: str ='mrpc',\n", + " max_seq_length: int = 128,\n", + " train_batch_size: int = 32,\n", + " eval_batch_size: int = 32,\n", + " **kwargs\n", + " ):\n", + " super().__init__()\n", + " self.model_name_or_path = model_name_or_path\n", + " self.task_name = task_name\n", + " self.max_seq_length = max_seq_length\n", + " self.train_batch_size = train_batch_size\n", + " self.eval_batch_size = eval_batch_size\n", + "\n", + " self.text_fields = 
self.task_text_field_map[task_name]\n", + " self.num_labels = self.glue_task_num_labels[task_name]\n", + " self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n", + "\n", + " def setup(self, stage):\n", + " self.dataset = nlp.load_dataset('glue', self.task_name)\n", + "\n", + " for split in self.dataset.keys():\n", + " self.dataset[split] = self.dataset[split].map(\n", + " self.convert_to_features,\n", + " batched=True,\n", + " remove_columns=['label'],\n", + " )\n", + " self.columns = [c for c in self.dataset[split].column_names if c in self.loader_columns]\n", + " self.dataset[split].set_format(type=\"torch\", columns=self.columns)\n", + "\n", + " self.eval_splits = [x for x in self.dataset.keys() if 'validation' in x]\n", + "\n", + " def prepare_data(self):\n", + " nlp.load_dataset('glue', self.task_name)\n", + " AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n", + " \n", + " def train_dataloader(self):\n", + " return DataLoader(self.dataset['train'], batch_size=self.train_batch_size)\n", + " \n", + " def val_dataloader(self):\n", + " if len(self.eval_splits) == 1:\n", + " return DataLoader(self.dataset['validation'], batch_size=self.eval_batch_size)\n", + " elif len(self.eval_splits) > 1:\n", + " return [DataLoader(self.dataset[x], batch_size=self.eval_batch_size) for x in self.eval_splits]\n", + "\n", + " def test_dataloader(self):\n", + " if len(self.eval_splits) == 1:\n", + " return DataLoader(self.dataset['test'], batch_size=self.eval_batch_size)\n", + " elif len(self.eval_splits) > 1:\n", + " return [DataLoader(self.dataset[x], batch_size=self.eval_batch_size) for x in self.eval_splits]\n", + "\n", + " def convert_to_features(self, example_batch, indices=None):\n", + "\n", + " # Either encode single sentence or sentence pairs\n", + " if len(self.text_fields) > 1:\n", + " texts_or_text_pairs = list(zip(example_batch[self.text_fields[0]], example_batch[self.text_fields[1]]))\n", + " else:\n", + " texts_or_text_pairs = example_batch[self.text_fields[0]]\n", + "\n", + " # Tokenize the text/text pairs\n", + " features = self.tokenizer.batch_encode_plus(\n", + " texts_or_text_pairs,\n", + " max_length=self.max_seq_length,\n", + " pad_to_max_length=True,\n", + " truncation=True\n", + " )\n", + "\n", + " # Rename label to labels to make it easier to pass to model forward\n", + " features['labels'] = example_batch['label']\n", + "\n", + " return features" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jQC3a6KuOpX3", + "colab_type": "text" + }, + "source": [ + "#### You could use this datamodule with standalone PyTorch if you wanted..." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JCMH3IAsNffF", + "colab_type": "code", + "colab": {} + }, + "source": [ + "dm = GLUEDataModule('distilbert-base-uncased')\n", + "dm.prepare_data()\n", + "dm.setup('fit')\n", + "next(iter(dm.train_dataloader()))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l9fQ_67BO2Lj", + "colab_type": "text" + }, + "source": [ + "## GLUE Model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gtn5YGKYO65B", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class GLUETransformer(pl.LightningModule):\n", + " def __init__(\n", + " self,\n", + " model_name_or_path: str,\n", + " num_labels: int,\n", + " learning_rate: float = 2e-5,\n", + " adam_epsilon: float = 1e-8,\n", + " warmup_steps: int = 0,\n", + " weight_decay: float = 0.0,\n", + " train_batch_size: int = 32,\n", + " eval_batch_size: int = 32,\n", + " eval_splits: Optional[list] = None,\n", + " **kwargs\n", + " ):\n", + " super().__init__()\n", + "\n", + " self.save_hyperparameters()\n", + "\n", + " self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)\n", + " self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=self.config)\n", + " self.metric = nlp.load_metric(\n", + " 'glue',\n", + " self.hparams.task_name,\n", + " experiment_id=datetime.now().strftime(\"%d-%m-%Y_%H-%M-%S\")\n", + " )\n", + "\n", + " def forward(self, **inputs):\n", + " return self.model(**inputs)\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " outputs = self(**batch)\n", + " loss = outputs[0]\n", + " return pl.TrainResult(loss)\n", + "\n", + " def validation_step(self, batch, batch_idx, dataloader_idx=0):\n", + " outputs = self(**batch)\n", + " val_loss, logits = outputs[:2]\n", + "\n", + " if self.hparams.num_labels >= 1:\n", + " preds = torch.argmax(logits, axis=1)\n", + " elif self.hparams.num_labels == 1:\n", + " preds = logits.squeeze()\n", + "\n", + " labels = batch[\"labels\"]\n", + "\n", + " return {'loss': val_loss, \"preds\": preds, \"labels\": labels}\n", + "\n", + " def validation_epoch_end(self, outputs):\n", + " if self.hparams.task_name == 'mnli':\n", + " for i, output in enumerate(outputs):\n", + " # matched or mismatched\n", + " split = self.hparams.eval_splits[i].split('_')[-1]\n", + " preds = torch.cat([x['preds'] for x in output]).detach().cpu().numpy()\n", + " labels = torch.cat([x['labels'] for x in output]).detach().cpu().numpy()\n", + " loss = torch.stack([x['loss'] for x in output]).mean()\n", + " if i == 0:\n", + " result = pl.EvalResult(checkpoint_on=loss)\n", + " result.log(f'val_loss_{split}', loss, prog_bar=True)\n", + " split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(preds, labels).items()}\n", + " result.log_dict(split_metrics, prog_bar=True)\n", + " return result\n", + "\n", + " preds = torch.cat([x['preds'] for x in outputs]).detach().cpu().numpy()\n", + " labels = torch.cat([x['labels'] for x in outputs]).detach().cpu().numpy()\n", + " loss = torch.stack([x['loss'] for x in outputs]).mean()\n", + " result = pl.EvalResult(checkpoint_on=loss)\n", + " result.log('val_loss', loss, prog_bar=True)\n", + " result.log_dict(self.metric.compute(preds, labels), prog_bar=True)\n", + " return result\n", + "\n", + " def setup(self, stage):\n", + " if stage == 'fit':\n", + " # Get dataloader by calling it - train_dataloader() is called after setup() by default\n", + " train_loader = self.train_dataloader()\n", 
+ "\n", + " # Calculate total steps\n", + " self.total_steps = (\n", + " (len(train_loader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.gpus)))\n", + " // self.hparams.accumulate_grad_batches\n", + " * float(self.hparams.max_epochs)\n", + " )\n", + "\n", + " def configure_optimizers(self):\n", + " \"Prepare optimizer and schedule (linear warmup and decay)\"\n", + " model = self.model\n", + " no_decay = [\"bias\", \"LayerNorm.weight\"]\n", + " optimizer_grouped_parameters = [\n", + " {\n", + " \"params\": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": self.hparams.weight_decay,\n", + " },\n", + " {\n", + " \"params\": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": 0.0,\n", + " },\n", + " ]\n", + " optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)\n", + "\n", + " scheduler = get_linear_schedule_with_warmup(\n", + " optimizer, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps\n", + " )\n", + " scheduler = {\n", + " 'scheduler': scheduler,\n", + " 'interval': 'step',\n", + " 'frequency': 1\n", + " }\n", + " return [optimizer], [scheduler]\n", + "\n", + " @staticmethod\n", + " def add_model_specific_args(parent_parser):\n", + " parser = ArgumentParser(parents=[parent_parser], add_help=False)\n", + " parser.add_argument(\"--learning_rate\", default=2e-5, type=float)\n", + " parser.add_argument(\"--adam_epsilon\", default=1e-8, type=float)\n", + " parser.add_argument(\"--warmup_steps\", default=0, type=int)\n", + " parser.add_argument(\"--weight_decay\", default=0.0, type=float)\n", + " return parser" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ha-NdIP_xbd3", + "colab_type": "text" + }, + "source": [ + "### ⚡ Quick Tip \n", + " - Combine arguments from your DataModule, Model, and Trainer into one for easy and robust configuration" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3dEHnl3RPlAR", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def parse_args(args=None):\n", + " parser = ArgumentParser()\n", + " parser = pl.Trainer.add_argparse_args(parser)\n", + " parser = GLUEDataModule.add_argparse_args(parser)\n", + " parser = GLUETransformer.add_model_specific_args(parser)\n", + " parser.add_argument('--seed', type=int, default=42)\n", + " return parser.parse_args(args)\n", + "\n", + "\n", + "def main(args):\n", + " pl.seed_everything(args.seed)\n", + " dm = GLUEDataModule.from_argparse_args(args)\n", + " dm.prepare_data()\n", + " dm.setup('fit')\n", + " model = GLUETransformer(num_labels=dm.num_labels, eval_splits=dm.eval_splits, **vars(args))\n", + " trainer = pl.Trainer.from_argparse_args(args)\n", + " return dm, model, trainer" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PkuLaeec3sJ-", + "colab_type": "text" + }, + "source": [ + "# Training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QSpueK5UPsN7", + "colab_type": "text" + }, + "source": [ + "## CoLA\n", + "\n", + "See an interactive view of the CoLA dataset in [NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=cola)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NJnFmtpnPu0Y", + "colab_type": "code", + "colab": {} + }, + "source": [ + "mocked_args = \"\"\"\n", + " --model_name_or_path albert-base-v2\n", + " 
--task_name cola\n", + " --max_epochs 3\n", + " --gpus 1\"\"\".split()\n", + "\n", + "args = parse_args(mocked_args)\n", + "dm, model, trainer = main(args)\n", + "trainer.fit(model, dm)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_MrNsTnqdz4z", + "colab_type": "text" + }, + "source": [ + "## MRPC\n", + "\n", + "See an interactive view of the MRPC dataset in [NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=mrpc)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LBwRxg9Cb3d-", + "colab_type": "code", + "colab": {} + }, + "source": [ + "mocked_args = \"\"\"\n", + " --model_name_or_path distilbert-base-cased\n", + " --task_name mrpc\n", + " --max_epochs 3\n", + " --gpus 1\"\"\".split()\n", + "\n", + "args = parse_args(mocked_args)\n", + "dm, model, trainer = main(args)\n", + "trainer.fit(model, dm)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iZhbn0HzfdCu", + "colab_type": "text" + }, + "source": [ + "## MNLI\n", + "\n", + " - The MNLI dataset is huge, so we aren't going to bother trying to train it here.\n", + "\n", + " - Let's just make sure our multi-dataloader logic is right by skipping over training and going straight to validation.\n", + "\n", + "See an interactive view of the MRPC dataset in [NLP Viewer](https://huggingface.co/nlp/viewer/?dataset=glue&config=mnli)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AvsZMOggfcWW", + "colab_type": "code", + "colab": {} + }, + "source": [ + "mocked_args = \"\"\"\n", + " --model_name_or_path distilbert-base-uncased\n", + " --task_name mnli\n", + " --max_epochs 1\n", + " --gpus 1\n", + " --limit_train_batches 10\n", + " --progress_bar_refresh_rate 20\"\"\".split()\n", + "\n", + "args = parse_args(mocked_args)\n", + "dm, model, trainer = main(args)\n", + "trainer.fit(model, dm)" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/notebooks/README.md b/notebooks/README.md new file mode 100644 index 0000000000..1f946ee196 --- /dev/null +++ b/notebooks/README.md @@ -0,0 +1,12 @@ +# Lightning Notebooks ⚡ + +## Official Notebooks + +You can easily run any of the official notebooks by clicking the 'Open in Colab' links in the table below :smile: + +| Notebook | Description | Colab Link | +| :--- | :--- | :---: | +| __MNIST Hello World__ | Train your first Lightning Module on the classic MNIST Handwritten Digits Dataset. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/01_mnist_hello_world.ipynb) | +| __Datamodules__ | Learn about DataModules and train a dataset-agnostic model on MNIST and CIFAR10.| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/02_datamodules.ipynb)| +| __GAN__ | Train a GAN on the MNIST Dataset. Learn how to use multiple optimizers in Lightning. 
| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/03-basic-gan.ipynb) |
+| __BERT__ | Fine-tune HuggingFace Transformers models on the GLUE Benchmark. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/04-transformers-text-classification.ipynb) |
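+
+## Quick preview
+
+To give a sense of what the notebooks cover, the hello-world notebook boils down to a `LightningModule` plus a `Trainer` — roughly the sketch below (see `01-mnist-hello-world.ipynb` for the full runnable version with MNIST data loading and TensorBoard logging; `train_loader` here stands in for any PyTorch `DataLoader` you provide):
+
+```python
+import torch
+from torch.nn import functional as F
+import pytorch_lightning as pl
+
+
+class MNISTModel(pl.LightningModule):
+    def __init__(self):
+        super().__init__()
+        # A single linear layer mapping flattened 28x28 images to 10 digit classes
+        self.l1 = torch.nn.Linear(28 * 28, 10)
+
+    def forward(self, x):
+        return torch.relu(self.l1(x.view(x.size(0), -1)))
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        loss = F.cross_entropy(self(x), y)
+        return pl.TrainResult(loss)
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=0.02)
+
+
+# train_loader is assumed to be a DataLoader over MNIST, built as shown in the notebook
+trainer = pl.Trainer(max_epochs=3)
+trainer.fit(MNISTModel(), train_loader)
+```
+
+The remaining notebooks build on this same pattern, adding DataModules, multiple optimizers, and Transformer fine-tuning.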