From 260c1bd9880e02c09a40408be75f0eb07b61bac0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Sun, 26 Mar 2023 01:17:10 +0100
Subject: [PATCH] Update Fabric README (#17112)

* Add Fabric diff and resulting code to the README
* Reduce horizontal space
* sub
* Contributor count
* Feedback
* Other snippets

---
 README.md | 173 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 136e033849..b183798ecb 100644
--- a/README.md
+++ b/README.md
@@ -322,46 +322,171 @@ ______________________________________________________________________

Run on any device at any scale with expert-level control over the PyTorch training loop and scaling strategy. You can even write your own Trainer.

-Fabric is designed for the most complex models like foundation model scaling, LLMs, diffussion, transformers, reinforcement learning, active learning.
+Fabric is designed for the most complex models like foundation model scaling, LLMs, diffusion models, transformers, reinforcement learning, and active learning. Of any size.

<table>
<tr>
  <th>What to change</th>
  <th>Resulting Fabric Code (copy me!)</th>
</tr>
<tr>
<td>

```diff
+ import lightning as L
- import torch
- import torch.nn as nn
- from torch.utils.data import DataLoader, Dataset

- class PyTorchModel(nn.Module):
-     ...

- class PyTorchDataset(Dataset):
-     ...

-+ fabric = L.Fabric(accelerator="cuda", devices=8, strategy="ddp")
+ import torch; import torchvision as tv
+
++ fabric = L.Fabric()
+ fabric.launch()

-- device = "cuda" if torch.cuda.is_available() else "cpu
- model = PyTorchModel(...)
- optimizer = torch.optim.SGD(model.parameters())
+
+ model = tv.models.resnet18()
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
+- device = "cuda" if torch.cuda.is_available() else "cpu"
+- model.to(device)
+ model, optimizer = fabric.setup(model, optimizer)

- dataloader = DataLoader(PyTorchDataset(...), ...)
+
+ dataset = tv.datasets.CIFAR10("data", download=True,
+                               train=True,
+                               transform=tv.transforms.ToTensor())
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
+ dataloader = fabric.setup_dataloaders(dataloader)

+ model.train()
+ num_epochs = 10
  for epoch in range(num_epochs):
      for batch in dataloader:
-         input, target = batch
--        input, target = input.to(device), target.to(device)
+         inputs, labels = batch
+-        inputs, labels = inputs.to(device), labels.to(device)
          optimizer.zero_grad()
-         output = model(input)
-         loss = loss_fn(output, target)
+         outputs = model(inputs)
+         loss = torch.nn.functional.cross_entropy(outputs, labels)
-         loss.backward()
+         fabric.backward(loss)
          optimizer.step()
-     lr_scheduler.step()
```

</td>
<td>

```python
import lightning as L
import torch; import torchvision as tv

fabric = L.Fabric()
fabric.launch()

model = tv.models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
model, optimizer = fabric.setup(model, optimizer)

dataset = tv.datasets.CIFAR10("data", download=True,
                              train=True,
                              transform=tv.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
dataloader = fabric.setup_dataloaders(dataloader)

model.train()
num_epochs = 10
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        fabric.backward(loss)
        optimizer.step()
```

</td>
</tr>
</table>
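To scale the resulting script up or down, only the `Fabric(...)` arguments change; the training loop itself stays untouched. A minimal sketch, using only constructor arguments that already appear elsewhere in this README:

```python
import lightning as L

# same script as above, but launched across 8 CUDA GPUs with
# distributed data parallel (DDP); the loop code is unchanged
fabric = L.Fabric(accelerator="cuda", devices=8, strategy="ddp")
fabric.launch()
```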
## Key features

-- Easily switch from running on CPU to GPU (Apple Silicon, CUDA, …), TPU, multi-GPU or even multi-node training
-- Use state-of-the-art distributed training strategies (DDP, FSDP, DeepSpeed) and mixed precision out of the box
-- All the device logic boilerplate is handled for you
-- Designed with multi-billion parameter models in mind
-- Build your own custom Trainer using Fabric primitives for training checkpointing, logging, and more

<details>
  <summary>Easily switch from running on CPU to GPU (Apple Silicon, CUDA, …), TPU, multi-GPU or even multi-node training</summary>

```python
from lightning import Fabric

# Use your available hardware
# no code changes needed
fabric = Fabric()

# Run on GPUs (CUDA or MPS)
fabric = Fabric(accelerator="gpu")

# 8 GPUs
fabric = Fabric(accelerator="gpu", devices=8)

# 256 GPUs, multi-node
fabric = Fabric(accelerator="gpu", devices=8, num_nodes=32)

# Run on TPUs
fabric = Fabric(accelerator="tpu")
```
</details>

<details>
  <summary>Use state-of-the-art distributed training strategies (DDP, FSDP, DeepSpeed) and mixed precision out of the box</summary>

```python
from lightning import Fabric

# Use state-of-the-art distributed training techniques
fabric = Fabric(strategy="ddp")
fabric = Fabric(strategy="deepspeed")
fabric = Fabric(strategy="fsdp")

# Switch the precision
fabric = Fabric(precision="16-mixed")
fabric = Fabric(precision="64")
```
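These arguments compose, so you can pick an accelerator, a strategy, and a precision in a single constructor call. A hedged sketch combining values that appear in the other snippets in this README; adjust to your hardware:

```python
from lightning import Fabric

# 8 CUDA GPUs, sharded with FSDP, computing in mixed 16-bit precision
fabric = Fabric(accelerator="cuda", devices=8, strategy="fsdp", precision="16-mixed")
```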
</details>

<details>
  <summary>All the device logic boilerplate is handled for you</summary>

```diff
  # no more of this!
- model.to(device)
- batch.to(device)
```
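Once `fabric.setup` and `fabric.setup_dataloaders` have run, model parameters and batches are placed on the right device automatically. A minimal sketch, reusing the CIFAR10 example from the table above with a default single-device `Fabric`:

```python
import lightning as L
import torch; import torchvision as tv

fabric = L.Fabric()
fabric.launch()

model = tv.models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
model, optimizer = fabric.setup(model, optimizer)

dataset = tv.datasets.CIFAR10("data", download=True, train=True,
                              transform=tv.transforms.ToTensor())
dataloader = fabric.setup_dataloaders(
    torch.utils.data.DataLoader(dataset, batch_size=8))

for inputs, labels in dataloader:
    # inputs and labels already live on the device Fabric selected
    break
```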
</details>

<details>
  <summary>Build your own custom Trainer using Fabric primitives for training, checkpointing, logging, and more</summary>

```python
import lightning as L
import torch


class MyCustomTrainer:
    def __init__(self, accelerator="auto", strategy="auto", devices="auto", precision="32-true"):
        self.fabric = L.Fabric(accelerator=accelerator, strategy=strategy, devices=devices, precision=precision)

    def fit(self, model, optimizer, dataloader, max_epochs):
        self.fabric.launch()

        model, optimizer = self.fabric.setup(model, optimizer)
        dataloader = self.fabric.setup_dataloaders(dataloader)
        model.train()

        for epoch in range(max_epochs):
            for batch in dataloader:
                input, target = batch
                optimizer.zero_grad()
                output = model(input)
                loss = torch.nn.functional.cross_entropy(output, target)
                self.fabric.backward(loss)
                optimizer.step()
```

You can find a more extensive example in our [examples](examples/fabric/build_your_own_trainer).
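A usage sketch for the trainer above (hypothetical values, reusing the CIFAR10 setup from the table earlier):

```python
import torch
import torchvision as tv

model = tv.models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
dataset = tv.datasets.CIFAR10("data", download=True, train=True,
                              transform=tv.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)

trainer = MyCustomTrainer(accelerator="gpu", devices=8)
trainer.fit(model, optimizer, dataloader, max_epochs=10)
```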
</details>

______________________________________________________________________

@@ -489,7 +614,7 @@ ______________________________________________________________________

The Lightning community is maintained by

- [10+ core contributors](https://lightning.ai/docs/pytorch/latest/governance.html) who are all a mix of professional engineers, research scientists, and Ph.D. students from top AI labs.
-- 590+ active community contributors.
+- 800+ community contributors.

Want to help us build Lightning and reduce boilerplate for thousands of researchers? [Learn how to make your first contribution here](https://lightning.ai/docs/pytorch/stable/generated/CONTRIBUTING.html).