From 260c1bd9880e02c09a40408be75f0eb07b61bac0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Sun, 26 Mar 2023 01:17:10 +0100
Subject: [PATCH] Update Fabric README (#17112)

* Add Fabric diff and resulting code to the README
* Reduce horizontal space
* sub
* Contributor count
* Feedback
* Other snippets

---
 README.md | 173 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 136e033849..b183798ecb 100644
--- a/README.md
+++ b/README.md
@@ -322,46 +322,171 @@ ______________________________________________________________________

Run on any device at any scale with expert-level control over the PyTorch training loop and scaling strategy. You can even write your own Trainer.

-Fabric is designed for the most complex models like foundation model scaling, LLMs, diffussion, transformers, reinforcement learning, active learning.
+Fabric is designed for the most complex models like foundation model scaling, LLMs, diffusion models, transformers, reinforcement learning, and active learning. Of any size.

<table>
<tr>
  <th>What to change</th>
  <th>Resulting Fabric Code (copy me!)</th>
</tr>
<tr>
<td>

```diff
+ import lightning as L
- import torch
- import torch.nn as nn
- from torch.utils.data import DataLoader, Dataset

- class PyTorchModel(nn.Module):
-     ...

- class PyTorchDataset(Dataset):
-     ...

-+ fabric = L.Fabric(accelerator="cuda", devices=8, strategy="ddp")
+ import torch; import torchvision as tv
+
++ fabric = L.Fabric()
+ fabric.launch()

-- device = "cuda" if torch.cuda.is_available() else "cpu
- model = PyTorchModel(...)
- optimizer = torch.optim.SGD(model.parameters())
+
+ model = tv.models.resnet18()
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
+- device = "cuda" if torch.cuda.is_available() else "cpu"
+- model.to(device)
+ model, optimizer = fabric.setup(model, optimizer)

- dataloader = DataLoader(PyTorchDataset(...), ...)
+
+ dataset = tv.datasets.CIFAR10("data", download=True,
+                               train=True,
+                               transform=tv.transforms.ToTensor())
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
+ dataloader = fabric.setup_dataloaders(dataloader)

+ model.train()
+ num_epochs = 10
  for epoch in range(num_epochs):
      for batch in dataloader:
-         input, target = batch
--        input, target = input.to(device), target.to(device)
+         inputs, labels = batch
+-        inputs, labels = inputs.to(device), labels.to(device)
          optimizer.zero_grad()
-         output = model(input)
-         loss = loss_fn(output, target)
+         outputs = model(inputs)
+         loss = torch.nn.functional.cross_entropy(outputs, labels)
-         loss.backward()
+         fabric.backward(loss)
          optimizer.step()
-     lr_scheduler.step()
```

</td>
<td>

```python
import lightning as L
import torch; import torchvision as tv

fabric = L.Fabric()
fabric.launch()

model = tv.models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
model, optimizer = fabric.setup(model, optimizer)

dataset = tv.datasets.CIFAR10("data", download=True,
                              train=True,
                              transform=tv.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
dataloader = fabric.setup_dataloaders(dataloader)

model.train()
num_epochs = 10
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        fabric.backward(loss)
        optimizer.step()
```

</td>
</tr>
</table>
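To scale the resulting script up or down, only the `Fabric(...)` arguments change; the training loop itself stays untouched. A minimal sketch, using only constructor arguments that already appear elsewhere in this README:

```python
import lightning as L

# same script as above, but launched across 8 CUDA GPUs with
# distributed data parallel (DDP); the loop code is unchanged
fabric = L.Fabric(accelerator="cuda", devices=8, strategy="ddp")
fabric.launch()
```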
## Key features

-- Easily switch from running on CPU to GPU (Apple Silicon, CUDA, …), TPU, multi-GPU or even multi-node training
-- Use state-of-the-art distributed training strategies (DDP, FSDP, DeepSpeed) and mixed precision out of the box
-- All the device logic boilerplate is handled for you
-- Designed with multi-billion parameter models in mind
-- Build your own custom Trainer using Fabric primitives for training checkpointing, logging, and more

<details>
  <summary>Easily switch from running on CPU to GPU (Apple Silicon, CUDA, …), TPU, multi-GPU or even multi-node training</summary>

```python
from lightning import Fabric

# Use your available hardware
# no code changes needed
fabric = Fabric()

# Run on GPUs (CUDA or MPS)
fabric = Fabric(accelerator="gpu")

# 8 GPUs
fabric = Fabric(accelerator="gpu", devices=8)

# 256 GPUs, multi-node
fabric = Fabric(accelerator="gpu", devices=8, num_nodes=32)

# Run on TPUs
fabric = Fabric(accelerator="tpu")
```
</details>

<details>
  <summary>Use state-of-the-art distributed training strategies (DDP, FSDP, DeepSpeed) and mixed precision out of the box</summary>

```python
from lightning import Fabric

# Use state-of-the-art distributed training techniques
fabric = Fabric(strategy="ddp")
fabric = Fabric(strategy="deepspeed")
fabric = Fabric(strategy="fsdp")

# Switch the precision
fabric = Fabric(precision="16-mixed")
fabric = Fabric(precision="64")
```
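These arguments compose, so you can pick an accelerator, a strategy, and a precision in a single constructor call. A hedged sketch combining values that appear in the other snippets in this README; adjust to your hardware:

```python
from lightning import Fabric

# 8 CUDA GPUs, sharded with FSDP, computing in mixed 16-bit precision
fabric = Fabric(accelerator="cuda", devices=8, strategy="fsdp", precision="16-mixed")
```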
</details>

<details>
  <summary>All the device logic boilerplate is handled for you</summary>

```diff
  # no more of this!
- model.to(device)
- batch.to(device)
```
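Once `fabric.setup` and `fabric.setup_dataloaders` have run, model parameters and batches are placed on the right device automatically. A minimal sketch, reusing the CIFAR10 example from the table above with a default single-device `Fabric`:

```python
import lightning as L
import torch; import torchvision as tv

fabric = L.Fabric()
fabric.launch()

model = tv.models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
model, optimizer = fabric.setup(model, optimizer)

dataset = tv.datasets.CIFAR10("data", download=True, train=True,
                              transform=tv.transforms.ToTensor())
dataloader = fabric.setup_dataloaders(
    torch.utils.data.DataLoader(dataset, batch_size=8))

for inputs, labels in dataloader:
    # inputs and labels already live on the device Fabric selected
    break
```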
</details>

<details>
  <summary>Build your own custom Trainer using Fabric primitives for training, checkpointing, logging, and more</summary>

```python
import lightning as L
import torch


class MyCustomTrainer:
    def __init__(self, accelerator="auto", strategy="auto", devices="auto", precision="32-true"):
        self.fabric = L.Fabric(accelerator=accelerator, strategy=strategy, devices=devices, precision=precision)

    def fit(self, model, optimizer, dataloader, max_epochs):
        self.fabric.launch()

        model, optimizer = self.fabric.setup(model, optimizer)
        dataloader = self.fabric.setup_dataloaders(dataloader)
        model.train()

        for epoch in range(max_epochs):
            for batch in dataloader:
                input, target = batch
                optimizer.zero_grad()
                output = model(input)
                loss = torch.nn.functional.cross_entropy(output, target)
                self.fabric.backward(loss)
                optimizer.step()
```

You can find a more extensive example in our [examples](examples/fabric/build_your_own_trainer).
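A usage sketch for the trainer above (hypothetical values, reusing the CIFAR10 setup from the table earlier):

```python
import torch
import torchvision as tv

model = tv.models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
dataset = tv.datasets.CIFAR10("data", download=True, train=True,
                              transform=tv.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)

trainer = MyCustomTrainer(accelerator="gpu", devices=8)
trainer.fit(model, optimizer, dataloader, max_epochs=10)
```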
</details>

______________________________________________________________________

@@ -489,7 +614,7 @@ ______________________________________________________________________

The Lightning community is maintained by

- [10+ core contributors](https://lightning.ai/docs/pytorch/latest/governance.html) who are all a mix of professional engineers, research scientists, and Ph.D. students from top AI labs.
-- 590+ active community contributors.
+- 800+ community contributors.

Want to help us build Lightning and reduce boilerplate for thousands of researchers? [Learn how to make your first contribution here](https://lightning.ai/docs/pytorch/stable/generated/CONTRIBUTING.html).