diff --git a/Makefile b/Makefile index 6dec5981bd..444dd93d15 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,7 @@ clean: rm -rf .mypy_cache rm -rf .pytest_cache rm -rf ./docs/build - rm -rf ./docs/source-fabric/generated - rm -rf ./docs/source-fabric/*/generated - rm -rf ./docs/source-fabric/api + rm -rf ./docs/source-fabric/api/generated rm -rf ./docs/source-pytorch/notebooks rm -rf ./docs/source-pytorch/generated rm -rf ./docs/source-pytorch/*/generated diff --git a/docs/source-fabric/_templates/theme_variables.jinja b/docs/source-fabric/_templates/theme_variables.jinja index 447390ecc9..234fdae885 100644 --- a/docs/source-fabric/_templates/theme_variables.jinja +++ b/docs/source-fabric/_templates/theme_variables.jinja @@ -2,17 +2,14 @@ 'github': 'https://github.com/Lightning-AI/lightning', 'github_issues': 'https://github.com/Lightning-AI/lightning/issues', 'contributing': 'https://github.com/Lightning-AI/lightning/blob/master/.github/CONTRIBUTING.md', - 'governance': 'https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/governance.rst', - 'docs': 'https://lightning.rtfd.io/en/latest', - 'twitter': 'https://twitter.com/PyTorchLightnin', - 'discuss': 'https://pytorch-lightning.slack.com', - 'tutorials': 'https://pt-lightning.readthedocs.io/en/latest/#tutorials', - 'previous_pytorch_versions': 'https://pt-lightning.rtfd.io/en/latest/', - 'home': 'https://lightning.ai/', - 'get_started': 'https://pt-lightning.readthedocs.io/en/latest/introduction_guide.html', - 'features': 'https://pt-lightning.rtfd.io/en/latest/', - 'blog': 'https://www.pytorchlightning.ai/blog', - 'resources': 'https://pt-lightning.readthedocs.io/en/latest/#community-examples', - 'support': 'https://pt-lightning.rtfd.io/en/latest/', + 'governance': 'https://pytorch-lightning.readthedocs.io/en/latest/governance.html', + 'docs': 'https://lightning.ai/docs/fabric/', + 'twitter': 'https://twitter.com/LightningAI', + 'home': 'https://lightning.ai/docs/fabric/', + 'get_started': 
'', + 'blog': 'https://lightning.ai/pages/blog/', + 'support': '', + 'community': 'https://www.pytorchlightning.ai/community', + 'forums': 'https://lightning.ai/forums/', } -%} diff --git a/docs/source-fabric/api/utilities.rst b/docs/source-fabric/api/utilities.rst index 3d7864a408..fe58e14000 100644 --- a/docs/source-fabric/api/utilities.rst +++ b/docs/source-fabric/api/utilities.rst @@ -40,7 +40,7 @@ For example, when generating noise or data augmentations. This is very straightf fabric.seed_everything(seed + fabric.global_rank) -By default, ``seed_everything`` also handles the initialization of the seed in :class:`~torch.utils.data.DataLoader` worker processes: +By default, :meth:`~lightning.fabric.fabric.Fabric.seed_everything` also handles the initialization of the seed in :class:`~torch.utils.data.DataLoader` worker processes: .. code-block:: python diff --git a/docs/source-fabric/examples/index.rst b/docs/source-fabric/examples/index.rst index fdc1b6ef72..f8ca40769e 100644 --- a/docs/source-fabric/examples/index.rst +++ b/docs/source-fabric/examples/index.rst @@ -46,6 +46,13 @@ Examples :col_css: col-md-4 :height: 150 +.. displayitem:: + :header: K-Fold Cross Validation + :description: Cross validation helps you estimate the generalization error of a model and select the best one. + :button_link: https://github.com/Lightning-AI/lightning/tree/master/examples/fabric/kfold_cv + :col_css: col-md-4 + :height: 150 + .. 
displayitem:: :header: Active Learning :description: Coming soon diff --git a/docs/source-fabric/fundamentals/code_structure.rst b/docs/source-fabric/fundamentals/code_structure.rst index 4991c25458..36b11ff87e 100644 --- a/docs/source-fabric/fundamentals/code_structure.rst +++ b/docs/source-fabric/fundamentals/code_structure.rst @@ -4,7 +4,7 @@ How to structure your code with Fabric Fabric is flexible enough to adapt to any project structure, regardless of whether you are experimenting with a simple script or an extensive framework, because it makes no assumptions about how your code is organized. Despite the ultimate freedom, this page is meant to give beginners a template for how to organize a typical training script with Fabric: -We also have several :ref:`examples ` that you can take inspiration from. +We also have several :doc:`examples <../examples/index>` that you can take inspiration from. ---- @@ -140,4 +140,29 @@ Here is how the code would be structured if we did that periodically during trai Full Trainer ************ -Coming soon. +Building a fully-fledged, personalized Trainer can be a lot of work. +To get started quickly, copy `this `_ Trainer template and adapt it to your needs. + +- Only ~500 lines of code, all in one file +- Relies on Fabric to configure accelerator, devices, strategy +- Simple epoch based training with validation loop +- Only essential features included: Checkpointing, loggers, progress bar, callbacks, gradient accumulation + + +.. raw:: html + +
+
+ +.. displayitem:: + :header: Trainer Template + :description: Take our Fabric Trainer template and customize it for your needs + :button_link: https://github.com/Lightning-AI/lightning/tree/master/examples/fabric/build_your_own_trainer + :col_css: col-md-4 + :height: 150 + :tag: intermediate + +.. raw:: html + +
+
diff --git a/docs/source-fabric/fundamentals/convert.rst b/docs/source-fabric/fundamentals/convert.rst index 92c2aeb7c2..5967a8c4a0 100644 --- a/docs/source-fabric/fundamentals/convert.rst +++ b/docs/source-fabric/fundamentals/convert.rst @@ -18,7 +18,7 @@ Here are five easy steps to let :class:`~lightning.fabric.fabric.Fabric` scale y fabric.launch() -**Step 3:** Call :meth:`~lightning.fabric.fabric.Fabric.setup` on each model and optimizer pair and :meth:`~lightning_fabric.fabric.Fabric.setup_dataloaders` on all your data loaders. +**Step 3:** Call :meth:`~lightning.fabric.fabric.Fabric.setup` on each model and optimizer pair and :meth:`~lightning.fabric.fabric.Fabric.setup_dataloaders` on all your data loaders. .. code-block:: python @@ -90,7 +90,7 @@ All steps combined, this is how your code will change: That's it! You can now train on any device at any scale with a switch of a flag. -Check out our before-and-after example for `image classification `_ and many more :ref:`examples ` that use Fabric. +Check out our before-and-after example for `image classification `_ and many more :doc:`examples <../examples/index>` that use Fabric. ********** Next steps @@ -105,7 +105,7 @@ Next steps :header: Examples :description: See examples across computer vision, NLP, RL, etc. 
:col_css: col-md-4 - :button_link: ../fabric.html#examples + :button_link: ../examples/index.html :height: 150 :tag: basic @@ -121,7 +121,7 @@ Next steps :header: Build your own Trainer :description: Learn how to build a trainer tailored for you :col_css: col-md-4 - :button_link: ../fabric.html#build-your-own-trainer + :button_link: ../index.html#build-your-own-trainer :height: 150 :tag: intermediate diff --git a/docs/source-fabric/fundamentals/launch.rst b/docs/source-fabric/fundamentals/launch.rst index cc673a45c3..699970c410 100644 --- a/docs/source-fabric/fundamentals/launch.rst +++ b/docs/source-fabric/fundamentals/launch.rst @@ -15,7 +15,7 @@ To run your code distributed across many devices and many machines, you need to Simple Launch ************* -You can configure and launch processes on your machine directly with Fabric's :meth:`~lightning_fabric.fabric.Fabric.launch` method: +You can configure and launch processes on your machine directly with Fabric's :meth:`~lightning.fabric.fabric.Fabric.launch` method: .. code-block:: python diff --git a/docs/source-fabric/fundamentals/precision.rst b/docs/source-fabric/fundamentals/precision.rst index ab6ffa863e..eee9bf4696 100644 --- a/docs/source-fabric/fundamentals/precision.rst +++ b/docs/source-fabric/fundamentals/precision.rst @@ -13,7 +13,7 @@ Mixed precision training delivers significant computational speedup by conductin Switching to mixed precision has resulted in considerable training speedups since the introduction of Tensor Cores in the Volta and Turing architectures. It combines FP32 and lower-bit floating points (such as FP16) to reduce memory footprint and increase performance during model training and evaluation. It accomplishes this by recognizing the steps that require complete accuracy and employing a 32-bit floating point for those steps only while using a 16-bit floating point for the rest. 
-Compared to complete precision training, mixed precision training delivers all these benefits while ensuring no task-specific accuracy is lost [`1 `_]. +Compared to complete precision training, mixed precision training delivers all these benefits while ensuring no task-specific accuracy is lost `[1] `_. This is how you select the precision in Fabric: diff --git a/docs/source-fabric/guide/callbacks.rst b/docs/source-fabric/guide/callbacks.rst index 4e92875c98..87fd58e15b 100644 --- a/docs/source-fabric/guide/callbacks.rst +++ b/docs/source-fabric/guide/callbacks.rst @@ -91,7 +91,8 @@ The ones that have a matching method name will get called. Next steps ********** -Callbacks are a powerful tool for building a Trainer. Learn how in our comprehensive guide. +Callbacks are a powerful tool for building a Trainer. +See a real example of how they can be integrated in our Trainer template based on Fabric: .. raw:: html @@ -99,9 +100,9 @@ Callbacks are a powerful tool for building a Trainer. Learn how in our comprehen
.. displayitem:: - :header: Template Trainer - :description: Coming soon - :button_link: guide/trainer_template.html + :header: Trainer Template + :description: Take our Fabric Trainer template and customize it for your needs + :button_link: https://github.com/Lightning-AI/lightning/tree/master/examples/fabric/build_your_own_trainer :col_css: col-md-4 :height: 150 :tag: intermediate diff --git a/docs/source-fabric/guide/checkpoint.rst b/docs/source-fabric/guide/checkpoint.rst index 8528145c65..d8e9fa2db5 100644 --- a/docs/source-fabric/guide/checkpoint.rst +++ b/docs/source-fabric/guide/checkpoint.rst @@ -71,3 +71,32 @@ If you want to be in complete control of how states get restored, you can omit p model.load_state_dict(full_checkpoint["model"]) optimizer.load_state_dict(full_checkpoint["optimizer"]) ... + + + +---- + + +********** +Next steps +********** + +Learn from our template how Fabric's checkpoint mechanism can be integrated into a full Trainer: + +.. raw:: html + 
+
+ +.. displayitem:: + :header: Trainer Template + :description: Take our Fabric Trainer template and customize it for your needs + :button_link: https://github.com/Lightning-AI/lightning/tree/master/examples/fabric/build_your_own_trainer + :col_css: col-md-4 + :height: 150 + :tag: intermediate + +.. raw:: html + +
+
diff --git a/docs/source-fabric/guide/lightning_module.rst b/docs/source-fabric/guide/lightning_module.rst index 662bee9c16..6437bd9b19 100644 --- a/docs/source-fabric/guide/lightning_module.rst +++ b/docs/source-fabric/guide/lightning_module.rst @@ -5,7 +5,7 @@ Organize Your Code Any raw PyTorch can be converted to Fabric with zero refactoring required, giving maximum flexibility in how you want to organize your projects. However, when developing a project in a team or sharing the code publicly, it can be beneficial to conform to a standard format of how core pieces of the code are organized. -This is what the :class:`pytorch_lightning.core.module.LightningModule` was made for! +This is what the `LightningModule `_ was made for! Here is how you can neatly separate the research code (model, loss, optimization, etc.) from the "trainer" code (training loop, checkpointing, logging, etc.). @@ -60,7 +60,7 @@ Take these main ingredients and put them in a LightningModule: ... -This is a minimal :class:`pytorch_lightning.LightningModule`, but there are `many other useful hooks `_ you can use. +This is a minimal LightningModule, but there are `many other useful hooks `_ you can use. ---- diff --git a/docs/source-fabric/guide/trainer_template.rst b/docs/source-fabric/guide/trainer_template.rst index 4ef97805a4..f13eccfc11 100644 --- a/docs/source-fabric/guide/trainer_template.rst +++ b/docs/source-fabric/guide/trainer_template.rst @@ -1,5 +1,7 @@ +:orphan: + ################ Template Trainer ################ -Coming soon. 
+TODO: Write a guide explaining how to build a template like the one in https://github.com/Lightning-AI/lightning/tree/master/examples/fabric/build_your_own_trainer diff --git a/docs/source-fabric/index.rst b/docs/source-fabric/index.rst index eca5572eca..2e530f8a1e 100644 --- a/docs/source-fabric/index.rst +++ b/docs/source-fabric/index.rst @@ -1,6 +1,6 @@ -############# -Fabric (Beta) -############# +################ +Lightning Fabric +################ Fabric is the fast and lightweight way to scale PyTorch models without boilerplate code. @@ -50,9 +50,6 @@ Fabric is the fast and lightweight way to scale PyTorch models without boilerpla lr_scheduler.step() -.. note:: Fabric is currently in Beta. Its API is subject to change based on feedback. - - ---- @@ -60,25 +57,39 @@ Fabric is the fast and lightweight way to scale PyTorch models without boilerpla Why Fabric? *********** -Fabric differentiates itself from a fully-fledged trainer like Lightning :class:`pytorch_lightning.Trainer` in these key aspects: +Fabric differentiates itself from a fully-fledged trainer like Lightning's `Trainer `_ in these key aspects: **Fast to implement** There is no need to restructure your code: Just change a few lines in the PyTorch script and you'll be able to leverage Fabric features. **Maximum Flexibility** Write your own training and/or inference logic down to the individual optimizer calls. -You aren't forced to conform to a standardized epoch-based training loop like the one in Lightning :class:`pytorch_lightning.Trainer`. +You aren't forced to conform to a standardized epoch-based training loop like the one in Lightning `Trainer `_. You can do flexible iteration based training, meta-learning, cross-validation and other types of optimization algorithms without digging into framework internals. This also makes it super easy to adopt Fabric in existing PyTorch projects to speed-up and scale your models without the compromise on large refactors. 
Just remember: With great power comes a great responsibility. **Maximum Control** -The Lightning :class:`pytorch_lightning.Trainer` has many built-in features to make research simpler with less boilerplate, but debugging it requires some familiarity with the framework internals. +The Lightning `Trainer `_ has many built-in features to make research simpler with less boilerplate, but debugging it requires some familiarity with the framework internals. In Fabric, everything is opt-in. Think of it as a toolbox: You take out the tools (Fabric functions) you need and leave the other ones behind. This makes it easier to develop and debug your PyTorch code as you gradually add more features to it. Fabric provides important tools to remove undesired boilerplate code (distributed, hardware, checkpoints, logging, ...), but leaves the design and orchestration fully up to you. +---- + +************ +Installation +************ + +Fabric ships directly with Lightning. Install it with + +.. code-block:: bash + + pip install lightning + +For alternative ways to install, read the `installation guide `_. + ---- @@ -192,7 +203,7 @@ Build Your Own Trainer .. displayitem:: :header: Trainer Template :description: Take our Fabric Trainer template and customize it for your needs - :button_link: guide/trainer_template.html + :button_link: https://github.com/Lightning-AI/lightning/tree/master/examples/fabric/build_your_own_trainer :col_css: col-md-4 :height: 150 :tag: intermediate @@ -274,7 +285,7 @@ Advanced Topics Callbacks Logging Checkpoints - Trainer Template + Trainer Template .. 
toctree:: :maxdepth: 1 diff --git a/docs/source-pytorch/_templates/theme_variables.jinja b/docs/source-pytorch/_templates/theme_variables.jinja index 332a9820f4..d0e0187f9c 100644 --- a/docs/source-pytorch/_templates/theme_variables.jinja +++ b/docs/source-pytorch/_templates/theme_variables.jinja @@ -4,17 +4,16 @@ 'contributing': 'https://github.com/Lightning-AI/lightning/blob/master/.github/CONTRIBUTING.md', 'governance': 'https://pytorch-lightning.readthedocs.io/en/latest/governance.html', 'docs': 'https://pytorch-lightning.rtfd.io/en/latest', - 'twitter': 'https://twitter.com/PyTorchLightnin', + 'twitter': 'https://twitter.com/LightningAI', 'discuss': 'https://www.pytorchlightning.ai/community', 'tutorials': 'https://pytorch-lightning.readthedocs.io/en/latest/#tutorials', - 'previous_pytorch_versions': 'https://pytorch-lightning.rtfd.io/en/latest/', 'home': 'https://pytorch-lightning.rtfd.io/en/latest/', 'get_started': 'https://pytorch-lightning.readthedocs.io/en/latest/starter/introduction.html', 'features': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'blog': 'https://www.pytorchlightning.ai/blog', + 'blog': 'https://lightning.ai/pages/blog/', 'resources': 'https://pytorch-lightning.readthedocs.io/en/latest/#community-examples', 'support': 'https://pytorch-lightning.rtfd.io/en/latest/', 'community': 'https://www.pytorchlightning.ai/community', - 'forums': 'https://github.com/Lightning-AI/lightning/discussions', + 'forums': 'https://lightning.ai/forums/', } -%} diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py index 75f13c4c6d..18dc5b3314 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -608,6 +608,24 @@ class Fabric: return self._strategy.load_checkpoint(path=path, state=state) def launch(self, function: Optional[Callable[["Fabric"], Any]] = None, *args: Any, **kwargs: Any) -> Any: + """Launch and initialize all the processes needed for distributed execution. 
+ + Args: + function: Optional function to launch when using a spawn/fork-based strategy, for example, when using the + XLA strategy (``accelerator="tpu"``). The function must accept at least one argument, to which + the Fabric object itself will be passed. + *args: Optional positional arguments to be passed to the function. + **kwargs: Optional keyword arguments to be passed to the function. + + Returns: + Returns the output of the function that ran in worker process with rank 0. + + The ``launch()`` method should only be used if you intend to specify accelerator, devices, and so on in + the code (programmatically). If you are launching with the Lightning CLI, ``lightning run model ...``, remove + ``launch()`` from your code. + + ``launch()`` is a no-op when called multiple times and no function is passed in. + """ if _is_using_cli(): raise RuntimeError( "This script was launched through the CLI, and processes have already been created. Calling " @@ -691,7 +709,7 @@ class Fabric: def seed_everything(seed: Optional[int] = None, workers: Optional[bool] = None) -> int: """Helper function to seed everything without explicitly importing Lightning. - See :func:`lightning.pytorch.seed_everything` for more details. + See :func:`lightning.fabric.utilities.seed.seed_everything` for more details. """ if workers is None: # Lightning sets `workers=False` by default to avoid breaking reproducibility, but since this is a new