From 20ff50caa6aad8d5824d5ec889f2ba8f334c454a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sat, 10 Apr 2021 08:55:07 +0200
Subject: [PATCH] Accelerator API docs (#6936)

Co-authored-by: ananthsub
Co-authored-by: William Falcon
Co-authored-by: Jirka Borovec
---
 docs/source/api_references.rst                | 15 ++++
 docs/source/extensions/accelerators.rst       | 54 +++++++++++-
 docs/source/extensions/plugins.rst            |  2 +
 pytorch_lightning/accelerators/accelerator.py | 85 ++++++++++---------
 pytorch_lightning/accelerators/cpu.py         | 10 +--
 pytorch_lightning/accelerators/gpu.py         | 10 +--
 pytorch_lightning/accelerators/tpu.py         |  9 +-
 7 files changed, 125 insertions(+), 60 deletions(-)

diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst
index cbe11defc3..2202ef3048 100644
--- a/docs/source/api_references.rst
+++ b/docs/source/api_references.rst
@@ -1,6 +1,21 @@
 API References
 ==============
 
+Accelerator API
+---------------
+
+.. currentmodule:: pytorch_lightning.accelerators
+
+.. autosummary::
+    :toctree: api
+    :nosignatures:
+    :template: classtemplate.rst
+
+    Accelerator
+    CPUAccelerator
+    GPUAccelerator
+    TPUAccelerator
+
 Core API
 --------
 
diff --git a/docs/source/extensions/accelerators.rst b/docs/source/extensions/accelerators.rst
index f88dc3f299..d6deb73789 100644
--- a/docs/source/extensions/accelerators.rst
+++ b/docs/source/extensions/accelerators.rst
@@ -1,10 +1,56 @@
+.. _accelerators:
+
 ############
 Accelerators
 ############
 Accelerators connect a Lightning Trainer to arbitrary accelerators (CPUs, GPUs, TPUs, etc). Accelerators
-also manage distributed accelerators (like DP, DDP, HPC cluster).
-
-Accelerators can also be configured to run on arbitrary clusters using Plugins or to link up to arbitrary
+also manage distributed communication through :ref:`Plugins` (like DP, DDP, HPC cluster) and
+can also be configured to run on arbitrary clusters or to link up to arbitrary
 computational strategies like 16-bit precision via AMP and Apex.
 
-**For help setting up custom plugin/accelerator please reach out to us at support@pytorchlightning.ai**
+An Accelerator is meant to deal with one type of hardware.
+Currently there are accelerators for:
+
+- CPU
+- GPU
+- TPU
+
+Each Accelerator gets two plugins upon initialization:
+One to handle differences from the training routine and one to handle different precisions.
+
+.. testcode::
+
+    from pytorch_lightning import Trainer
+    from pytorch_lightning.accelerators import GPUAccelerator
+    from pytorch_lightning.plugins import NativeMixedPrecisionPlugin, DDPPlugin
+
+    accelerator = GPUAccelerator(
+        precision_plugin=NativeMixedPrecisionPlugin(),
+        training_type_plugin=DDPPlugin(),
+    )
+    trainer = Trainer(accelerator=accelerator)
+
+
+We expose Accelerators and Plugins mainly for expert users who want to extend Lightning to work with new
+hardware and distributed training or clusters.
+
+
+.. warning:: The Accelerator API is in beta and subject to change.
+    For help setting up custom plugins/accelerators, please reach out to us at **support@pytorchlightning.ai**
+
+----------
+
+
+Accelerator API
+---------------
+
+.. currentmodule:: pytorch_lightning.accelerators
+
+.. autosummary::
+    :nosignatures:
+    :template: classtemplate.rst
+
+    Accelerator
+    CPUAccelerator
+    GPUAccelerator
+    TPUAccelerator
diff --git a/docs/source/extensions/plugins.rst b/docs/source/extensions/plugins.rst
index 7f2c904e6c..403c8c320f 100644
--- a/docs/source/extensions/plugins.rst
+++ b/docs/source/extensions/plugins.rst
@@ -1,3 +1,5 @@
+.. _plugins:
+
 #######
 Plugins
 #######
diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py
index e97450fdbd..fea4ae725b 100644
--- a/pytorch_lightning/accelerators/accelerator.py
+++ b/pytorch_lightning/accelerators/accelerator.py
@@ -12,12 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import contextlib
-from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Sequence, TYPE_CHECKING, Union
+from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Sequence, Union
 
 import torch
 from torch.optim import Optimizer
 from torch.utils.data import DataLoader
 
+import pytorch_lightning as pl
 from pytorch_lightning.core import LightningModule
 from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin
 from pytorch_lightning.plugins.training_type import TrainingTypePlugin
@@ -26,11 +27,6 @@ from pytorch_lightning.utilities import rank_zero_warn
 from pytorch_lightning.utilities.apply_func import move_data_to_device
 from pytorch_lightning.utilities.enums import AMPType, GradClipAlgorithmType, LightningEnum
 
-if TYPE_CHECKING:
-    from torch.cuda.amp import GradScaler
-
-    from pytorch_lightning.trainer.trainer import Trainer
-
 _STEP_OUTPUT_TYPE = Union[torch.Tensor, Dict[str, torch.Tensor], None]
 
 
@@ -40,6 +36,7 @@ class Accelerator(object):
     An Accelerator is meant to deal with one type of Hardware.
 
     Currently there are accelerators for:
+
     - CPU
     - GPU
     - TPU
@@ -79,9 +76,10 @@ class Accelerator(object):
         """
         self.training_type_plugin.setup_environment()
 
-    def setup(self, trainer: 'Trainer', model: LightningModule) -> None:
+    def setup(self, trainer: 'pl.Trainer', model: LightningModule) -> None:
         """
         Setup plugins for the trainer fit and creates optimizers.
+
         Args:
             trainer: the trainer instance
             model: the LightningModule
@@ -91,23 +89,23 @@ class Accelerator(object):
         self.setup_optimizers(trainer)
         self.setup_precision_plugin(self.precision_plugin)
 
-    def start_training(self, trainer: 'Trainer') -> None:
+    def start_training(self, trainer: 'pl.Trainer') -> None:
         self.training_type_plugin.start_training(trainer)
 
-    def start_evaluating(self, trainer: 'Trainer') -> None:
+    def start_evaluating(self, trainer: 'pl.Trainer') -> None:
         self.training_type_plugin.start_evaluating(trainer)
 
-    def start_predicting(self, trainer: 'Trainer') -> None:
+    def start_predicting(self, trainer: 'pl.Trainer') -> None:
         self.training_type_plugin.start_predicting(trainer)
 
-    def pre_dispatch(self, trainer: 'Trainer') -> None:
+    def pre_dispatch(self, trainer: 'pl.Trainer') -> None:
         """Hook to do something before the training/evaluation/prediction starts."""
         self.training_type_plugin.pre_dispatch()
         if self.training_type_plugin.setup_optimizers_in_pre_dispatch:
             self.setup_optimizers(trainer)
         self.precision_plugin.pre_dispatch()
 
-    def post_dispatch(self, trainer: 'Trainer') -> None:
+    def post_dispatch(self, trainer: 'pl.Trainer') -> None:
         """Hook to do something before the training/evaluation/prediction starts."""
         self.training_type_plugin.post_dispatch()
         self.precision_plugin.post_dispatch()
@@ -169,12 +167,13 @@ class Accelerator(object):
 
         Args:
             args: the arguments for the models training step. Can consist of the following:
-                batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
-                    The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
-                batch_idx (int): Integer displaying index of this batch
-                optimizer_idx (int): When using multiple optimizers, this argument will also be present.
-                hiddens(:class:`~torch.Tensor`): Passed in if
-                    :paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0.
+
+            - batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
+              The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
+            - batch_idx (int): Integer displaying index of this batch
+            - optimizer_idx (int): When using multiple optimizers, this argument will also be present.
+            - hiddens(:class:`~torch.Tensor`): Passed in if
+              :paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0.
 
         """
         args[0] = self.to_device(args[0])
@@ -190,11 +189,12 @@ class Accelerator(object):
 
         Args:
             args: the arguments for the models validation step. Can consist of the following:
-                batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
-                    The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
-                batch_idx (int): The index of this batch
-                dataloader_idx (int): The index of the dataloader that produced this batch
-                    (only if multiple val dataloaders used)
+
+            - batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
+              The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
+            - batch_idx (int): The index of this batch
+            - dataloader_idx (int): The index of the dataloader that produced this batch
+              (only if multiple val dataloaders used)
 
         """
         batch = self.to_device(args[0])
@@ -208,11 +208,12 @@ class Accelerator(object):
 
         Args:
             args: the arguments for the models test step. Can consist of the following:
-                batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
-                    The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
-                batch_idx (int): The index of this batch.
-                dataloader_idx (int): The index of the dataloader that produced this batch
-                    (only if multiple test dataloaders used).
+
+            - batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
+              The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
+            - batch_idx (int): The index of this batch.
+            - dataloader_idx (int): The index of the dataloader that produced this batch
+              (only if multiple test dataloaders used).
 
         """
         batch = self.to_device(args[0])
@@ -226,11 +227,13 @@ class Accelerator(object):
 
         Args:
             args: the arguments for the models predict step. Can consist of the following:
-                batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
-                    The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
-                batch_idx (int): The index of this batch.
-                dataloader_idx (int): The index of the dataloader that produced this batch
-                    (only if multiple predict dataloaders used).
+
+            - batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
+              The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
+            - batch_idx (int): The index of this batch.
+            - dataloader_idx (int): The index of the dataloader that produced this batch
+              (only if multiple predict dataloaders used).
+
 
         """
         batch = self.to_device(args[0])
@@ -336,7 +339,7 @@ class Accelerator(object):
         """Hook to do something at the end of the training"""
         pass
 
-    def setup_optimizers(self, trainer: 'Trainer') -> None:
+    def setup_optimizers(self, trainer: 'pl.Trainer') -> None:
         """creates optimizers and schedulers
 
         Args:
@@ -385,7 +388,7 @@ class Accelerator(object):
         return self.precision_plugin.precision
 
     @property
-    def scaler(self) -> Optional['GradScaler']:
+    def scaler(self) -> Optional['torch.cuda.amp.GradScaler']:
         return getattr(self.precision_plugin, 'scaler', None)
 
 
@@ -423,6 +426,7 @@ class Accelerator(object):
             tensor: tensor of shape (batch, ...)
             group: the process group to gather results from. Defaults to all processes (world)
             sync_grads: flag that allows users to synchronize gradients for all_gather op
+
         Return:
             A tensor of shape (world_size, batch, ...)
         """
@@ -451,7 +455,8 @@
         shard the model instantly - useful for extremely large models. Can save memory and
         initialization time.
 
-        Returns: Model parallel context.
+        Returns:
+            Model parallel context.
         """
         with self.training_type_plugin.model_sharded_context():
             yield
@@ -498,7 +503,9 @@
         """
         Allow model parallel hook to be called in suitable environments determined by the training type plugin.
         This is useful for when we want to shard the model once within fit.
-        Returns: True if we want to call the model parallel setup hook.
+
+        Returns:
+            True if we want to call the model parallel setup hook.
         """
         return self.training_type_plugin.call_configure_sharded_model_hook
 
@@ -512,7 +519,9 @@
         Override to delay setting optimizers and schedulers till after dispatch.
         This is useful when the `TrainingTypePlugin` requires operating on the wrapped accelerator model.
         However this may break certain precision plugins such as APEX which require optimizers to be set.
-        Returns: If True, delay setup optimizers till pre_dispatch, else call within setup.
+
+        Returns:
+            If True, delay setup optimizers until `pre_dispatch`, else call within `setup`.
         """
         return self.training_type_plugin.setup_optimizers_in_pre_dispatch
 
diff --git a/pytorch_lightning/accelerators/cpu.py b/pytorch_lightning/accelerators/cpu.py
index 22ea8f1e1b..458f058c27 100644
--- a/pytorch_lightning/accelerators/cpu.py
+++ b/pytorch_lightning/accelerators/cpu.py
@@ -11,20 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING
-
+import pytorch_lightning as pl
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.plugins.precision import MixedPrecisionPlugin
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
-if TYPE_CHECKING:
-    from pytorch_lightning.core.lightning import LightningModule
-    from pytorch_lightning.trainer.trainer import Trainer
-
 
 class CPUAccelerator(Accelerator):
+    """ Accelerator for CPU devices. """
 
-    def setup(self, trainer: 'Trainer', model: 'LightningModule') -> None:
+    def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
         """
         Raises:
             MisconfigurationException:
diff --git a/pytorch_lightning/accelerators/gpu.py b/pytorch_lightning/accelerators/gpu.py
index c23960e4fd..b5b037e46d 100644
--- a/pytorch_lightning/accelerators/gpu.py
+++ b/pytorch_lightning/accelerators/gpu.py
@@ -13,24 +13,22 @@
 # limitations under the License.
 import logging
 import os
-from typing import Any, TYPE_CHECKING
+from typing import Any
 
 import torch
 
+import pytorch_lightning as pl
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.plugins import DataParallelPlugin
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
-if TYPE_CHECKING:
-    from pytorch_lightning.core.lightning import LightningModule
-    from pytorch_lightning.trainer.trainer import Trainer
-
 _log = logging.getLogger(__name__)
 
 
 class GPUAccelerator(Accelerator):
+    """ Accelerator for GPU devices. """
 
-    def setup(self, trainer: 'Trainer', model: 'LightningModule') -> None:
+    def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
         """
         Raises:
             MisconfigurationException:
diff --git a/pytorch_lightning/accelerators/tpu.py b/pytorch_lightning/accelerators/tpu.py
index 087f6df7a1..a0af6ba11a 100644
--- a/pytorch_lightning/accelerators/tpu.py
+++ b/pytorch_lightning/accelerators/tpu.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Callable, TYPE_CHECKING, Union
+from typing import Any, Callable, Union
 
 from torch.optim import Optimizer
 
@@ -28,14 +28,13 @@ if _XLA_AVAILABLE:
 
     xla_clip_grad_norm_ = clip_grad_norm_
 
-if TYPE_CHECKING:
-    from pytorch_lightning.core.lightning import LightningModule
-    from pytorch_lightning.trainer.trainer import Trainer
+import pytorch_lightning as pl
 
 
 class TPUAccelerator(Accelerator):
+    """ Accelerator for TPU devices. """
 
-    def setup(self, trainer: 'Trainer', model: 'LightningModule') -> None:
+    def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
         """
         Raises:
             MisconfigurationException:
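
For readers who want to try the API documented above, a minimal usage sketch follows. It assumes the
1.3-era pytorch_lightning API that this patch targets and is not part of the patch itself; it simply
mirrors the new testcode example from accelerators.rst and reads back the precision and scaler
properties referenced in the accelerator.py hunks:

    # Minimal sketch (assumes the 1.3-era pytorch_lightning API documented above).
    import pytorch_lightning as pl
    from pytorch_lightning.accelerators import GPUAccelerator
    from pytorch_lightning.plugins import DDPPlugin, NativeMixedPrecisionPlugin

    # Each Accelerator is built from two plugins: one for the training routine
    # (here DDP) and one for precision handling (here native AMP).
    accelerator = GPUAccelerator(
        precision_plugin=NativeMixedPrecisionPlugin(),
        training_type_plugin=DDPPlugin(),
    )

    # Properties forwarded by the Accelerator to its precision plugin.
    print(accelerator.precision)  # delegated to the precision plugin
    print(accelerator.scaler)     # the plugin's torch.cuda.amp.GradScaler, or None

    # The fully constructed accelerator is handed straight to the Trainer,
    # exactly as in the docs added by this patch.
    trainer = pl.Trainer(accelerator=accelerator)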