Update old device flags (#12471)

Rohit Gupta 2022-03-28 20:14:59 +05:30 committed by GitHub
parent 1c50ff7a2f
commit fdcc09cf95
14 changed files with 58 additions and 52 deletions

View File

@ -244,8 +244,8 @@ The table below lists examples of possible input formats and how they are interp
.. note::
When specifying number of gpus as an integer ``devices=k``, setting the trainer flag
``auto_select_gpus=True`` will automatically help you find ``k`` gpus that are not
When specifying the number of devices as an integer ``devices=k``, setting the trainer flag
``auto_select_gpus=True`` will automatically help you find ``k`` GPUs that are not
occupied by other processes. This is especially useful when GPUs are configured
to be in "exclusive mode", such that only one process at a time can access them.
For more details see the :doc:`trainer guide <../common/trainer>`.
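As a minimal sketch (assuming at least ``k=3`` GPUs are free on the machine), the two flags combine like this:

.. code-block:: python

    from pytorch_lightning import Trainer

    # picks any 3 GPUs that are currently unoccupied, instead of simply taking GPUs 0-2
    trainer = Trainer(accelerator="gpu", devices=3, auto_select_gpus=True)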
@ -295,7 +295,7 @@ For a deeper understanding of what Lightning is doing, feel free to read this
Data Parallel
^^^^^^^^^^^^^
:class:`~torch.nn.DataParallel` (DP) splits a batch across k GPUs.
That is, if you have a batch of 32 and use DP with 2 gpus, each GPU will process 16 samples,
That is, if you have a batch of 32 and use DP with 2 GPUs, each GPU will process 16 samples,
after which the root node will aggregate the results.
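A quick sketch of that scenario (hypothetical 2-GPU setup):

.. code-block:: python

    from pytorch_lightning import Trainer

    # a batch of 32 is split into two sub-batches of 16, one per GPU,
    # and the results are aggregated on the root device
    trainer = Trainer(accelerator="gpu", devices=2, strategy="dp")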
.. warning:: DP use is discouraged by PyTorch and Lightning. State is not maintained on the replicas created by the
@ -749,7 +749,7 @@ Let's say you have a batch size of 7 in your dataloader.
def train_dataloader(self):
return Dataset(..., batch_size=7)
In DDP, DDP_SPAWN, Deepspeed, DDP_SHARDED, or Horovod your effective batch size will be 7 * gpus * num_nodes.
In DDP, DDP_SPAWN, DeepSpeed, DDP_SHARDED, or Horovod, your effective batch size will be 7 * devices * num_nodes.
.. code-block:: python
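    # illustrative sketch, not taken from the original example: with DDP on
    # 8 devices across 4 nodes, every process loads batches of 7, so the
    # effective batch size is 7 * 8 * 4 = 224
    devices, num_nodes = 8, 4
    effective_batch_size = 7 * devices * num_nodes  # 224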
@ -786,7 +786,7 @@ The reason is that the full batch is visible to all GPUs on the node when using
Torch Distributed Elastic
-------------------------
Lightning supports the use of Torch Distributed Elastic to enable fault-tolerant and elastic distributed job scheduling. To use it, specify the 'ddp' or 'ddp2' backend and the number of gpus you want to use in the trainer.
Lightning supports the use of Torch Distributed Elastic to enable fault-tolerant and elastic distributed job scheduling. To use it, specify the 'ddp' or 'ddp2' backend and the number of GPUs you want to use in the trainer.
.. code-block:: python
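    # sketch: select the DDP strategy and the per-node device count; the elastic
    # launcher (e.g. torchrun) supplies the rank and world-size environment variables
    from pytorch_lightning import Trainer

    trainer = Trainer(strategy="ddp", accelerator="gpu", devices=8)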

View File

@ -34,7 +34,7 @@ Specify the number of IPUs to train with. Note that when training with IPUs, you
.. code-block:: python
trainer = pl.Trainer(ipus=8) # Train using data parallel on 8 IPUs
trainer = pl.Trainer(accelerator="ipu", devices=8) # Train using data parallel on 8 IPUs
IPUs only support specifying a single number to allocate devices, which is handled via the underlying libraries.
@ -53,7 +53,7 @@ set the precision flag.
import pytorch_lightning as pl
model = MyLightningModule()
trainer = pl.Trainer(ipus=8, precision=16)
trainer = pl.Trainer(accelerator="ipu", devices=8, precision=16)
trainer.fit(model)
You can also use pure 16-bit training, where the weights are also in 16-bit precision.
@ -65,7 +65,7 @@ You can also use pure 16-bit training, where the weights are also in 16-bit prec
model = MyLightningModule()
model = model.half()
trainer = pl.Trainer(ipus=8, precision=16)
trainer = pl.Trainer(accelerator="ipu", devices=8, precision=16)
trainer.fit(model)
Advanced IPU options
@ -83,7 +83,7 @@ IPUs provide further optimizations to speed up training. By using the ``IPUStrat
from pytorch_lightning.strategies import IPUStrategy
model = MyLightningModule()
trainer = pl.Trainer(ipus=8, strategy=IPUStrategy(device_iterations=32))
trainer = pl.Trainer(accelerator="ipu", devices=8, strategy=IPUStrategy(device_iterations=32))
trainer.fit(model)
Note that by default we return the last device iteration loss. You can override this by passing in your own ``poptorch.Options`` and setting the AnchorMode as described in the `PopTorch documentation <https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/reference.html#poptorch.Options.anchorMode>`__.
@ -102,7 +102,9 @@ Note that by default we return the last device iteration loss. You can override
training_opts.anchorMode(poptorch.AnchorMode.All)
training_opts.deviceIterations(32)
trainer = Trainer(ipus=8, strategy=IPUStrategy(inference_opts=inference_opts, training_opts=training_opts))
trainer = Trainer(
accelerator="ipu", devices=8, strategy=IPUStrategy(inference_opts=inference_opts, training_opts=training_opts)
)
trainer.fit(model)
You can also override all options by passing the ``poptorch.Options`` to the plugin. See `PopTorch options documentation <https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/batching.html>`__ for more information.
@ -124,7 +126,7 @@ Lightning supports dumping all reports to a directory to open using the tool.
from pytorch_lightning.strategies import IPUStrategy
model = MyLightningModule()
trainer = pl.Trainer(ipus=8, strategy=IPUStrategy(autoreport_dir="report_dir/"))
trainer = pl.Trainer(accelerator="ipu", devices=8, strategy=IPUStrategy(autoreport_dir="report_dir/"))
trainer.fit(model)
This will dump all reports to ``report_dir/`` which can then be opened using the Graph Analyser Tool, see `Opening Reports <https://docs.graphcore.ai/projects/graph-analyser-userguide/en/latest/graph-analyser.html#opening-reports>`__.
@ -142,7 +144,7 @@ Below is an example using the block annotation in a LightningModule.
Currently, when using model parallelism we do not infer the number of IPUs required for you. This is done via the annotations themselves. If you specify 4 different IDs when defining Blocks, this means your model will be split onto 4 different IPUs.
This is also mutually exclusive with the Trainer flag. In other words, if your model is split onto 2 IPUs and you set ``Trainer(ipus=4)`` this will require 8 IPUs in total: data parallelism will be used to replicate the two-IPU model 4 times.
This is also mutually exclusive with the Trainer flag. In other words, if your model is split onto 2 IPUs and you set ``Trainer(accelerator="ipu", devices=4)`` this will require 8 IPUs in total: data parallelism will be used to replicate the two-IPU model 4 times.
When pipelining the model you must also increase `device_iterations` to keep the devices saturated with data, i.e. whilst one device in the model pipeline processes a batch of data, the other device can start on the next batch. For example, if the model is split onto 4 IPUs, `device_iterations` must be at least 4.
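A back-of-the-envelope sketch of that arithmetic (hypothetical numbers, using the ``IPUStrategy`` shown in the surrounding examples):

.. code-block:: python

    import pytorch_lightning as pl
    from pytorch_lightning.strategies import IPUStrategy

    model_ipus = 2  # the model is split onto 2 IPUs via block annotations
    replicas = 4    # devices=4 replicates the 2-IPU model 4 times
    total_ipus = model_ipus * replicas  # 8 IPUs required in total

    # device_iterations should be at least the pipeline depth (here 2)
    trainer = pl.Trainer(accelerator="ipu", devices=replicas, strategy=IPUStrategy(device_iterations=4))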
@ -174,7 +176,7 @@ Below is an example using the block annotation in a LightningModule.
model = MyLightningModule()
trainer = pl.Trainer(ipus=8, strategy=IPUStrategy(device_iterations=20))
trainer = pl.Trainer(accelerator="ipu", devices=8, strategy=IPUStrategy(device_iterations=20))
trainer.fit(model)
@ -217,7 +219,7 @@ You can also use the block context manager within the forward function, or any o
model = MyLightningModule()
trainer = pl.Trainer(ipus=8, strategy=IPUStrategy(device_iterations=20))
trainer = pl.Trainer(accelerator="ipu", devices=8, strategy=IPUStrategy(device_iterations=20))
trainer.fit(model)

View File

@ -127,7 +127,7 @@ TPU core training
Lightning supports training on a single TPU core or 8 TPU cores.
The Trainer parameters ``tpu_cores`` defines how many TPU cores to train on (1 or 8) / Single TPU to train on [1].
The Trainer argument ``devices``, along with ``accelerator="tpu"``, defines how many TPU cores to train on (1 or 8), or which single TPU core to train on ([1]).
For single-TPU training, just pass the TPU core ID [1-8] in a list.
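A minimal sketch of both forms (assuming the integer/list semantics described above):

.. code-block:: python

    from pytorch_lightning import Trainer

    trainer = Trainer(accelerator="tpu", devices=8)    # train on 8 TPU cores
    trainer = Trainer(accelerator="tpu", devices=[5])  # train only on the core with ID 5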

View File

@ -732,7 +732,8 @@ When enabled, it can result in a performance hit and can be disabled in most cas
from pytorch_lightning.strategies import DDPStrategy
trainer = pl.Trainer(
gpus=2,
accelerator="gpu",
devices=2,
strategy=DDPStrategy(find_unused_parameters=False),
)
@ -741,7 +742,8 @@ When enabled, it can result in a performance hit and can be disabled in most cas
from pytorch_lightning.strategies import DDPSpawnStrategy
trainer = pl.Trainer(
gpus=2,
accelerator="gpu",
devices=2,
strategy=DDPSpawnStrategy(find_unused_parameters=False),
)
@ -894,7 +896,8 @@ When using Post-localSGD, you must also pass ``model_averaging_period`` to allow
model = MyModel()
trainer = Trainer(
gpus=4,
accelerator="gpu",
devices=4,
strategy=DDPStrategy(
ddp_comm_state=post_localSGD.PostLocalSGDState(
process_group=None,

View File

@ -32,7 +32,7 @@ You can launch any Lightning model on Grid using the Grid `CLI <https://pypi.org
.. code-block:: bash
grid run --instance_type v100 --gpus 4 my_model.py --gpus 4 --learning_rate 'uniform(1e-6, 1e-1, 20)' --layers '[2, 4, 8, 16]'
grid run --instance_type v100 --gpus 4 my_model.py --accelerator 'gpu' --devices 4 --learning_rate 'uniform(1e-6, 1e-1, 20)' --layers '[2, 4, 8, 16]'
You can also start runs or interactive sessions from the `Grid platform <https://platform.grid.ai>`_, where you can upload datasets, view artifacts and logs, track cost, open TensorBoard, and much more.

View File

@ -217,7 +217,7 @@ as well as custom accelerator instances.
# CPU accelerator
trainer = Trainer(accelerator="cpu")
# Training with GPU Accelerator using 2 gpus
# Training with GPU Accelerator using 2 GPUs
trainer = Trainer(devices=2, accelerator="gpu")
# Training with TPU Accelerator using 8 TPU cores
@ -350,16 +350,16 @@ auto_select_gpus
|
If enabled and `gpus` is an integer, pick available gpus automatically.
If enabled and ``devices`` is an integer, pick available GPUs automatically.
This is especially useful when GPUs are configured to be in "exclusive mode",
such that only one process at a time can access them.
Example::
# no auto selection (picks first 2 gpus on system, may fail if other process is occupying)
# no auto selection (picks first 2 GPUs on system, may fail if other process is occupying)
trainer = Trainer(accelerator="gpu", devices=2, auto_select_gpus=False)
# enable auto selection (will find two available gpus on system)
# enable auto selection (will find two available GPUs on system)
trainer = Trainer(accelerator="gpu", devices=2, auto_select_gpus=True)
# specifies all GPUs regardless of their availability
@ -696,8 +696,8 @@ See Also:
gpus
^^^^
.. warning:: Setting `Trainer(gpus=x)` is deprecated in v1.6 and will be removed"
in v2.0. Please use `Trainer(accelerator='gpu', devices=x)` instead.
.. warning:: Setting `Trainer(gpus=x)` is deprecated in v1.6 and will be removed
in v2.0. Please use `Trainer(accelerator="gpu", devices=x)` instead.
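A minimal before/after sketch of that migration:

.. code-block:: python

    from pytorch_lightning import Trainer

    # deprecated since v1.6
    trainer = Trainer(gpus=2)

    # equivalent going forward
    trainer = Trainer(accelerator="gpu", devices=2)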
.. raw:: html
@ -1189,7 +1189,7 @@ Half precision, or mixed precision, is the combined use of 32 and 16 bit floatin
trainer = Trainer(precision=32)
# 16-bit precision
trainer = Trainer(precision=16, gpus=1) # works only on CUDA
trainer = Trainer(precision=16, accelerator="gpu", devices=1) # works only on CUDA
# bfloat16 precision
trainer = Trainer(precision="bf16")
@ -1214,7 +1214,7 @@ Half precision, or mixed precision, is the combined use of 32 and 16 bit floatin
:skipif: not _APEX_AVAILABLE or not torch.cuda.is_available()
# turn on 16-bit
trainer = Trainer(amp_backend="apex", amp_level="O2", precision=16, gpus=1)
trainer = Trainer(amp_backend="apex", amp_level="O2", precision=16, accelerator="gpu", devices=1)
process_position
@ -1412,7 +1412,7 @@ Supports passing different training strategies with aliases (ddp, ddp_spawn, etc
.. code-block:: python
# Training with the DistributedDataParallel strategy on 4 gpus
# Training with the DistributedDataParallel strategy on 4 GPUs
trainer = Trainer(strategy="ddp", accelerator="gpu", devices=4)
# Training with the DDP Spawn strategy using 4 cpu processes

View File

@ -37,10 +37,10 @@ Lightning supports a variety of plugins to speed up distributed GPU training. Mo
# run on 1 gpu
trainer = Trainer(accelerator="gpu", devices=1)
# train on 8 gpus, using the DDP strategy
# train on 8 GPUs, using the DDP strategy
trainer = Trainer(accelerator="gpu", devices=8, strategy="ddp")
# train on multiple GPUs across nodes (uses 8 gpus in total)
# train on multiple GPUs across nodes (uses 8 GPUs in total)
trainer = Trainer(accelerator="gpu", devices=2, num_nodes=4)
@ -140,7 +140,7 @@ This is a limitation of Python ``.spawn()`` and PyTorch.
TPU Training
============
You can set the ``tpu_cores`` trainer flag to 1, [7] (specific core) or eight cores.
You can set the ``devices`` trainer argument to 1, to [7] (a specific core), or to 8 cores.
.. code-block:: python
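    # an illustrative sketch of the three accepted forms
    trainer = Trainer(accelerator="tpu", devices=1)    # one core
    trainer = Trainer(accelerator="tpu", devices=[7])  # the specific core with ID 7
    trainer = Trainer(accelerator="tpu", devices=8)    # all 8 cores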
@ -214,7 +214,7 @@ Lightning offers mixed precision training for GPUs and CPUs, as well as bfloat16
:skipif: torch.cuda.device_count() < 4
# 16-bit precision
trainer = Trainer(precision=16, gpus=4)
trainer = Trainer(precision=16, accelerator="gpu", devices=4)
Read more about :ref:`mixed-precision training <amp>`.
@ -361,7 +361,7 @@ Here is an example of an advanced use case:
.. testcode::
# Scenario for a GAN with gradient accumulation every two batches and optimized for multiple gpus.
# Scenario for a GAN with gradient accumulation every two batches and optimized for multiple GPUs.
class SimpleGAN(LightningModule):
def __init__(self):
super().__init__()

View File

@ -390,10 +390,10 @@ CPU
trainer = Trainer()
# train on 8 CPUs
trainer = Trainer(num_processes=8)
trainer = Trainer(accelerator="cpu", devices=8)
# train on 1024 CPUs across 128 machines
trainer = pl.Trainer(num_processes=8, num_nodes=128)
trainer = pl.Trainer(accelerator="cpu", devices=8, num_nodes=128)
GPU
---
@ -403,10 +403,10 @@ GPU
# train on 1 GPU
trainer = pl.Trainer(accelerator="gpu", devices=1)
# train on multiple GPUs across nodes (32 gpus here)
# train on multiple GPUs across nodes (32 GPUs here)
trainer = pl.Trainer(accelerator="gpu", devices=4, num_nodes=8)
# train on gpu 1, 3, 5 (3 gpus total)
# train on GPUs 1, 3, 5 (3 GPUs total)
trainer = pl.Trainer(accelerator="gpu", devices=[1, 3, 5])
# Multi GPU with mixed precision
@ -437,7 +437,7 @@ IPU
.. code-block:: python
# Train on IPUs
trainer = pl.Trainer(ipus=8)
trainer = pl.Trainer(accelerator="ipu", devices=8)
Checkpointing

View File

@ -182,7 +182,7 @@ Here is an example while running on 256 GPUs (eight GPUs times 32 nodes).
self.barrier()
Lite(strategy="ddp", gpus=8, num_nodes=32, accelerator="gpu").run()
Lite(strategy="ddp", devices=8, num_nodes=32, accelerator="gpu").run()
If you require custom data or model device placement, you can deactivate

View File

@ -50,7 +50,7 @@ This script shows you the result of the conversion to the `LightningModule` and
python mnist_examples/image_classifier_4_lightning_module.py
# GPUs (any number)
python mnist_examples/image_classifier_4_lightning_module.py --trainer.gpus 2
python mnist_examples/image_classifier_4_lightning_module.py --trainer.accelerator 'gpu' --trainer.devices 2
```
______________________________________________________________________
@ -64,10 +64,10 @@ This script shows you how to extract the data related components into a `Lightni
python mnist_examples/image_classifier_5_lightning_datamodule.py
# GPUs (any number)
python mnist_examples/image_classifier_5_lightning_datamodule.py --trainer.gpus 2
python mnist_examples/image_classifier_5_lightning_datamodule.py --trainer.accelerator 'gpu' --trainer.devices 2
# Distributed Data Parallel (DDP)
python mnist_examples/image_classifier_5_lightning_datamodule.py --trainer.gpus 2 --trainer.strategy 'ddp'
python mnist_examples/image_classifier_5_lightning_datamodule.py --trainer.accelerator 'gpu' --trainer.devices 2 --trainer.strategy 'ddp'
```
______________________________________________________________________
@ -81,10 +81,10 @@ This script shows you how to implement a CNN auto-encoder.
python autoencoder.py
# GPUs (any number)
python autoencoder.py --trainer.gpus 2
python autoencoder.py --trainer.accelerator 'gpu' --trainer.devices 2
# Distributed Data Parallel (DDP)
python autoencoder.py --trainer.gpus 2 --trainer.strategy 'ddp'
python autoencoder.py --trainer.accelerator 'gpu' --trainer.devices 2 --trainer.strategy 'ddp'
```
______________________________________________________________________
@ -99,10 +99,10 @@ A system describes a `LightningModule` which takes a single `torch.nn.Module` wh
python backbone_image_classifier.py
# GPUs (any number)
python backbone_image_classifier.py --trainer.gpus 2
python backbone_image_classifier.py --trainer.accelerator 'gpu' --trainer.devices 2
# Distributed Data Parallel (DDP)
python backbone_image_classifier.py --trainer.gpus 2 --trainer.strategy 'ddp'
python backbone_image_classifier.py --trainer.accelerator 'gpu' --trainer.devices 2 --trainer.strategy 'ddp'
```
______________________________________________________________________

View File

@ -46,7 +46,7 @@ This script shows you the result of the conversion to the `LightningModule` and
python image_classifier_4_lightning_module.py
# GPUs (any number)
python image_classifier_4_lightning_module.py --trainer.gpus 2
python image_classifier_4_lightning_module.py --trainer.accelerator 'gpu' --trainer.devices 2
```
______________________________________________________________________
@ -60,8 +60,8 @@ This script shows you how to extract the data related components into a `Lightni
python image_classifier_5_lightning_datamodule.py
# GPUs (any number)
python image_classifier_5_lightning_datamodule.py --trainer.gpus 2
python image_classifier_5_lightning_datamodule.py --trainer.accelerator 'gpu' --trainer.devices 2
# Distributed Data Parallel (DDP)
python image_classifier_5_lightning_datamodule.py --trainer.gpus 2 --trainer.strategy 'ddp'
python image_classifier_5_lightning_datamodule.py --trainer.accelerator 'gpu' --trainer.devices 2 --trainer.strategy 'ddp'
```

View File

@ -40,7 +40,8 @@ DEFAULT_CMD_LINE = (
"--trainer.limit_train_batches=15",
"--trainer.limit_val_batches=15",
"--trainer.profiler=pytorch",
f"--trainer.gpus={int(torch.cuda.is_available())}",
"--trainer.accelerator=gpu",
f"--trainer.devices={int(torch.cuda.is_available())}",
)

View File

@ -78,7 +78,7 @@ if __name__ == "__main__":
model = LitClassifier()
trainer = pl.Trainer(max_epochs=2, ipus=8)
trainer = pl.Trainer(max_epochs=2, accelerator="ipu", devices=8)
trainer.fit(model, datamodule=dm)
trainer.test(model, datamodule=dm)

View File

@ -28,7 +28,7 @@ ARGS_DEFAULT = (
"--trainer.limit_predict_batches 2 "
"--data.batch_size 32 "
)
ARGS_GPU = ARGS_DEFAULT + "--trainer.gpus 1 "
ARGS_GPU = ARGS_DEFAULT + "--trainer.accelerator gpu --trainer.devices 1 "
@pytest.mark.skipif(not _DALI_AVAILABLE, reason="Nvidia DALI required")