Rename `fabric run model` to `fabric run` (#19527)

awaelchli 2024-02-27 17:36:46 +01:00 committed by GitHub
parent e461e90f84
commit ea89133c65
18 changed files with 60 additions and 72 deletions

View File

@ -67,7 +67,7 @@ An alternative way to launch your Python script in multiple processes is to use
.. code-block:: bash
fabric run model path/to/your/script.py
fabric run path/to/your/script.py
This is essentially the same as running ``python path/to/your/script.py``, but it also lets you configure the following settings externally without changing your code:
@ -80,9 +80,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i
.. code-block:: bash
fabric run model --help
fabric run --help
Usage: fabric run model [OPTIONS] SCRIPT [SCRIPT_ARGS]...
Usage: fabric run [OPTIONS] SCRIPT [SCRIPT_ARGS]...
Run a Lightning Fabric script.
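For orientation, the smallest script such a command can launch might look like the following (a sketch: per the help text of this command, the script only needs to create a ``Fabric`` object, and ``launch()`` can be omitted when going through the CLI):

.. code-block:: python

    # hello.py -- a minimal sketch of a script launchable with `fabric run hello.py`
    from lightning.fabric import Fabric

    fabric = Fabric()  # accelerator, devices, etc. are injected by `fabric run`
    fabric.print(f"rank {fabric.global_rank} of {fabric.world_size}")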
@ -128,7 +128,7 @@ Here is how you run DDP with 8 GPUs and `torch.bfloat16 <https://pytorch.org/doc
.. code-block:: bash
fabric run model ./path/to/train.py \
fabric run ./path/to/train.py \
--strategy=ddp \
--devices=8 \
--accelerator=cuda \
@ -138,7 +138,7 @@ Or `DeepSpeed Zero3 <https://www.deepspeed.ai/2021/03/07/zero3-offload.html>`_ w
.. code-block:: bash
fabric run model ./path/to/train.py \
fabric run ./path/to/train.py \
--strategy=deepspeed_stage_3 \
--devices=8 \
--accelerator=cuda \
@ -148,7 +148,7 @@ Or `DeepSpeed Zero3 <https://www.deepspeed.ai/2021/03/07/zero3-offload.html>`_ w
.. code-block:: bash
fabric run model ./path/to/train.py \
fabric run ./path/to/train.py \
--devices=auto \
--accelerator=auto \
--precision=16
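The same settings can also be passed to the ``Fabric`` constructor instead of the CLI (a sketch; as the ``Fabric.launch`` docstring further down in this diff notes, ``launch()`` is then called in code):

.. code-block:: python

    from lightning.fabric import Fabric

    # Roughly equivalent to the `fabric run` flags above; "16-mixed" is the
    # explicit spelling of 16-bit mixed precision.
    fabric = Fabric(accelerator="auto", devices="auto", precision="16-mixed")
    fabric.launch()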

View File

@ -66,7 +66,7 @@ The same values can also be set through the :doc:`command line interface <launch
.. code-block:: bash
lightning run model train.py --precision=bf16-mixed
fabric run train.py --precision=bf16-mixed
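A minimal programmatic counterpart, for comparison (a sketch):

.. code-block:: python

    from lightning.fabric import Fabric

    fabric = Fabric(precision="bf16-mixed")  # the same value, set in code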
.. note::

View File

@ -72,7 +72,7 @@ Log in to the **first node** and run this command:
.. code-block:: bash
:emphasize-lines: 2,3
lightning run model \
fabric run \
--node-rank=0 \
--main-address=10.10.10.16 \
--accelerator=cuda \
@ -85,7 +85,7 @@ Log in to the **second node** and run this command:
.. code-block:: bash
:emphasize-lines: 2,3
lightning run model \
fabric run \
--node-rank=1 \
--main-address=10.10.10.16 \
--accelerator=cuda \
@ -129,7 +129,7 @@ The most likely reasons and how to fix it:
export GLOO_SOCKET_IFNAME=eno1
export NCCL_SOCKET_IFNAME=eno1
lightning run model ...
fabric run ...
You can find the interface name by parsing the output of the ``ifconfig`` command.
The name of this interface **may differ on each node**.
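If ``ifconfig`` is unavailable on a node, the interface names can also be listed from Python's standard library (a sketch; Unix only):

.. code-block:: python

    import socket

    # Prints (index, name) pairs, e.g. (2, 'eno1'); use the name for the
    # GLOO_SOCKET_IFNAME / NCCL_SOCKET_IFNAME variables above.
    for index, name in socket.if_nameindex():
        print(index, name)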
@ -152,7 +152,7 @@ Launch your command by prepending ``NCCL_DEBUG=INFO`` to get more info.
.. code-block:: bash
NCCL_DEBUG=INFO lightning run model ...
NCCL_DEBUG=INFO fabric run ...
----

View File

@ -27,11 +27,11 @@ This script shows you how to scale the pure PyTorch code to enable GPU and multi
```bash
# CPU
lightning run model train_fabric.py
fabric run train_fabric.py
# GPU (CUDA or M1 Mac)
lightning run model train_fabric.py --accelerator=gpu
fabric run train_fabric.py --accelerator=gpu
# Multiple GPUs
lightning run model train_fabric.py --accelerator=gpu --devices=4
fabric run train_fabric.py --accelerator=gpu --devices=4
```

View File

@ -20,10 +20,10 @@
3. Apply ``setup`` over each model and optimizer pair, ``setup_dataloaders`` on all your dataloaders,
and replace ``loss.backward()`` with ``fabric.backward(loss)``.
4. Run the script from the terminal using ``lightning run model path/to/train.py``
4. Run the script from the terminal using ``fabric run path/to/train.py``
Accelerate your training loop by setting the ``--accelerator``, ``--strategy``, ``--devices`` options directly from
the command line. See ``lightning run model --help`` or learn more from the documentation:
the command line. See ``fabric run --help`` or learn more from the documentation:
https://lightning.ai/docs/fabric.
"""
@ -71,7 +71,7 @@ class Net(nn.Module):
def run(hparams):
# Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be provided
# by the command line. See all options: `lightning run model --help`
# by the command line. See all options: `fabric run --help`
fabric = Fabric()
seed_everything(hparams.seed) # instead of torch.manual_seed(...)
@ -168,7 +168,7 @@ def run(hparams):
if __name__ == "__main__":
# Arguments can be passed in through the CLI as normal and will be parsed here
# Example:
# lightning run model image_classifier.py --accelerator=cuda --epochs=3
# fabric run image_classifier.py --accelerator=cuda --epochs=3
parser = argparse.ArgumentParser(description="Fabric MNIST Example")
parser.add_argument(
"--batch-size", type=int, default=64, metavar="N", help="input batch size for training (default: 64)"

View File

@ -14,13 +14,13 @@ This script shows you how to scale the pure PyTorch code to enable GPU and multi
```bash
# CPU
lightning run model train_fabric.py
fabric run train_fabric.py
# GPU (CUDA or M1 Mac)
lightning run model train_fabric.py --accelerator=gpu
fabric run train_fabric.py --accelerator=gpu
# Multiple GPUs
lightning run model train_fabric.py --accelerator=gpu --devices=4
fabric run train_fabric.py --accelerator=gpu --devices=4
```
### References

View File

@ -107,7 +107,7 @@ def validate_dataloader(model, data_loader, fabric, hparams, fold, acc_metric):
def run(hparams):
# Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be provided
# by the command line. See all options: `lightning run model --help`
# by the command line. See all options: `fabric run --help`
fabric = Fabric()
seed_everything(hparams.seed) # instead of torch.manual_seed(...)
@ -171,7 +171,7 @@ def run(hparams):
if __name__ == "__main__":
# Arguments can be passed in through the CLI as normal and will be parsed here
# Example:
# lightning run model image_classifier.py --accelerator=cuda --epochs=3
# fabric run image_classifier.py --accelerator=cuda --epochs=3
parser = argparse.ArgumentParser(description="Fabric MNIST K-Fold Cross Validation Example")
parser.add_argument(
"--batch-size", type=int, default=64, metavar="N", help="input batch size for training (default: 64)"

View File

@ -7,11 +7,11 @@ It is a simplified version of the [official PyTorch example](https://github.com/
```bash
# CPU
lightning run model --accelerator=cpu train.py
fabric run --accelerator=cpu train.py
# GPU (CUDA or M1 Mac)
lightning run model --accelerator=gpu train.py
fabric run --accelerator=gpu train.py
# Multiple GPUs
lightning run model --accelerator=gpu --devices=4 train.py
fabric run --accelerator=gpu --devices=4 train.py
```

View File

@ -33,7 +33,7 @@ torchrun --nproc_per_node=2 --standalone train_torch.py
**Accelerated using Lightning Fabric:**
```bash
lightning run model train_fabric.py --devices 2 --strategy ddp --accelerator cpu
fabric run train_fabric.py --devices 2 --strategy ddp --accelerator cpu
```
### References

View File

@ -12,7 +12,7 @@ Requirements:
- gym<=0.22
Run it with:
lightning run model train_fabric.py --accelerator=cuda --devices=2 --strategy=ddp
fabric run train_fabric.py --accelerator=cuda --devices=2 --strategy=ddp
"""
import cherry
@ -59,7 +59,7 @@ def main(
seed=42,
):
# Create the Fabric object
# Arguments get parsed from the command line, see `lightning run model --help`
# Arguments get parsed from the command line, see `fabric run --help`
fabric = Fabric()
meta_batch_size = meta_batch_size // fabric.world_size

View File

@ -40,7 +40,7 @@ torchrun --nproc_per_node=2 --standalone train_torch.py
### Lightning Fabric:
```bash
lightning run model --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
fabric run --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
```
### Visualizing logs
@ -71,7 +71,7 @@ The following video shows a trained agent on the [LunarLander-v2 environment](ht
The agent was trained with the following:
```bash
lightning run model \
fabric run \
--accelerator=cpu \
--strategy=ddp \
--devices=2 \
@ -98,25 +98,25 @@ where, differently from the previous example, we have completely decoupled the e
So for example:
```bash
lightning run model --devices=3 train_fabric_decoupled.py --num-envs 4
fabric run --devices=3 train_fabric_decoupled.py --num-envs 4
```
will spawn 3 processes, one Player and two Trainers, with the Player running 4 independent environments; every process runs on the CPU;
```bash
lightning run model --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda
fabric run --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda
```
will instead run only the Trainers on the GPU.
To run both the Player and the Trainers on the GPU, both the `--cuda` and `--player-on-gpu` flags must be provided:
```bash
lightning run model --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda --player-on-gpu
fabric run --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda --player-on-gpu
```
> **Warning**
>
> With this second example, there is no need for the user to provide the `accelerator` and the `strategy` to the `lightning run model` script.
> With this second example, there is no need for the user to provide the `accelerator` and the `strategy` to the `fabric run` script.
## Number of updates, environment steps and share data

View File

@ -14,7 +14,7 @@ Requirements:
Run it with:
lightning run model --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
fabric run --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
"""
import argparse

View File

@ -14,7 +14,7 @@ Requirements:
Run it with:
lightning run model --devices=2 train_fabric_decoupled.py
fabric run --devices=2 train_fabric_decoupled.py
"""
import argparse

View File

@ -18,7 +18,6 @@ from pathlib import Path
from typing import Tuple, Union
import click
from lightning_utilities.core.imports import RequirementCache
from requests.exceptions import ConnectionError
import lightning.app.core.constants as constants
@ -303,13 +302,6 @@ def run_app(
)
if RequirementCache("lightning-fabric>=1.9.0") or RequirementCache("lightning>=1.9.0"):
# note it is automatically replaced to `from lightning.fabric.cli` when building monolithic/mirror package
from lightning.fabric.cli import _run_model
run.add_command(_run_model)
@_main.command("open", hidden=True)
@click.argument("path", type=str, default=".")
@click.option("--name", help="The name to use for the CloudSpace", default="", type=str)

View File

@ -17,7 +17,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Changed
- Renamed `lightning run model` to `fabric run model` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442))
- Renamed `lightning run model` to `fabric run` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442), [#19527](https://github.com/Lightning-AI/pytorch-lightning/pull/19527))
- The `Fabric.rank_zero_first` context manager now uses a barrier without timeout to avoid interrupting long-running tasks ([#19448](https://github.com/Lightning-AI/lightning/pull/19448))

View File

@ -55,7 +55,7 @@ if _CLICK_AVAILABLE:
"""
print(
"`lightning run model` is deprecated and will be removed in future versions."
" Please call `fabric run model` instead."
" Please call `fabric run` instead."
)
args = sys.argv[1:]
if len(args) >= 2 and args[0] == "run" and args[1] == "model":
@ -70,12 +70,8 @@ if _CLICK_AVAILABLE:
def _main() -> None:
pass
@_main.group()
def run() -> None:
pass
@run.command(
"model",
@_main.command(
"run",
context_settings={
"ignore_unknown_options": True,
},
@ -146,7 +142,7 @@ if _CLICK_AVAILABLE:
),
)
@click.argument("script_args", nargs=-1, type=click.UNPROCESSED)
def _run_model(**kwargs: Any) -> None:
def _run(**kwargs: Any) -> None:
"""Run a Lightning Fabric script.
SCRIPT is the path to the Python script with the code to run. The script must contain a Fabric object.
@ -225,4 +221,4 @@ if __name__ == "__main__":
)
raise SystemExit(1)
_run_model()
_run()
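In isolation, the click restructure above replaces a nested group/subcommand with a single top-level command. A hypothetical minimal sketch of the two layouts (not the actual CLI module):

```python
import click

# Before: `prog run model SCRIPT` -- `run` is a group, `model` a subcommand.
@click.group()
def cli_old() -> None:
    pass

@cli_old.group()
def run() -> None:
    pass

@run.command("model")
@click.argument("script")
def run_model(script: str) -> None:
    click.echo(f"running {script}")

# After: `prog run SCRIPT` -- `run` is a command on the main group.
@click.group()
def cli_new() -> None:
    pass

@cli_new.command("run")
@click.argument("script")
def run_script(script: str) -> None:
    click.echo(f"running {script}")
```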

View File

@ -839,7 +839,7 @@ class Fabric:
Returns the output of the function that ran in the worker process with rank 0.
The ``launch()`` method should only be used if you intend to specify accelerator, devices, and so on in
the code (programmatically). If you are launching with the Lightning CLI, ``lightning run model ...``, remove
the code (programmatically). If you are launching with the Lightning CLI, ``fabric run ...``, remove
``launch()`` from your code.
``launch()`` is a no-op when called multiple times with no function passed in.
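For contrast with the CLI path, a minimal programmatic launch might look like this (a sketch; the function's rank-0 return value is what ``launch()`` returns, per the docstring above):

```python
from lightning.fabric import Fabric

def train(fabric: Fabric) -> int:
    fabric.print(f"hello from rank {fabric.global_rank}")
    return fabric.global_rank

if __name__ == "__main__":
    fabric = Fabric(accelerator="cpu", devices=2)
    result = fabric.launch(train)  # spawns the processes; returns rank 0's value
    print(result)
```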
@ -1028,7 +1028,7 @@ class Fabric:
if not self._launched and not isinstance(self._strategy, (SingleDeviceStrategy, DataParallelStrategy)):
raise RuntimeError(
"To use Fabric with more than one device, you must call `.launch()` or use the CLI:"
" `lightning run model --help`."
" `fabric run --help`."
)
def _validate_setup(self, module: nn.Module, optimizers: Sequence[Optimizer]) -> None:

View File

@ -20,7 +20,7 @@ from unittest import mock
from unittest.mock import Mock
import pytest
from lightning.fabric.cli import _get_supported_strategies, _run_model
from lightning.fabric.cli import _get_supported_strategies, _run
from tests_fabric.helpers.runif import RunIf
@ -36,7 +36,7 @@ def fake_script(tmp_path):
def test_cli_env_vars_defaults(monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script])
_run.main([fake_script])
assert e.value.code == 0
assert os.environ["LT_CLI_USED"] == "1"
assert "LT_ACCELERATOR" not in os.environ
@ -52,7 +52,7 @@ def test_cli_env_vars_defaults(monkeypatch, fake_script):
def test_cli_env_vars_accelerator(_, accelerator, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", accelerator])
_run.main([fake_script, "--accelerator", accelerator])
assert e.value.code == 0
assert os.environ["LT_ACCELERATOR"] == accelerator
@ -63,7 +63,7 @@ def test_cli_env_vars_accelerator(_, accelerator, monkeypatch, fake_script):
def test_cli_env_vars_strategy(_, strategy, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--strategy", strategy])
_run.main([fake_script, "--strategy", strategy])
assert e.value.code == 0
assert os.environ["LT_STRATEGY"] == strategy
@ -79,7 +79,7 @@ def test_cli_get_supported_strategies():
def test_cli_env_vars_unsupported_strategy(strategy, fake_script):
ioerr = StringIO()
with pytest.raises(SystemExit) as e, contextlib.redirect_stderr(ioerr):
_run_model.main([fake_script, "--strategy", strategy])
_run.main([fake_script, "--strategy", strategy])
assert e.value.code == 2
assert f"Invalid value for '--strategy': '{strategy}'" in ioerr.getvalue()
@ -90,7 +90,7 @@ def test_cli_env_vars_unsupported_strategy(strategy, fake_script):
def test_cli_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", "cuda", "--devices", devices])
_run.main([fake_script, "--accelerator", "cuda", "--devices", devices])
assert e.value.code == 0
assert os.environ["LT_DEVICES"] == devices
@ -101,7 +101,7 @@ def test_cli_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):
def test_cli_env_vars_devices_mps(accelerator, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", accelerator])
_run.main([fake_script, "--accelerator", accelerator])
assert e.value.code == 0
assert os.environ["LT_DEVICES"] == "1"
@ -111,7 +111,7 @@ def test_cli_env_vars_devices_mps(accelerator, monkeypatch, fake_script):
def test_cli_env_vars_num_nodes(num_nodes, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--num-nodes", num_nodes])
_run.main([fake_script, "--num-nodes", num_nodes])
assert e.value.code == 0
assert os.environ["LT_NUM_NODES"] == num_nodes
@ -121,7 +121,7 @@ def test_cli_env_vars_num_nodes(num_nodes, monkeypatch, fake_script):
def test_cli_env_vars_precision(precision, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--precision", precision])
_run.main([fake_script, "--precision", precision])
assert e.value.code == 0
assert os.environ["LT_PRECISION"] == precision
@ -131,7 +131,7 @@ def test_cli_torchrun_defaults(monkeypatch, fake_script):
torchrun_mock = Mock()
monkeypatch.setitem(sys.modules, "torch.distributed.run", torchrun_mock)
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script])
_run.main([fake_script])
assert e.value.code == 0
torchrun_mock.main.assert_called_with([
"--nproc_per_node=1",
@ -159,7 +159,7 @@ def test_cli_torchrun_num_processes_launched(_, devices, expected, monkeypatch,
torchrun_mock = Mock()
monkeypatch.setitem(sys.modules, "torch.distributed.run", torchrun_mock)
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", "cuda", "--devices", devices])
_run.main([fake_script, "--accelerator", "cuda", "--devices", devices])
assert e.value.code == 0
torchrun_mock.main.assert_called_with([
f"--nproc_per_node={expected}",
@ -172,9 +172,9 @@ def test_cli_torchrun_num_processes_launched(_, devices, expected, monkeypatch,
def test_cli_through_fabric_entry_point():
result = subprocess.run("fabric run model --help", capture_output=True, text=True, shell=True)
result = subprocess.run("fabric run --help", capture_output=True, text=True, shell=True)
message = "Usage: fabric run model [OPTIONS] SCRIPT [SCRIPT_ARGS]"
message = "Usage: fabric run [OPTIONS] SCRIPT [SCRIPT_ARGS]"
assert message in result.stdout or message in result.stderr
@ -184,8 +184,8 @@ def test_cli_through_lightning_entry_point():
deprecation_message = (
"`lightning run model` is deprecated and will be removed in future versions. "
"Please call `fabric run model` instead"
"Please call `fabric run` instead"
)
message = "Usage: lightning run model [OPTIONS] SCRIPT [SCRIPT_ARGS]"
message = "Usage: lightning run [OPTIONS] SCRIPT [SCRIPT_ARGS]"
assert deprecation_message in result.stdout
assert message in result.stdout or message in result.stderr