Rename `fabric run model` to `fabric run` (#19527)

awaelchli 2024-02-27 17:36:46 +01:00 committed by GitHub
parent e461e90f84
commit ea89133c65
18 changed files with 60 additions and 72 deletions

View File

@ -67,7 +67,7 @@ An alternative way to launch your Python script in multiple processes is to use
.. code-block:: bash
fabric run model path/to/your/script.py
fabric run path/to/your/script.py
This is essentially the same as running ``python path/to/your/script.py``, but it also lets you configure the following settings externally without changing your code:
@ -80,9 +80,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i
.. code-block:: bash
fabric run model --help
fabric run --help
Usage: fabric run model [OPTIONS] SCRIPT [SCRIPT_ARGS]...
Usage: fabric run [OPTIONS] SCRIPT [SCRIPT_ARGS]...
Run a Lightning Fabric script.
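For orientation, the smallest script such a command can launch might look like the following (a sketch: per the help text of this command, the script only needs to create a ``Fabric`` object, and ``launch()`` can be omitted when going through the CLI):

.. code-block:: python

    # hello.py -- a minimal sketch of a script launchable with `fabric run hello.py`
    from lightning.fabric import Fabric

    fabric = Fabric()  # accelerator, devices, etc. are injected by `fabric run`
    fabric.print(f"rank {fabric.global_rank} of {fabric.world_size}")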
@ -128,7 +128,7 @@ Here is how you run DDP with 8 GPUs and `torch.bfloat16 <https://pytorch.org/doc
.. code-block:: bash
fabric run model ./path/to/train.py \
fabric run ./path/to/train.py \
--strategy=ddp \
--devices=8 \
--accelerator=cuda \
@ -138,7 +138,7 @@ Or `DeepSpeed Zero3 <https://www.deepspeed.ai/2021/03/07/zero3-offload.html>`_ w
.. code-block:: bash
fabric run model ./path/to/train.py \
fabric run ./path/to/train.py \
--strategy=deepspeed_stage_3 \
--devices=8 \
--accelerator=cuda \
@ -148,7 +148,7 @@ Or `DeepSpeed Zero3 <https://www.deepspeed.ai/2021/03/07/zero3-offload.html>`_ w
.. code-block:: bash
fabric run model ./path/to/train.py \
fabric run ./path/to/train.py \
--devices=auto \
--accelerator=auto \
--precision=16
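The same settings can also be passed to the ``Fabric`` constructor instead of the CLI (a sketch; as the ``Fabric.launch`` docstring further down in this diff notes, ``launch()`` is then called in code):

.. code-block:: python

    from lightning.fabric import Fabric

    # Roughly equivalent to the `fabric run` flags above; "16-mixed" is the
    # explicit spelling of 16-bit mixed precision.
    fabric = Fabric(accelerator="auto", devices="auto", precision="16-mixed")
    fabric.launch()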

View File

@ -66,7 +66,7 @@ The same values can also be set through the :doc:`command line interface <launch
.. code-block:: bash
lightning run model train.py --precision=bf16-mixed
fabric run train.py --precision=bf16-mixed
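A minimal programmatic counterpart, for comparison (a sketch):

.. code-block:: python

    from lightning.fabric import Fabric

    fabric = Fabric(precision="bf16-mixed")  # the same value, set in code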
.. note::

View File

@ -72,7 +72,7 @@ Log in to the **first node** and run this command:
.. code-block:: bash
:emphasize-lines: 2,3
lightning run model \
fabric run \
--node-rank=0 \
--main-address=10.10.10.16 \
--accelerator=cuda \
@ -85,7 +85,7 @@ Log in to the **second node** and run this command:
.. code-block:: bash
:emphasize-lines: 2,3
lightning run model \
fabric run \
--node-rank=1 \
--main-address=10.10.10.16 \
--accelerator=cuda \
@ -129,7 +129,7 @@ The most likely reasons and how to fix it:
export GLOO_SOCKET_IFNAME=eno1
export NCCL_SOCKET_IFNAME=eno1
lightning run model ...
fabric run ...
You can find the interface name by parsing the output of the ``ifconfig`` command.
The name of this interface **may differ on each node**.
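If ``ifconfig`` is unavailable on a node, the interface names can also be listed from Python's standard library (a sketch; Unix only):

.. code-block:: python

    import socket

    # Prints (index, name) pairs, e.g. (2, 'eno1'); use the name for the
    # GLOO_SOCKET_IFNAME / NCCL_SOCKET_IFNAME variables above.
    for index, name in socket.if_nameindex():
        print(index, name)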
@ -152,7 +152,7 @@ Launch your command by prepending ``NCCL_DEBUG=INFO`` to get more info.
.. code-block:: bash
NCCL_DEBUG=INFO lightning run model ...
NCCL_DEBUG=INFO fabric run ...
----

View File

@ -27,11 +27,11 @@ This script shows you how to scale the pure PyTorch code to enable GPU and multi
```bash
# CPU
lightning run model train_fabric.py
fabric run train_fabric.py
# GPU (CUDA or M1 Mac)
lightning run model train_fabric.py --accelerator=gpu
fabric run train_fabric.py --accelerator=gpu
# Multiple GPUs
lightning run model train_fabric.py --accelerator=gpu --devices=4
fabric run train_fabric.py --accelerator=gpu --devices=4
```

View File

@ -20,10 +20,10 @@
3. Apply ``setup`` over each model and optimizer pair, ``setup_dataloaders`` on all your dataloaders,
and replace ``loss.backward()`` with ``fabric.backward(loss)``.
4. Run the script from the terminal using ``lightning run model path/to/train.py``
4. Run the script from the terminal using ``fabric run path/to/train.py``
Accelerate your training loop by setting the ``--accelerator``, ``--strategy``, ``--devices`` options directly from
the command line. See ``lightning run model --help`` or learn more from the documentation:
the command line. See ``fabric run --help`` or learn more from the documentation:
https://lightning.ai/docs/fabric.
"""
@ -71,7 +71,7 @@ class Net(nn.Module):
def run(hparams):
# Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be provided
# by the command line. See all options: `lightning run model --help`
# by the command line. See all options: `fabric run --help`
fabric = Fabric()
seed_everything(hparams.seed) # instead of torch.manual_seed(...)
@ -168,7 +168,7 @@ def run(hparams):
if __name__ == "__main__":
# Arguments can be passed in through the CLI as normal and will be parsed here
# Example:
# lightning run model image_classifier.py --accelerator=cuda --epochs=3
# fabric run image_classifier.py --accelerator=cuda --epochs=3
parser = argparse.ArgumentParser(description="Fabric MNIST Example")
parser.add_argument(
"--batch-size", type=int, default=64, metavar="N", help="input batch size for training (default: 64)"

View File

@ -14,13 +14,13 @@ This script shows you how to scale the pure PyTorch code to enable GPU and multi
```bash
# CPU
lightning run model train_fabric.py
fabric run train_fabric.py
# GPU (CUDA or M1 Mac)
lightning run model train_fabric.py --accelerator=gpu
fabric run train_fabric.py --accelerator=gpu
# Multiple GPUs
lightning run model train_fabric.py --accelerator=gpu --devices=4
fabric run train_fabric.py --accelerator=gpu --devices=4
```
### References

View File

@ -107,7 +107,7 @@ def validate_dataloader(model, data_loader, fabric, hparams, fold, acc_metric):
def run(hparams):
# Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be provided
# by the command line. See all options: `lightning run model --help`
# by the command line. See all options: `fabric run --help`
fabric = Fabric()
seed_everything(hparams.seed) # instead of torch.manual_seed(...)
@ -171,7 +171,7 @@ def run(hparams):
if __name__ == "__main__":
# Arguments can be passed in through the CLI as normal and will be parsed here
# Example:
# lightning run model image_classifier.py --accelerator=cuda --epochs=3
# fabric run image_classifier.py --accelerator=cuda --epochs=3
parser = argparse.ArgumentParser(description="Fabric MNIST K-Fold Cross Validation Example")
parser.add_argument(
"--batch-size", type=int, default=64, metavar="N", help="input batch size for training (default: 64)"

View File

@ -7,11 +7,11 @@ It is a simplified version of the [official PyTorch example](https://github.com/
```bash
# CPU
lightning run model --accelerator=cpu train.py
fabric run --accelerator=cpu train.py
# GPU (CUDA or M1 Mac)
lightning run model --accelerator=gpu train.py
fabric run --accelerator=gpu train.py
# Multiple GPUs
lightning run model --accelerator=gpu --devices=4 train.py
fabric run --accelerator=gpu --devices=4 train.py
```

View File

@ -33,7 +33,7 @@ torchrun --nproc_per_node=2 --standalone train_torch.py
**Accelerated using Lightning Fabric:**
```bash
lightning run model train_fabric.py --devices 2 --strategy ddp --accelerator cpu
fabric run train_fabric.py --devices 2 --strategy ddp --accelerator cpu
```
### References

View File

@ -12,7 +12,7 @@ Requirements:
- gym<=0.22
Run it with:
lightning run model train_fabric.py --accelerator=cuda --devices=2 --strategy=ddp
fabric run train_fabric.py --accelerator=cuda --devices=2 --strategy=ddp
"""
import cherry
@ -59,7 +59,7 @@ def main(
seed=42,
):
# Create the Fabric object
# Arguments get parsed from the command line, see `lightning run model --help`
# Arguments get parsed from the command line, see `fabric run --help`
fabric = Fabric()
meta_batch_size = meta_batch_size // fabric.world_size

View File

@ -40,7 +40,7 @@ torchrun --nproc_per_node=2 --standalone train_torch.py
### Lightning Fabric:
```bash
lightning run model --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
fabric run --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
```
### Visualizing logs
@ -71,7 +71,7 @@ The following video shows a trained agent on the [LunarLander-v2 environment](ht
The agent was trained with the following:
```bash
lightning run model \
fabric run \
--accelerator=cpu \
--strategy=ddp \
--devices=2 \
@ -98,25 +98,25 @@ where, differently from the previous example, we have completely decoupled the e
So for example:
```bash
lightning run model --devices=3 train_fabric_decoupled.py --num-envs 4
fabric run --devices=3 train_fabric_decoupled.py --num-envs 4
```
will spawn 3 processes, one Player and two Trainers, with the Player running 4 independent environments; every process runs on the CPU;
```bash
lightning run model --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda
fabric run --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda
```
will instead run only the Trainers on the GPU.
To run both the Player and the Trainers on the GPU, both the `--cuda` and `--player-on-gpu` flags must be provided:
```bash
lightning run model --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda --player-on-gpu
fabric run --devices=3 train_fabric_decoupled.py --num-envs 4 --cuda --player-on-gpu
```
> **Warning**
>
> With this second example, there is no need for the user to provide the `accelerator` and the `strategy` to the `lightning run model` script.
> With this second example, there is no need for the user to provide the `accelerator` and the `strategy` to the `fabric run` script.
## Number of updates, environment steps and share data

View File

@ -14,7 +14,7 @@ Requirements:
Run it with:
lightning run model --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
fabric run --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py
"""
import argparse

View File

@ -14,7 +14,7 @@ Requirements:
Run it with:
lightning run model --devices=2 train_fabric_decoupled.py
fabric run --devices=2 train_fabric_decoupled.py
"""
import argparse

View File

@ -18,7 +18,6 @@ from pathlib import Path
from typing import Tuple, Union
import click
from lightning_utilities.core.imports import RequirementCache
from requests.exceptions import ConnectionError
import lightning.app.core.constants as constants
@ -303,13 +302,6 @@ def run_app(
)
if RequirementCache("lightning-fabric>=1.9.0") or RequirementCache("lightning>=1.9.0"):
# note it is automatically replaced to `from lightning.fabric.cli` when building monolithic/mirror package
from lightning.fabric.cli import _run_model
run.add_command(_run_model)
@_main.command("open", hidden=True)
@click.argument("path", type=str, default=".")
@click.option("--name", help="The name to use for the CloudSpace", default="", type=str)

View File

@ -17,7 +17,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Changed
- Renamed `lightning run model` to `fabric run model` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442))
- Renamed `lightning run model` to `fabric run` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442), [#19527](https://github.com/Lightning-AI/pytorch-lightning/pull/19527))
- The `Fabric.rank_zero_first` context manager now uses a barrier without timeout to avoid interrupting long-running tasks ([#19448](https://github.com/Lightning-AI/lightning/pull/19448))

View File

@ -55,7 +55,7 @@ if _CLICK_AVAILABLE:
"""
print(
"`lightning run model` is deprecated and will be removed in future versions."
" Please call `fabric run model` instead."
" Please call `fabric run` instead."
)
args = sys.argv[1:]
if len(args) >= 2 and args[0] == "run" and args[1] == "model":
@ -70,12 +70,8 @@ if _CLICK_AVAILABLE:
def _main() -> None:
pass
@_main.group()
def run() -> None:
pass
@run.command(
"model",
@_main.command(
"run",
context_settings={
"ignore_unknown_options": True,
},
@ -146,7 +142,7 @@ if _CLICK_AVAILABLE:
),
)
@click.argument("script_args", nargs=-1, type=click.UNPROCESSED)
def _run_model(**kwargs: Any) -> None:
def _run(**kwargs: Any) -> None:
"""Run a Lightning Fabric script.
SCRIPT is the path to the Python script with the code to run. The script must contain a Fabric object.
@ -225,4 +221,4 @@ if __name__ == "__main__":
)
raise SystemExit(1)
_run_model()
_run()
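In isolation, the click restructure above replaces a nested group/subcommand with a single top-level command. A hypothetical minimal sketch of the two layouts (not the actual CLI module):

```python
import click

# Before: `prog run model SCRIPT` -- `run` is a group, `model` a subcommand.
@click.group()
def cli_old() -> None:
    pass

@cli_old.group()
def run() -> None:
    pass

@run.command("model")
@click.argument("script")
def run_model(script: str) -> None:
    click.echo(f"running {script}")

# After: `prog run SCRIPT` -- `run` is a command on the main group.
@click.group()
def cli_new() -> None:
    pass

@cli_new.command("run")
@click.argument("script")
def run_script(script: str) -> None:
    click.echo(f"running {script}")
```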

View File

@ -839,7 +839,7 @@ class Fabric:
Returns the output of the function that ran in the worker process with rank 0.
The ``launch()`` method should only be used if you intend to specify accelerator, devices, and so on in
the code (programmatically). If you are launching with the Lightning CLI, ``lightning run model ...``, remove
the code (programmatically). If you are launching with the Lightning CLI, ``fabric run ...``, remove
``launch()`` from your code.
``launch()`` is a no-op when called multiple times with no function passed in.
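For contrast with the CLI path, a minimal programmatic launch might look like this (a sketch; the function's rank-0 return value is what ``launch()`` returns, per the docstring above):

```python
from lightning.fabric import Fabric

def train(fabric: Fabric) -> int:
    fabric.print(f"hello from rank {fabric.global_rank}")
    return fabric.global_rank

if __name__ == "__main__":
    fabric = Fabric(accelerator="cpu", devices=2)
    result = fabric.launch(train)  # spawns the processes; returns rank 0's value
    print(result)
```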
@ -1028,7 +1028,7 @@ class Fabric:
if not self._launched and not isinstance(self._strategy, (SingleDeviceStrategy, DataParallelStrategy)):
raise RuntimeError(
"To use Fabric with more than one device, you must call `.launch()` or use the CLI:"
" `lightning run model --help`."
" `fabric run --help`."
)
def _validate_setup(self, module: nn.Module, optimizers: Sequence[Optimizer]) -> None:

View File

@ -20,7 +20,7 @@ from unittest import mock
from unittest.mock import Mock
import pytest
from lightning.fabric.cli import _get_supported_strategies, _run_model
from lightning.fabric.cli import _get_supported_strategies, _run
from tests_fabric.helpers.runif import RunIf
@ -36,7 +36,7 @@ def fake_script(tmp_path):
def test_cli_env_vars_defaults(monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script])
_run.main([fake_script])
assert e.value.code == 0
assert os.environ["LT_CLI_USED"] == "1"
assert "LT_ACCELERATOR" not in os.environ
@ -52,7 +52,7 @@ def test_cli_env_vars_defaults(monkeypatch, fake_script):
def test_cli_env_vars_accelerator(_, accelerator, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", accelerator])
_run.main([fake_script, "--accelerator", accelerator])
assert e.value.code == 0
assert os.environ["LT_ACCELERATOR"] == accelerator
@ -63,7 +63,7 @@ def test_cli_env_vars_accelerator(_, accelerator, monkeypatch, fake_script):
def test_cli_env_vars_strategy(_, strategy, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--strategy", strategy])
_run.main([fake_script, "--strategy", strategy])
assert e.value.code == 0
assert os.environ["LT_STRATEGY"] == strategy
@ -79,7 +79,7 @@ def test_cli_get_supported_strategies():
def test_cli_env_vars_unsupported_strategy(strategy, fake_script):
ioerr = StringIO()
with pytest.raises(SystemExit) as e, contextlib.redirect_stderr(ioerr):
_run_model.main([fake_script, "--strategy", strategy])
_run.main([fake_script, "--strategy", strategy])
assert e.value.code == 2
assert f"Invalid value for '--strategy': '{strategy}'" in ioerr.getvalue()
@ -90,7 +90,7 @@ def test_cli_env_vars_unsupported_strategy(strategy, fake_script):
def test_cli_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", "cuda", "--devices", devices])
_run.main([fake_script, "--accelerator", "cuda", "--devices", devices])
assert e.value.code == 0
assert os.environ["LT_DEVICES"] == devices
@ -101,7 +101,7 @@ def test_cli_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):
def test_cli_env_vars_devices_mps(accelerator, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", accelerator])
_run.main([fake_script, "--accelerator", accelerator])
assert e.value.code == 0
assert os.environ["LT_DEVICES"] == "1"
@ -111,7 +111,7 @@ def test_cli_env_vars_devices_mps(accelerator, monkeypatch, fake_script):
def test_cli_env_vars_num_nodes(num_nodes, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--num-nodes", num_nodes])
_run.main([fake_script, "--num-nodes", num_nodes])
assert e.value.code == 0
assert os.environ["LT_NUM_NODES"] == num_nodes
@ -121,7 +121,7 @@ def test_cli_env_vars_num_nodes(num_nodes, monkeypatch, fake_script):
def test_cli_env_vars_precision(precision, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--precision", precision])
_run.main([fake_script, "--precision", precision])
assert e.value.code == 0
assert os.environ["LT_PRECISION"] == precision
@ -131,7 +131,7 @@ def test_cli_torchrun_defaults(monkeypatch, fake_script):
torchrun_mock = Mock()
monkeypatch.setitem(sys.modules, "torch.distributed.run", torchrun_mock)
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script])
_run.main([fake_script])
assert e.value.code == 0
torchrun_mock.main.assert_called_with([
"--nproc_per_node=1",
@ -159,7 +159,7 @@ def test_cli_torchrun_num_processes_launched(_, devices, expected, monkeypatch,
torchrun_mock = Mock()
monkeypatch.setitem(sys.modules, "torch.distributed.run", torchrun_mock)
with pytest.raises(SystemExit) as e:
_run_model.main([fake_script, "--accelerator", "cuda", "--devices", devices])
_run.main([fake_script, "--accelerator", "cuda", "--devices", devices])
assert e.value.code == 0
torchrun_mock.main.assert_called_with([
f"--nproc_per_node={expected}",
@ -172,9 +172,9 @@ def test_cli_torchrun_num_processes_launched(_, devices, expected, monkeypatch,
def test_cli_through_fabric_entry_point():
result = subprocess.run("fabric run model --help", capture_output=True, text=True, shell=True)
result = subprocess.run("fabric run --help", capture_output=True, text=True, shell=True)
message = "Usage: fabric run model [OPTIONS] SCRIPT [SCRIPT_ARGS]"
message = "Usage: fabric run [OPTIONS] SCRIPT [SCRIPT_ARGS]"
assert message in result.stdout or message in result.stderr
@ -184,8 +184,8 @@ def test_cli_through_lightning_entry_point():
deprecation_message = (
"`lightning run model` is deprecated and will be removed in future versions. "
"Please call `fabric run model` instead"
"Please call `fabric run` instead"
)
message = "Usage: lightning run model [OPTIONS] SCRIPT [SCRIPT_ARGS]"
message = "Usage: lightning run [OPTIONS] SCRIPT [SCRIPT_ARGS]"
assert deprecation_message in result.stdout
assert message in result.stdout or message in result.stderr