From ea89133c650c0159cc868e9491dc05ed177b508b Mon Sep 17 00:00:00 2001 From: awaelchli Date: Tue, 27 Feb 2024 17:36:46 +0100 Subject: [PATCH] Rename `fabric run model` to `fabric run` (#19527) --- docs/source-fabric/fundamentals/launch.rst | 12 ++++---- docs/source-fabric/fundamentals/precision.rst | 2 +- .../guide/multi_node/barebones.rst | 8 ++--- examples/fabric/image_classifier/README.md | 6 ++-- .../fabric/image_classifier/train_fabric.py | 8 ++--- examples/fabric/kfold_cv/README.md | 6 ++-- examples/fabric/kfold_cv/train_fabric.py | 4 +-- examples/fabric/language_model/README.md | 6 ++-- examples/fabric/meta_learning/README.md | 2 +- examples/fabric/meta_learning/train_fabric.py | 4 +-- .../fabric/reinforcement_learning/README.md | 12 ++++---- .../reinforcement_learning/train_fabric.py | 2 +- .../train_fabric_decoupled.py | 2 +- src/lightning/app/cli/lightning_cli.py | 8 ----- src/lightning/fabric/CHANGELOG.md | 2 +- src/lightning/fabric/cli.py | 14 ++++----- src/lightning/fabric/fabric.py | 4 +-- tests/tests_fabric/test_cli.py | 30 +++++++++---------- 18 files changed, 60 insertions(+), 72 deletions(-) diff --git a/docs/source-fabric/fundamentals/launch.rst b/docs/source-fabric/fundamentals/launch.rst index d30a756213..efde3f54fe 100644 --- a/docs/source-fabric/fundamentals/launch.rst +++ b/docs/source-fabric/fundamentals/launch.rst @@ -67,7 +67,7 @@ An alternative way to launch your Python script in multiple processes is to use .. code-block:: bash - fabric run model path/to/your/script.py + fabric run path/to/your/script.py This is essentially the same as running ``python path/to/your/script.py``, but it also lets you configure the following settings externally without changing your code: @@ -80,9 +80,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i .. code-block:: bash - fabric run model --help + fabric run --help - Usage: fabric run model [OPTIONS] SCRIPT [SCRIPT_ARGS]... + Usage: fabric run [OPTIONS] SCRIPT [SCRIPT_ARGS]... Run a Lightning Fabric script. @@ -128,7 +128,7 @@ Here is how you run DDP with 8 GPUs and `torch.bfloat16 `_ w .. code-block:: bash - fabric run model ./path/to/train.py \ + fabric run ./path/to/train.py \ --strategy=deepspeed_stage_3 \ --devices=8 \ --accelerator=cuda \ @@ -148,7 +148,7 @@ Or `DeepSpeed Zero3 `_ w .. code-block:: bash - fabric run model ./path/to/train.py \ + fabric run ./path/to/train.py \ --devices=auto \ --accelerator=auto \ --precision=16 diff --git a/docs/source-fabric/fundamentals/precision.rst b/docs/source-fabric/fundamentals/precision.rst index 49ff2c30a9..8ac5ff6b18 100644 --- a/docs/source-fabric/fundamentals/precision.rst +++ b/docs/source-fabric/fundamentals/precision.rst @@ -66,7 +66,7 @@ The same values can also be set through the :doc:`command line interface **Warning** > -> With this second example, there is no need for the user to provide the `accelerator` and the `strategy` to the `lightning run model` script. +> With this second example, there is no need for the user to provide the `accelerator` and the `strategy` to the `fabric run` script. ## Number of updates, environment steps and share data diff --git a/examples/fabric/reinforcement_learning/train_fabric.py b/examples/fabric/reinforcement_learning/train_fabric.py index c294287ac0..1f3f83f3f2 100644 --- a/examples/fabric/reinforcement_learning/train_fabric.py +++ b/examples/fabric/reinforcement_learning/train_fabric.py @@ -14,7 +14,7 @@ Requirements: Run it with: - lightning run model --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py + fabric run --accelerator=cpu --strategy=ddp --devices=2 train_fabric.py """ import argparse diff --git a/examples/fabric/reinforcement_learning/train_fabric_decoupled.py b/examples/fabric/reinforcement_learning/train_fabric_decoupled.py index 9dd0ebac76..d3336d21da 100644 --- a/examples/fabric/reinforcement_learning/train_fabric_decoupled.py +++ b/examples/fabric/reinforcement_learning/train_fabric_decoupled.py @@ -14,7 +14,7 @@ Requirements: Run it with: - lightning run model --devices=2 train_fabric_decoupled.py + fabric run --devices=2 train_fabric_decoupled.py """ import argparse diff --git a/src/lightning/app/cli/lightning_cli.py b/src/lightning/app/cli/lightning_cli.py index 19c7689d10..8f61554019 100644 --- a/src/lightning/app/cli/lightning_cli.py +++ b/src/lightning/app/cli/lightning_cli.py @@ -18,7 +18,6 @@ from pathlib import Path from typing import Tuple, Union import click -from lightning_utilities.core.imports import RequirementCache from requests.exceptions import ConnectionError import lightning.app.core.constants as constants @@ -303,13 +302,6 @@ def run_app( ) -if RequirementCache("lightning-fabric>=1.9.0") or RequirementCache("lightning>=1.9.0"): - # note it is automatically replaced to `from lightning.fabric.cli` when building monolithic/mirror package - from lightning.fabric.cli import _run_model - - run.add_command(_run_model) - - @_main.command("open", hidden=True) @click.argument("path", type=str, default=".") @click.option("--name", help="The name to use for the CloudSpace", default="", type=str) diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md index 3bb4ce154d..8c923f23a0 100644 --- a/src/lightning/fabric/CHANGELOG.md +++ b/src/lightning/fabric/CHANGELOG.md @@ -17,7 +17,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- Renamed `lightning run model` to `fabric run model` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442)) +- Renamed `lightning run model` to `fabric run` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442), [#19527](https://github.com/Lightning-AI/pytorch-lightning/pull/19527)) - The `Fabric.rank_zero_first` context manager now uses a barrier without timeout to avoid long-running tasks to be interrupted ([#19448](https://github.com/Lightning-AI/lightning/pull/19448)) diff --git a/src/lightning/fabric/cli.py b/src/lightning/fabric/cli.py index 6aec4bec0c..80256a0f08 100644 --- a/src/lightning/fabric/cli.py +++ b/src/lightning/fabric/cli.py @@ -55,7 +55,7 @@ if _CLICK_AVAILABLE: """ print( "`lightning run model` is deprecated and will be removed in future versions." - " Please call `fabric run model` instead." + " Please call `fabric run` instead." ) args = sys.argv[1:] if args and args[0] == "run" and args[1] == "model": @@ -70,12 +70,8 @@ if _CLICK_AVAILABLE: def _main() -> None: pass - @_main.group() - def run() -> None: - pass - - @run.command( - "model", + @_main.command( + "run", context_settings={ "ignore_unknown_options": True, }, @@ -146,7 +142,7 @@ if _CLICK_AVAILABLE: ), ) @click.argument("script_args", nargs=-1, type=click.UNPROCESSED) - def _run_model(**kwargs: Any) -> None: + def _run(**kwargs: Any) -> None: """Run a Lightning Fabric script. SCRIPT is the path to the Python script with the code to run. The script must contain a Fabric object. @@ -225,4 +221,4 @@ if __name__ == "__main__": ) raise SystemExit(1) - _run_model() + _run() diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py index 36757b60ea..0d05bd3563 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -839,7 +839,7 @@ class Fabric: Returns the output of the function that ran in worker process with rank 0. The ``launch()`` method should only be used if you intend to specify accelerator, devices, and so on in - the code (programmatically). If you are launching with the Lightning CLI, ``lightning run model ...``, remove + the code (programmatically). If you are launching with the Lightning CLI, ``fabric run ...``, remove ``launch()`` from your code. The ``launch()`` is a no-op when called multiple times and no function is passed in. @@ -1028,7 +1028,7 @@ class Fabric: if not self._launched and not isinstance(self._strategy, (SingleDeviceStrategy, DataParallelStrategy)): raise RuntimeError( "To use Fabric with more than one device, you must call `.launch()` or use the CLI:" - " `lightning run model --help`." + " `fabric run --help`." ) def _validate_setup(self, module: nn.Module, optimizers: Sequence[Optimizer]) -> None: diff --git a/tests/tests_fabric/test_cli.py b/tests/tests_fabric/test_cli.py index c44dc43619..5560d114d9 100644 --- a/tests/tests_fabric/test_cli.py +++ b/tests/tests_fabric/test_cli.py @@ -20,7 +20,7 @@ from unittest import mock from unittest.mock import Mock import pytest -from lightning.fabric.cli import _get_supported_strategies, _run_model +from lightning.fabric.cli import _get_supported_strategies, _run from tests_fabric.helpers.runif import RunIf @@ -36,7 +36,7 @@ def fake_script(tmp_path): def test_cli_env_vars_defaults(monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script]) + _run.main([fake_script]) assert e.value.code == 0 assert os.environ["LT_CLI_USED"] == "1" assert "LT_ACCELERATOR" not in os.environ @@ -52,7 +52,7 @@ def test_cli_env_vars_defaults(monkeypatch, fake_script): def test_cli_env_vars_accelerator(_, accelerator, monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--accelerator", accelerator]) + _run.main([fake_script, "--accelerator", accelerator]) assert e.value.code == 0 assert os.environ["LT_ACCELERATOR"] == accelerator @@ -63,7 +63,7 @@ def test_cli_env_vars_accelerator(_, accelerator, monkeypatch, fake_script): def test_cli_env_vars_strategy(_, strategy, monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--strategy", strategy]) + _run.main([fake_script, "--strategy", strategy]) assert e.value.code == 0 assert os.environ["LT_STRATEGY"] == strategy @@ -79,7 +79,7 @@ def test_cli_get_supported_strategies(): def test_cli_env_vars_unsupported_strategy(strategy, fake_script): ioerr = StringIO() with pytest.raises(SystemExit) as e, contextlib.redirect_stderr(ioerr): - _run_model.main([fake_script, "--strategy", strategy]) + _run.main([fake_script, "--strategy", strategy]) assert e.value.code == 2 assert f"Invalid value for '--strategy': '{strategy}'" in ioerr.getvalue() @@ -90,7 +90,7 @@ def test_cli_env_vars_unsupported_strategy(strategy, fake_script): def test_cli_env_vars_devices_cuda(_, devices, monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--accelerator", "cuda", "--devices", devices]) + _run.main([fake_script, "--accelerator", "cuda", "--devices", devices]) assert e.value.code == 0 assert os.environ["LT_DEVICES"] == devices @@ -101,7 +101,7 @@ def test_cli_env_vars_devices_cuda(_, devices, monkeypatch, fake_script): def test_cli_env_vars_devices_mps(accelerator, monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--accelerator", accelerator]) + _run.main([fake_script, "--accelerator", accelerator]) assert e.value.code == 0 assert os.environ["LT_DEVICES"] == "1" @@ -111,7 +111,7 @@ def test_cli_env_vars_devices_mps(accelerator, monkeypatch, fake_script): def test_cli_env_vars_num_nodes(num_nodes, monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--num-nodes", num_nodes]) + _run.main([fake_script, "--num-nodes", num_nodes]) assert e.value.code == 0 assert os.environ["LT_NUM_NODES"] == num_nodes @@ -121,7 +121,7 @@ def test_cli_env_vars_num_nodes(num_nodes, monkeypatch, fake_script): def test_cli_env_vars_precision(precision, monkeypatch, fake_script): monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock()) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--precision", precision]) + _run.main([fake_script, "--precision", precision]) assert e.value.code == 0 assert os.environ["LT_PRECISION"] == precision @@ -131,7 +131,7 @@ def test_cli_torchrun_defaults(monkeypatch, fake_script): torchrun_mock = Mock() monkeypatch.setitem(sys.modules, "torch.distributed.run", torchrun_mock) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script]) + _run.main([fake_script]) assert e.value.code == 0 torchrun_mock.main.assert_called_with([ "--nproc_per_node=1", @@ -159,7 +159,7 @@ def test_cli_torchrun_num_processes_launched(_, devices, expected, monkeypatch, torchrun_mock = Mock() monkeypatch.setitem(sys.modules, "torch.distributed.run", torchrun_mock) with pytest.raises(SystemExit) as e: - _run_model.main([fake_script, "--accelerator", "cuda", "--devices", devices]) + _run.main([fake_script, "--accelerator", "cuda", "--devices", devices]) assert e.value.code == 0 torchrun_mock.main.assert_called_with([ f"--nproc_per_node={expected}", @@ -172,9 +172,9 @@ def test_cli_torchrun_num_processes_launched(_, devices, expected, monkeypatch, def test_cli_through_fabric_entry_point(): - result = subprocess.run("fabric run model --help", capture_output=True, text=True, shell=True) + result = subprocess.run("fabric run --help", capture_output=True, text=True, shell=True) - message = "Usage: fabric run model [OPTIONS] SCRIPT [SCRIPT_ARGS]" + message = "Usage: fabric run [OPTIONS] SCRIPT [SCRIPT_ARGS]" assert message in result.stdout or message in result.stderr @@ -184,8 +184,8 @@ def test_cli_through_lightning_entry_point(): deprecation_message = ( "`lightning run model` is deprecated and will be removed in future versions. " - "Please call `fabric run model` instead" + "Please call `fabric run` instead" ) - message = "Usage: lightning run model [OPTIONS] SCRIPT [SCRIPT_ARGS]" + message = "Usage: lightning run [OPTIONS] SCRIPT [SCRIPT_ARGS]" assert deprecation_message in result.stdout assert message in result.stdout or message in result.stderr