# lightning/tests/strategies/test_ddp_strategy.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock

import pytest
import torch
from torch.nn.parallel import DistributedDataParallel

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.plugins.environments import ClusterEnvironment, LightningEnvironment
from pytorch_lightning.strategies import DDPStrategy
from pytorch_lightning.trainer.states import TrainerFn
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf


class BoringModelGPU(BoringModel):
    """``BoringModel`` variant that verifies its CUDA placement when training starts."""

    def on_train_start(self) -> None:
        """Assert the module is on this rank's CUDA device and record the allocated CUDA memory."""
        expected_device = torch.device(f"cuda:{self.trainer.strategy.local_rank}")
        # by the time training starts the module must already live on the GPU of its local rank
        assert self.device == expected_device
        # snapshot of the currently allocated CUDA memory, stored on the module for later inspection
        self.start_cuda_memory = torch.cuda.memory_allocated()


@RunIf(min_gpus=2, skip_windows=True, standalone=True)
def test_ddp_with_2_gpus():
    """Check ``DDPStrategy`` device handling before fitting, during training and after teardown.

    Before ``fit`` the strategy's root device must be the CUDA device of the local rank. During training
    ``BoringModelGPU`` asserts its own placement. After ``fit`` the model must be back on the CPU and less
    CUDA memory must be allocated than at the start of training.
    """
    trainer = Trainer(accelerator="gpu", devices=2, strategy="ddp", fast_dev_run=True)

    # the "ddp" string must resolve to a DDPStrategy whose root device follows the local rank
    strategy = trainer.strategy
    assert isinstance(strategy, DDPStrategy)
    local_rank = strategy.local_rank
    expected_root_device = torch.device(f"cuda:{local_rank}")
    assert strategy.root_device == expected_root_device

    model = BoringModelGPU()
    trainer.fit(model)

    # teardown is expected to move the model to the CPU ...
    cpu_device = torch.device("cpu")
    assert model.device == cpu_device
    # ... and to release CUDA memory relative to the snapshot taken in ``on_train_start``
    memory_after_fit = torch.cuda.memory_allocated()
    assert memory_after_fit < model.start_cuda_memory


class BarrierModel(BoringModel):
    """``BoringModel`` variant that issues a strategy barrier before and after DDP wrapping."""

    def setup(self, stage=None):
        """Assert the strategy's model is not yet DDP-wrapped, then call the strategy barrier."""
        strategy = self.trainer.strategy
        already_wrapped = isinstance(strategy.model, DistributedDataParallel)
        assert not already_wrapped
        strategy.barrier("barrier before model is wrapped")

    def on_train_start(self):
        """Assert the strategy's model is DDP-wrapped, then call the strategy barrier."""
        strategy = self.trainer.strategy
        is_wrapped = isinstance(strategy.model, DistributedDataParallel)
        assert is_wrapped
        strategy.barrier("barrier after model is wrapped")


@RunIf(min_gpus=4, standalone=True)
@mock.patch("torch.distributed.barrier")
def test_ddp_barrier_non_consecutive_device_ids(barrier_mock, tmpdir):
    """Test correct usage of barriers when device ids do not start at 0 or are not consecutive.

    ``BarrierModel`` is trained (instead of a plain ``BoringModel``) so that the strategy barrier is called
    explicitly both before and after the model is wrapped in ``DistributedDataParallel``.
    ``torch.distributed.barrier`` is patched so that the keyword arguments it receives can be inspected.

    Args:
        barrier_mock: mock injected by ``mock.patch`` in place of ``torch.distributed.barrier``.
        tmpdir: pytest temporary directory used as the trainer's ``default_root_dir``.
    """
    # BarrierModel exercises the barrier in ``setup`` (unwrapped model) and ``on_train_start`` (wrapped model)
    model = BarrierModel()
    # device ids that neither start at 0 nor are consecutive
    gpus = [1, 3]
    trainer = Trainer(default_root_dir=tmpdir, max_steps=1, accelerator="gpu", devices=gpus, strategy="ddp")
    trainer.fit(model)
    # the barrier must have been issued with the physical device id of this process, not its rank index
    barrier_mock.assert_any_call(device_ids=[gpus[trainer.local_rank]])


@mock.patch.dict(os.environ, {"LOCAL_RANK": "1"})
def test_incorrect_ddp_script_spawning(tmpdir):
    """Check the error raised when Lightning is told to spawn child processes on a rank greater than 0.

    ``LOCAL_RANK`` is patched to ``"1"`` and the cluster environment always claims that processes are not
    created externally, so ``fit`` is expected to fail with a ``RuntimeError``.
    """

    class WronglyImplementedEnvironment(LightningEnvironment):
        """Environment that never reports externally created processes."""

        @property
        def creates_processes_externally(self):
            # always ``False``: Lightning would then try to launch new processes on ranks > 0 as well
            return False

    expected_error = "Lightning attempted to launch new distributed processes with `local_rank > 0`."
    model = BoringModel()
    environment = WronglyImplementedEnvironment()
    trainer = Trainer(
        default_root_dir=tmpdir,
        strategy="ddp",
        accelerator="cpu",
        devices=2,
        plugins=[environment],
    )

    with pytest.raises(RuntimeError, match=expected_error):
        trainer.fit(model)


@RunIf(skip_windows=True)
def test_ddp_configure_ddp():
    """Check that ``DDPStrategy`` wraps the model in ``DistributedDataParallel`` only when fitting."""
    model = BoringModel()

    def _prepare_trainer(trainer_fn):
        # fresh strategy/trainer pair, driven up to (but not including) ``strategy.setup``
        ddp_strategy = DDPStrategy()
        prepared = Trainer(
            max_epochs=1,
            strategy=ddp_strategy,
        )
        prepared.state.fn = trainer_fn
        prepared.strategy.connect(model)
        prepared.lightning_module.trainer = prepared
        prepared.strategy.setup_environment()
        return prepared

    # fitting: a plain LightningModule before setup, wrapped by DistributedDataParallel afterwards
    trainer = _prepare_trainer(TrainerFn.FITTING)
    assert isinstance(trainer.model, LightningModule)
    trainer.strategy.setup(trainer)
    assert isinstance(trainer.model, DistributedDataParallel)

    # not fitting (validating): setup must leave the model as a LightningModule
    trainer = _prepare_trainer(TrainerFn.VALIDATING)
    trainer.strategy.setup(trainer)
    assert isinstance(trainer.model, LightningModule)


@RunIf(min_gpus=1)
@pytest.mark.parametrize(
    "trainer_fn", (TrainerFn.VALIDATING, TrainerFn.TUNING, TrainerFn.TESTING, TrainerFn.PREDICTING)
)
def test_ddp_dont_configure_sync_batchnorm(trainer_fn):
    """Check that ``sync_batchnorm=True`` does not convert batch-norm layers when the trainer is not fitting.

    Args:
        trainer_fn: the non-fitting ``TrainerFn`` assigned to the trainer state for this run.
    """
    model = BoringModelGPU()
    # replace the model's layer with a batch-norm layer so a conversion would be observable
    model.layer = torch.nn.BatchNorm1d(10)

    ddp_strategy = DDPStrategy()
    trainer = Trainer(accelerator="gpu", devices=1, strategy=ddp_strategy, sync_batchnorm=True)
    trainer.state.fn = trainer_fn

    strategy = trainer.strategy
    strategy.connect(model)
    trainer.lightning_module.trainer = trainer
    strategy.setup_environment()
    assert isinstance(trainer.model, LightningModule)

    strategy.setup(trainer)
    # TrainerFn is not FITTING, so the layer must not have been turned into SyncBatchNorm
    layer_is_synced = isinstance(trainer.strategy.model.layer, torch.nn.SyncBatchNorm)
    assert not layer_is_synced


def test_configure_launcher_create_processes_externally():
    """Check that ``DDPStrategy`` has no launcher when the cluster environment creates processes externally.

    The launcher must be ``None`` right after construction and must still be ``None`` after
    ``_configure_launcher()`` has been called.
    """

    class MyClusterEnvironment(ClusterEnvironment):
        """Minimal single-process environment that reports externally created processes."""

        @property
        def creates_processes_externally(self):
            return True

        @property
        def main_address(self):
            return ""

        @property
        def main_port(self):
            return 8080

        @staticmethod
        def detect():
            return True

        def world_size(self):
            return 1

        def set_world_size(self, size: int) -> None:
            # accepts ``size`` like ``ClusterEnvironment.set_world_size``; the stub has nothing to store
            pass

        def global_rank(self):
            return 0

        def set_global_rank(self, rank: int) -> None:
            # accepts ``rank`` like ``ClusterEnvironment.set_global_rank``; the stub has nothing to store
            pass

        def local_rank(self):
            return 0

        def node_rank(self):
            return 0

    ddp_strategy = DDPStrategy(cluster_environment=MyClusterEnvironment())
    assert ddp_strategy.launcher is None
    # configuring the launcher must be a no-op because processes are created externally
    ddp_strategy._configure_launcher()
    assert ddp_strategy.launcher is None
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00			`# Copyright The PyTorch Lightning team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
update an outdated error message in DDPPlugin (#9005) 2021-08-23 15:29:07 +00:00			`import os`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00			`from unittest import mock`

update an outdated error message in DDPPlugin (#9005) 2021-08-23 15:29:07 +00:00			`import pytest`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00			`import torch`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00			`from torch.nn.parallel import DistributedDataParallel`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`from pytorch_lightning import LightningModule, Trainer`
Do not configure launcher if processes are launched externally (#12431) 2022-03-24 09:40:34 +00:00			`from pytorch_lightning.plugins.environments import ClusterEnvironment, LightningEnvironment`
Update strategy import statements (#11231) 2021-12-23 07:26:28 +00:00			`from pytorch_lightning.strategies import DDPStrategy`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`from pytorch_lightning.trainer.states import TrainerFn`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00			`from tests.helpers.boring_model import BoringModel`
			`from tests.helpers.runif import RunIf`


			`class BoringModelGPU(BoringModel):`
			`def on_train_start(self) -> None:`
			`# make sure that the model is on GPU when training`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`assert self.device == torch.device(f"cuda:{self.trainer.strategy.local_rank}")`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00			`self.start_cuda_memory = torch.cuda.memory_allocated()`


Drop PyTorch 1.7 support (#12432) 2022-03-27 21:31:20 +00:00			`@RunIf(min_gpus=2, skip_windows=True, standalone=True)`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00			`def test_ddp_with_2_gpus():`
fix typos (#11937) 2022-02-17 01:27:51 +00:00			`"""Tests if device is set correctly when training and after teardown for DDPStrategy."""`
Update `gpus` flag with `accelerator` and `devices` flag (#12156) Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2022-03-23 19:52:12 +00:00			`trainer = Trainer(accelerator="gpu", devices=2, strategy="ddp", fast_dev_run=True)`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00			`# assert training type plugin attributes for device setting`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`assert isinstance(trainer.strategy, DDPStrategy)`
			`local_rank = trainer.strategy.local_rank`
			`assert trainer.strategy.root_device == torch.device(f"cuda:{local_rank}")`
refactor accelerator teardown -> training type plugin teardown (#7579) 2021-05-22 20:19:24 +00:00
			`model = BoringModelGPU()`

			`trainer.fit(model)`

			`# assert after training, model is moved to CPU and memory is deallocated`
			`assert model.device == torch.device("cpu")`
			`cuda_memory = torch.cuda.memory_allocated()`
			`assert cuda_memory < model.start_cuda_memory`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00

			`class BarrierModel(BoringModel):`
			`def setup(self, stage=None):`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`assert not isinstance(self.trainer.strategy.model, DistributedDataParallel)`
			`self.trainer.strategy.barrier("barrier before model is wrapped")`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00
			`def on_train_start(self):`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`assert isinstance(self.trainer.strategy.model, DistributedDataParallel)`
			`self.trainer.strategy.barrier("barrier after model is wrapped")`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00

Rename special to standalone (#10779) 2021-11-26 17:13:14 +00:00			`@RunIf(min_gpus=4, standalone=True)`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00			`@mock.patch("torch.distributed.barrier")`
			`def test_ddp_barrier_non_consecutive_device_ids(barrier_mock, tmpdir):`
Replace `yapf` with `black` (#7783) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2021-07-26 11:37:35 +00:00			`"""Test correct usage of barriers when device ids do not start at 0 or are not consecutive."""`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00			`model = BoringModel()`
			`gpus = [1, 3]`
Update standalone tests (#12472) 2022-03-28 02:16:17 +00:00			`trainer = Trainer(default_root_dir=tmpdir, max_steps=1, accelerator="gpu", devices=gpus, strategy="ddp")`
fix NCCL error with non-consecutive trainer gpus (#8165) * device ids in barrier x x s same fix for spawn fix non-nccl x * add changelog * get nccl backend * get backend Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> 2021-06-28 20:08:10 +00:00			`trainer.fit(model)`
			`barrier_mock.assert_any_call(device_ids=[gpus[trainer.local_rank]])`
update an outdated error message in DDPPlugin (#9005) 2021-08-23 15:29:07 +00:00

			`@mock.patch.dict(os.environ, {"LOCAL_RANK": "1"})`
			`def test_incorrect_ddp_script_spawning(tmpdir):`
			`"""Test an error message when user accidentally instructs Lightning to spawn children processes on rank > 0."""`

			`class WronglyImplementedEnvironment(LightningEnvironment):`
Rename `ClusterEnvironment.creates_processes` (#10106) Co-authored-by: tchaton <thomas@grid.ai> 2021-10-25 23:15:41 +00:00			`@property`
			`def creates_processes_externally(self):`
update an outdated error message in DDPPlugin (#9005) 2021-08-23 15:29:07 +00:00			`# returning false no matter what means Lightning would spawn also on ranks > 0 new processes`
			`return False`

			`model = BoringModel()`
			`trainer = Trainer(`
			`default_root_dir=tmpdir,`
(1/n) tests: Use strategy flag instead of accelerator for training strategies (#9931) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2021-10-16 15:10:25 +00:00			`strategy="ddp",`
Update tests in `strategies` directory in preparation for #11040 (#12467) 2022-03-27 16:13:48 +00:00			`accelerator="cpu",`
			`devices=2,`
(1/n) tests: Use strategy flag instead of accelerator for training strategies (#9931) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2021-10-16 15:10:25 +00:00			`plugins=[WronglyImplementedEnvironment()],`
update an outdated error message in DDPPlugin (#9005) 2021-08-23 15:29:07 +00:00			`)`
			`with pytest.raises(`
			RuntimeError, match="Lightning attempted to launch new distributed processes with `local_rank > 0`."
			`):`
			`trainer.fit(model)`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00

			`@RunIf(skip_windows=True)`
			`def test_ddp_configure_ddp():`
Rename `DDPPlugin` to `DDPStrategy` (#11142) * Raname DDPPlugin to DDPStrategy * Change ddp_plugin to ddp_strategy * update changelog * rename occurences in docs * rename more occurrences * fix line too long * more fixes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-21 08:55:51 +00:00			`"""Tests with ddp strategy."""`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`model = BoringModel()`
Rename `DDPPlugin` to `DDPStrategy` (#11142) * Raname DDPPlugin to DDPStrategy * Change ddp_plugin to ddp_strategy * update changelog * rename occurences in docs * rename more occurrences * fix line too long * more fixes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-21 08:55:51 +00:00			`ddp_strategy = DDPStrategy()`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`trainer = Trainer(`
			`max_epochs=1,`
Rename `DDPPlugin` to `DDPStrategy` (#11142) * Raname DDPPlugin to DDPStrategy * Change ddp_plugin to ddp_strategy * update changelog * rename occurences in docs * rename more occurrences * fix line too long * more fixes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-21 08:55:51 +00:00			`strategy=ddp_strategy,`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`)`
			`# test wrap the model if fitting`
			`trainer.state.fn = TrainerFn.FITTING`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`trainer.strategy.connect(model)`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`trainer.lightning_module.trainer = trainer`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`trainer.strategy.setup_environment()`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`assert isinstance(trainer.model, LightningModule)`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`trainer.strategy.setup(trainer)`
Rename `DDPPlugin` to `DDPStrategy` (#11142) * Raname DDPPlugin to DDPStrategy * Change ddp_plugin to ddp_strategy * update changelog * rename occurences in docs * rename more occurrences * fix line too long * more fixes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-21 08:55:51 +00:00			`# in DDPStrategy configure_ddp(), model wrapped by DistributedDataParallel`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`assert isinstance(trainer.model, DistributedDataParallel)`

Check `parallel_devices` passed through `strategy` is consistent with the `accelerator` flag (#12105) 2022-03-03 18:30:24 +00:00			`ddp_strategy = DDPStrategy()`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`trainer = Trainer(`
			`max_epochs=1,`
Rename `DDPPlugin` to `DDPStrategy` (#11142) * Raname DDPPlugin to DDPStrategy * Change ddp_plugin to ddp_strategy * update changelog * rename occurences in docs * rename more occurrences * fix line too long * more fixes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-21 08:55:51 +00:00			`strategy=ddp_strategy,`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`)`
check trainerfn == FITTING before configuring sync_batchnorm (#11919) Co-authored-by: edward-io <me@edward.io> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Aki Nitta <nitta@akihironitta.com> 2022-03-12 03:52:59 +00:00			`# test do not wrap the model if TrainerFn is not fitting`
3/n Simplify spawn plugins: Merge `pre_dispatch` and `setup` logic (#11137) 2021-12-20 16:41:22 +00:00			`trainer.state.fn = TrainerFn.VALIDATING`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`trainer.strategy.connect(model)`
3/n Simplify spawn plugins: Merge `pre_dispatch` and `setup` logic (#11137) 2021-12-20 16:41:22 +00:00			`trainer.lightning_module.trainer = trainer`
Deprecate Trainer.training_type_plugin in favor of trainer.strategy (#11141) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-22 02:11:43 +00:00			`trainer.strategy.setup_environment()`
			`trainer.strategy.setup(trainer)`
Rename `DDPPlugin` to `DDPStrategy` (#11142) * Raname DDPPlugin to DDPStrategy * Change ddp_plugin to ddp_strategy * update changelog * rename occurences in docs * rename more occurrences * fix line too long * more fixes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2021-12-21 08:55:51 +00:00			`# in DDPStrategy configure_ddp(), model are still LightningModule`
Avoid wrapping LightningModule in DDP plugins when not fitting (#9096) * Avoid wrapping LightningModule in DDP plugins when not fitting * Avoid wrapping LightningModule in DDP plugins when not fitting 2021-09-02 02:23:59 +00:00			`assert isinstance(trainer.model, LightningModule)`
check trainerfn == FITTING before configuring sync_batchnorm (#11919) Co-authored-by: edward-io <me@edward.io> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Aki Nitta <nitta@akihironitta.com> 2022-03-12 03:52:59 +00:00

			`@RunIf(min_gpus=1)`
			`@pytest.mark.parametrize(`
			`"trainer_fn", (TrainerFn.VALIDATING, TrainerFn.TUNING, TrainerFn.TESTING, TrainerFn.PREDICTING)`
			`)`
			`def test_ddp_dont_configure_sync_batchnorm(trainer_fn):`
			`model = BoringModelGPU()`
			`model.layer = torch.nn.BatchNorm1d(10)`
			`ddp_strategy = DDPStrategy()`
Update tests in `strategies` directory in preparation for #11040 (#12467) 2022-03-27 16:13:48 +00:00			`trainer = Trainer(accelerator="gpu", devices=1, strategy=ddp_strategy, sync_batchnorm=True)`
check trainerfn == FITTING before configuring sync_batchnorm (#11919) Co-authored-by: edward-io <me@edward.io> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Aki Nitta <nitta@akihironitta.com> 2022-03-12 03:52:59 +00:00			`trainer.state.fn = trainer_fn`
			`trainer.strategy.connect(model)`
			`trainer.lightning_module.trainer = trainer`
			`trainer.strategy.setup_environment()`
			`assert isinstance(trainer.model, LightningModule)`
			`trainer.strategy.setup(trainer)`
			`# because TrainerFn is not FITTING, model is not configured with sync batchnorm`
			`assert not isinstance(trainer.strategy.model.layer, torch.nn.modules.batchnorm.SyncBatchNorm)`
Do not configure launcher if processes are launched externally (#12431) 2022-03-24 09:40:34 +00:00

			`def test_configure_launcher_create_processes_externally():`
			`class MyClusterEnvironment(ClusterEnvironment):`
			`@property`
			`def creates_processes_externally(self):`
			`return True`

			`@property`
			`def main_address(self):`
			`return ""`

			`@property`
			`def main_port(self):`
			`return 8080`

			`@staticmethod`
			`def detect():`
			`return True`

			`def world_size(self):`
			`return 1`

			`def set_world_size(self):`
			`pass`

			`def global_rank(self):`
			`return 0`

			`def set_global_rank(self):`
			`pass`

			`def local_rank(self):`
			`return 0`

			`def node_rank(self):`
			`return 0`

			`ddp_strategy = DDPStrategy(cluster_environment=MyClusterEnvironment())`
			`assert ddp_strategy.launcher is None`
			`ddp_strategy._configure_launcher()`
			`assert ddp_strategy.launcher is None`