2021-04-16 12:31:56 +00:00
|
|
|
# Copyright The Lightning AI team.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2023-02-20 10:01:06 +00:00
|
|
|
from unittest import mock
|
|
|
|
|
2021-04-16 12:31:56 +00:00
|
|
|
import pytest
|
|
|
|
|
2023-02-02 10:06:45 +00:00
|
|
|
from lightning.pytorch import Trainer
|
|
|
|
from lightning.pytorch.plugins import CheckpointIO
|
2023-02-27 19:43:23 +00:00
|
|
|
from lightning.pytorch.strategies import DDPStrategy, DeepSpeedStrategy, FSDPStrategy, StrategyRegistry, XLAStrategy
|
2022-06-15 22:10:49 +00:00
|
|
|
from tests_pytorch.helpers.runif import RunIf
|
2021-04-16 12:31:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize(
    "strategy_name, init_params",
    [
        ("deepspeed", {}),
        ("deepspeed_stage_1", {"stage": 1}),
        ("deepspeed_stage_2", {"stage": 2}),
        ("deepspeed_stage_2_offload", {"stage": 2, "offload_optimizer": True}),
        ("deepspeed_stage_3", {"stage": 3}),
        ("deepspeed_stage_3_offload", {"stage": 3, "offload_parameters": True, "offload_optimizer": True}),
    ],
)
def test_strategy_registry_with_deepspeed_strategies(strategy_name, init_params):
    """Check the registry entry stored under each DeepSpeed alias.

    For every parametrized alias the registry must contain the name, store
    exactly ``init_params`` under ``"init_params"`` and point ``"strategy"``
    at ``DeepSpeedStrategy``.
    """
    assert strategy_name in StrategyRegistry

    # Fetch the entry once; both remaining checks read from the same record.
    registered = StrategyRegistry[strategy_name]
    assert registered["init_params"] == init_params
    assert registered["strategy"] == DeepSpeedStrategy
|
2021-04-16 12:31:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
@RunIf(deepspeed=True)
@pytest.mark.parametrize("strategy", ["deepspeed", "deepspeed_stage_2_offload", "deepspeed_stage_3"])
def test_deepspeed_strategy_registry_with_trainer(tmpdir, strategy):
    """Check that a DeepSpeed alias passed to ``Trainer`` yields a ``DeepSpeedStrategy``.

    The trainer is built with ``precision="16-mixed"`` and ``tmpdir`` as its
    root directory; only the type of the resulting strategy is inspected.
    """
    selected_strategy = Trainer(
        default_root_dir=tmpdir,
        strategy=strategy,
        precision="16-mixed",
    ).strategy
    assert isinstance(selected_strategy, DeepSpeedStrategy)
|
2021-05-04 22:40:00 +00:00
|
|
|
|
|
|
|
|
2022-07-22 16:05:35 +00:00
|
|
|
@RunIf(skip_windows=True)
@mock.patch("lightning.pytorch.strategies.xla.XLAStrategy.set_world_ranks")
def test_xla_debug_strategy_registry(_, tpu_available, xla_available):
    """Check the ``"xla_debug"`` registry entry and that ``Trainer`` resolves it.

    ``XLAStrategy.set_world_ranks`` is replaced by a mock for the duration of
    the test (received as ``_``). ``tpu_available`` and ``xla_available`` are
    requested as arguments but never read in the body; presumably they are
    fixtures defined outside this file.
    """
    registry_key = "xla_debug"

    assert registry_key in StrategyRegistry
    entry = StrategyRegistry[registry_key]
    assert entry["init_params"] == {"debug": True}
    assert entry["strategy"] == XLAStrategy

    trainer = Trainer(strategy=registry_key)
    assert isinstance(trainer.strategy, XLAStrategy)
|
2021-07-24 04:02:54 +00:00
|
|
|
|
|
|
|
|
2023-01-23 13:39:04 +00:00
|
|
|
@RunIf(min_torch="1.12")
def test_fsdp_strategy_registry(cuda_count_1):
    """Check the ``"fsdp"`` registry entry and that ``Trainer`` resolves it.

    ``cuda_count_1`` is requested as an argument but not read in the body;
    presumably it is a fixture defined outside this file.
    """
    registry_key = "fsdp"

    assert registry_key in StrategyRegistry
    assert StrategyRegistry[registry_key]["strategy"] == FSDPStrategy

    resolved = Trainer(accelerator="cuda", strategy=registry_key).strategy
    assert isinstance(resolved, FSDPStrategy)
|
2021-11-05 09:42:58 +00:00
|
|
|
|
|
|
|
|
2021-07-24 04:02:54 +00:00
|
|
|
@pytest.mark.parametrize(
    "strategy_name, strategy, expected_init_params",
    [
        # start_method="popen"
        ("ddp_find_unused_parameters_false", DDPStrategy, {"find_unused_parameters": False, "start_method": "popen"}),
        ("ddp_find_unused_parameters_true", DDPStrategy, {"find_unused_parameters": True, "start_method": "popen"}),
        # start_method="spawn"
        (
            "ddp_spawn_find_unused_parameters_false",
            DDPStrategy,
            {"find_unused_parameters": False, "start_method": "spawn"},
        ),
        (
            "ddp_spawn_find_unused_parameters_true",
            DDPStrategy,
            {"find_unused_parameters": True, "start_method": "spawn"},
        ),
        # start_method="fork" (these cases are skipped on Windows via RunIf)
        pytest.param(
            "ddp_fork_find_unused_parameters_false",
            DDPStrategy,
            {"find_unused_parameters": False, "start_method": "fork"},
            marks=RunIf(skip_windows=True),
        ),
        pytest.param(
            "ddp_fork_find_unused_parameters_true",
            DDPStrategy,
            {"find_unused_parameters": True, "start_method": "fork"},
            marks=RunIf(skip_windows=True),
        ),
        # The "notebook" aliases are expected to carry the same init params as the "fork" ones.
        pytest.param(
            "ddp_notebook_find_unused_parameters_false",
            DDPStrategy,
            {"find_unused_parameters": False, "start_method": "fork"},
            marks=RunIf(skip_windows=True),
        ),
        pytest.param(
            "ddp_notebook_find_unused_parameters_true",
            DDPStrategy,
            {"find_unused_parameters": True, "start_method": "fork"},
            marks=RunIf(skip_windows=True),
        ),
    ],
)
def test_ddp_find_unused_parameters_strategy_registry(
    tmpdir, strategy_name, strategy, expected_init_params, mps_count_0
):
    """Check each ``ddp*_find_unused_parameters_*`` alias against the registry and ``Trainer``.

    A ``Trainer`` built from ``strategy_name`` must hold an instance of
    ``strategy``, and the registry entry for that name must store
    ``expected_init_params`` and the same strategy class. ``mps_count_0`` is
    requested as an argument but not read in the body; presumably it is a
    fixture defined outside this file.
    """
    resolved = Trainer(default_root_dir=tmpdir, strategy=strategy_name).strategy
    assert isinstance(resolved, strategy)

    assert strategy_name in StrategyRegistry
    entry = StrategyRegistry[strategy_name]
    assert entry["init_params"] == expected_init_params
    assert entry["strategy"] == strategy
|
2021-10-29 04:06:06 +00:00
|
|
|
|
|
|
|
|
2022-01-05 11:44:18 +00:00
|
|
|
def test_custom_registered_strategy_to_strategy_flag():
    """Check that a user-registered strategy can be selected through ``Trainer(strategy=...)``.

    A ``DDPStrategy`` is registered under ``"ddp_custom_checkpoint_io"`` with a
    custom ``CheckpointIO`` instance. The trainer built from that name must
    hold a ``DDPStrategy`` whose ``checkpoint_io`` is the registered instance.
    The registration is removed again at the end so the entry does not leak
    into other tests that share the same ``StrategyRegistry``.
    """

    class CustomCheckpointIO(CheckpointIO):
        """Checkpoint IO whose methods do nothing; only its identity is checked."""

        def save_checkpoint(self, checkpoint, path):
            pass

        def load_checkpoint(self, path):
            pass

        def remove_checkpoint(self, path):
            pass

    custom_checkpoint_io = CustomCheckpointIO()
    strategy_name = "ddp_custom_checkpoint_io"

    # Register the DDP Strategy with your custom CheckpointIO plugin
    StrategyRegistry.register(
        strategy_name,
        DDPStrategy,
        description="DDP Strategy with custom checkpoint io plugin",
        checkpoint_io=custom_checkpoint_io,
    )
    try:
        trainer = Trainer(strategy=strategy_name, accelerator="cpu", devices=2)

        assert isinstance(trainer.strategy, DDPStrategy)
        assert trainer.strategy.checkpoint_io == custom_checkpoint_io
    finally:
        # StrategyRegistry is shared by the whole test session: without this
        # cleanup the custom entry stays visible to later tests and a repeated
        # run of this test would register an already-present name.
        StrategyRegistry.remove(strategy_name)
|