lightning/tests/plugins/environments/test_kubeflow_environment.py

101 lines
3.0 KiB
Python
Raw Normal View History

Add LSF support (#5102) * add ClusterEnvironment for LSF systems * update init file * add available cluster environments * clean up LSFEnvironment * add ddp_hpc as a distributed backend * clean up SLURMEnvironment * remove extra blank line * init device for DDPHPCAccelerator We need to do this so we don't send the model to the same device from multiple ranks * committing current state * add additional methods to ClusterEnvironments * add NVIDIA mixin for setting up CUDA envars * remove troubleshooting prints * cleanup SLURMEnvironment * fix docstring * cleanup TorchElasticEnvironment and add documentation * PEP8 puts a cork in it * add set_ranks_to_trainer * remove unused import * move to new location * update LSF environment * remove mixin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * changelog * reset slurm env * add tests * add licence * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * test node_rank * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add lsf env to docs * add auto detection for lsf environment * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix is_using_lsf() and test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2021-07-09 14:14:26 +00:00
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from unittest import mock
import pytest
from pytorch_lightning.plugins.environments import KubeflowEnvironment
@mock.patch.dict(os.environ, {})
def test_default_attributes():
    """Test the default attributes when no environment variables are set."""
    env = KubeflowEnvironment()
    assert env.creates_children()
    # MASTER_ADDR, MASTER_PORT, WORLD_SIZE and RANK are all required; each
    # accessor must raise KeyError while its variable is unset.
    for required_accessor in (env.master_address, env.master_port, env.world_size, env.global_rank):
        with pytest.raises(KeyError):
            required_accessor()
    assert env.local_rank() == 0
@mock.patch.dict(
    os.environ, {
        "KUBERNETES_PORT": "tcp://127.0.0.1:443",
        "MASTER_ADDR": "1.2.3.4",
        "MASTER_PORT": "500",
        "WORLD_SIZE": "20",
        "RANK": "1",
    }
)
def test_attributes_from_environment_variables(caplog):
    """ Test that the Kubeflow environment takes the attributes from the environment variables. """
    env = KubeflowEnvironment()
    assert env.master_address() == "1.2.3.4"
    assert env.master_port() == 500
    assert env.world_size() == 20
    assert env.global_rank() == 1
    assert env.local_rank() == 0
    # Kubeflow runs one process per node, so node rank equals global rank.
    assert env.node_rank() == 1
    # setter should be no-op: rank/world size come from the cluster, not the user
    with caplog.at_level(logging.DEBUG, logger="pytorch_lightning.plugins.environments"):
        env.set_global_rank(100)
    assert env.global_rank() == 1
    assert "setting global rank is not allowed" in caplog.text
    caplog.clear()
    with caplog.at_level(logging.DEBUG, logger="pytorch_lightning.plugins.environments"):
        env.set_world_size(100)
    assert env.world_size() == 20
    assert "setting world size is not allowed" in caplog.text
@mock.patch.dict(
    os.environ,
    {
        "KUBERNETES_PORT": "tcp://127.0.0.1:443",
        "MASTER_ADDR": "1.2.3.4",
        "MASTER_PORT": "500",
        "WORLD_SIZE": "20",
        "RANK": "1",
    },
)
def test_is_using_kubeflow():
    """Detection succeeds when the Kubeflow-style environment variables are present."""
    assert KubeflowEnvironment.is_using_kubeflow()
@mock.patch.dict(
    os.environ,
    {
        "KUBERNETES_PORT": "tcp://127.0.0.1:443",
        "MASTER_ADDR": "1.2.3.4",
        "MASTER_PORT": "500",
        "WORLD_SIZE": "20",
        "RANK": "1",
        # GROUP_RANK is set by torchelastic, so detection must reject this env.
        "GROUP_RANK": "1",
    },
)
def test_is_using_kubeflow_torchelastic():
    """Detection fails when the environment looks like torchelastic inside Kubernetes."""
    assert not KubeflowEnvironment.is_using_kubeflow()