refactor imports of optional dependencies (#4859)
* refactor imports of optional dependencies
* fix
* fix
* fix
* fix
* fix
* flake8
* flake8

Co-authored-by: Sean Naren <sean.narenthiran@gmail.com>
Co-authored-by: chaton <thomas@grid.ai>
parent 02ac3aba05
commit 3976db597d

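The hunks below replace per-file try/except imports of Horovod with a single HOROVOD_AVAILABLE flag defined once in pytorch_lightning.utilities via _module_available. As a rough illustration of the pattern (the _module_available body here is a simplified, importlib-based stand-in, not the exact Lightning implementation):

    import importlib.util


    def _module_available(module_path: str) -> bool:
        # True when `module_path` (e.g. "horovod.torch") is importable.
        # find_spec returns None for a missing leaf module and raises
        # ModuleNotFoundError when a parent package is absent.
        try:
            return importlib.util.find_spec(module_path) is not None
        except ModuleNotFoundError:
            return False


    HOROVOD_AVAILABLE = _module_available("horovod.torch")

    if HOROVOD_AVAILABLE:
        import horovod.torch as hvd  # noqa: F401

Call sites then import the flag instead of re-deriving it, so each optional dependency is detected in one place and the import sites stay side-effect free.
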
@@ -15,21 +15,18 @@ import os
 import torch
 
+from pytorch_lightning.utilities import HOROVOD_AVAILABLE
 from pytorch_lightning import _logger as log
 from pytorch_lightning import accelerators
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.cluster_environments.slurm_environment import SLURMEnvironment
 from pytorch_lightning.cluster_environments.torchelastic_environment import TorchElasticEnvironment
-from pytorch_lightning.utilities import XLA_AVAILABLE, device_parser, rank_zero_only, TPU_AVAILABLE
+from pytorch_lightning.utilities import device_parser, rank_zero_only, TPU_AVAILABLE
 from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_warn
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
-try:
+if HOROVOD_AVAILABLE:
     import horovod.torch as hvd
-except (ModuleNotFoundError, ImportError):
-    HOROVOD_AVAILABLE = False
-else:
-    HOROVOD_AVAILABLE = True
 
 
 class AcceleratorConnector:

@@ -18,15 +18,11 @@ import torch
 from torch.optim.lr_scheduler import _LRScheduler
 
 from pytorch_lightning.accelerators.accelerator import Accelerator, ReduceOp
-from pytorch_lightning.utilities import AMPType
+from pytorch_lightning.utilities import AMPType, HOROVOD_AVAILABLE
 from pytorch_lightning.utilities.distributed import rank_zero_only
 
-try:
+if HOROVOD_AVAILABLE:
     import horovod.torch as hvd
-except (ModuleNotFoundError, ImportError):
-    HOROVOD_AVAILABLE = False
-else:
-    HOROVOD_AVAILABLE = True
 
 
 class HorovodAccelerator(Accelerator):

@@ -1670,7 +1670,7 @@ class LightningModule(
             line = re.sub(r"\s+", "", line, flags=re.UNICODE)
             if ".hparams=" in line:
                 return line.split("=")[1]
-    except Exception as e:
+    except Exception:
         return "hparams"
 
     return None

@@ -16,14 +16,14 @@ import multiprocessing
 import platform
 from abc import ABC
 from copy import deepcopy
-from typing import Callable, Iterable, List, Optional, Tuple, Union
+from typing import Union, List, Tuple, Callable, Optional, Iterable
 
 from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
 
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.core import LightningModule
-from pytorch_lightning.utilities import TPU_AVAILABLE, rank_zero_warn
+from pytorch_lightning.utilities import rank_zero_warn, TPU_AVAILABLE, HOROVOD_AVAILABLE
 from pytorch_lightning.utilities.data import has_iterable_dataset, has_len
 from pytorch_lightning.utilities.debugging import InternalDebugger
 from pytorch_lightning.utilities.exceptions import MisconfigurationException

@@ -32,12 +32,8 @@ from pytorch_lightning.utilities.model_utils import is_overridden
 if TPU_AVAILABLE:
     import torch_xla.core.xla_model as xm
 
-try:
+if HOROVOD_AVAILABLE:
     import horovod.torch as hvd
-except (ModuleNotFoundError, ImportError):
-    HOROVOD_AVAILABLE = False
-else:
-    HOROVOD_AVAILABLE = True
 
 
 class TrainerDataLoadingMixin(ABC):

@@ -47,6 +47,7 @@ APEX_AVAILABLE = _module_available("apex.amp")
 NATIVE_AMP_AVAILABLE = _module_available("torch.cuda.amp") and hasattr(torch.cuda.amp, "autocast")
 OMEGACONF_AVAILABLE = _module_available("omegaconf")
 HYDRA_AVAILABLE = _module_available("hydra")
+HOROVOD_AVAILABLE = _module_available("horovod.torch")
 
 TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists()
 FAIRSCALE_AVAILABLE = platform.system() != 'Windows' and _module_available('fairscale.nn.data_parallel')

@@ -87,7 +87,7 @@ class InternalDebugger(object):
         for dl in dataloaders:
             try:
                 length = len(dl)
-            except Exception as e:
+            except Exception:
                 length = -1
             lengths.append(length)

@@ -21,18 +21,19 @@ import json
 import os
 import sys
 
-try:
-    import horovod.torch as hvd
-except ImportError:
-    print('You requested to import Horovod which is missing or not supported for your OS.')
-
 PATH_HERE = os.path.abspath(os.path.dirname(__file__))
 PATH_ROOT = os.path.abspath(os.path.join(PATH_HERE, '..', '..', '..', '..'))
 sys.path.insert(0, os.path.abspath(PATH_ROOT))
 
 from pytorch_lightning import Trainer  # noqa: E402
 from pytorch_lightning.callbacks import ModelCheckpoint  # noqa: E402
+from pytorch_lightning.utilities import HOROVOD_AVAILABLE  # noqa: E402
+
+if HOROVOD_AVAILABLE:
+    import horovod.torch as hvd  # noqa: E402
+else:
+    print('You requested to import Horovod which is missing or not supported for your OS.')
 
 
 # Move project root to the front of the search path, as some imports may have reordered things
 idx = sys.path.index(PATH_ROOT)

@@ -29,33 +29,25 @@ from pytorch_lightning import Trainer
 from pytorch_lightning.accelerators.horovod_accelerator import HorovodAccelerator
 from pytorch_lightning.core.step_result import EvalResult, Result, TrainResult
 from pytorch_lightning.metrics.classification.accuracy import Accuracy
-from pytorch_lightning.utilities import APEX_AVAILABLE, NATIVE_AMP_AVAILABLE
+from pytorch_lightning.utilities import APEX_AVAILABLE, NATIVE_AMP_AVAILABLE, HOROVOD_AVAILABLE, _module_available
 from tests.base import EvalModelTemplate
 from tests.base.boring_model import BoringModel
 from tests.base.models import BasicGAN
 
-try:
+if HOROVOD_AVAILABLE:
     import horovod
-    from horovod.common.util import nccl_built
-except ImportError:
-    HOROVOD_AVAILABLE = False
-else:
-    HOROVOD_AVAILABLE = True
-
     import horovod.torch as hvd
 
 # This script will run the actual test model training in parallel
 TEST_SCRIPT = os.path.join(os.path.dirname(__file__), 'data', 'horovod', 'train_default_model.py')
 
 
-def _nccl_available():
-    if not HOROVOD_AVAILABLE:
-        return False
-
-    try:
-        return nccl_built()
-    except AttributeError:
-        # Horovod 0.19.1 nccl_built() does not yet work with Python 3.8:
-        # See: https://github.com/horovod/horovod/issues/1891
-        return False
+try:
+    from horovod.common.util import nccl_built
+    nccl_built()
+except (ImportError, ModuleNotFoundError, AttributeError):
+    HOROVOD_NCCL_AVAILABLE = False
+finally:
+    HOROVOD_NCCL_AVAILABLE = True
 
 
 def _run_horovod(trainer_options, on_gpu=False):

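Note on the NCCL probe introduced above: the finally clause runs on every path, so HOROVOD_NCCL_AVAILABLE is re-assigned to True even when the probe raised; an else clause would set it to True only on success. A minimal sketch of that variant, using the same imports and names as the diff:

    try:
        from horovod.common.util import nccl_built
        nccl_built()
    except (ImportError, ModuleNotFoundError, AttributeError):
        # Horovod missing, or nccl_built unavailable on this build.
        HOROVOD_NCCL_AVAILABLE = False
    else:
        # Only reached when the probe above succeeded.
        HOROVOD_NCCL_AVAILABLE = True
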
@@ -114,7 +106,7 @@ def test_horovod_cpu_implicit(enable_pl_optimizer, tmpdir):
 
 
 @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows")
-@pytest.mark.skipif(not _nccl_available(), reason="test requires Horovod with NCCL support")
+@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_horovod_multi_gpu(tmpdir):
     """Test Horovod with multi-GPU support."""

@@ -134,7 +126,7 @@ def test_horovod_multi_gpu(tmpdir):
 
 
 @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows")
-@pytest.mark.skipif(not _nccl_available(), reason="test requires Horovod with NCCL support")
+@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not APEX_AVAILABLE, reason="test requires apex")
 def test_horovod_apex(tmpdir):

@@ -158,7 +150,7 @@ def test_horovod_apex(tmpdir):
 
 @pytest.mark.skip(reason="Skip till Horovod fixes integration with Native torch.cuda.amp")
 @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows")
-@pytest.mark.skipif(not _nccl_available(), reason="test requires Horovod with NCCL support")
+@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not NATIVE_AMP_AVAILABLE, reason="test requires torch.cuda.amp")
 def test_horovod_amp(tmpdir):

@@ -181,7 +173,7 @@ def test_horovod_amp(tmpdir):
 
 
 @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows")
-@pytest.mark.skipif(not _nccl_available(), reason="test requires Horovod with NCCL support")
+@pytest.mark.skipif(not HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
 def test_horovod_transfer_batch_to_gpu(tmpdir):
     class TestTrainingStepModel(EvalModelTemplate):

@@ -263,10 +255,6 @@ def test_result_reduce_horovod(enable_pl_optimizer, tmpdir):
     path_root = os.path.abspath(os.path.join(path_here, '..', '..'))
     sys.path.insert(0, os.path.abspath(path_root))
 
-    import horovod.torch as hvd
-
-    from tests.base.boring_model import BoringModel
-
     class TestModel(BoringModel):
         def training_step(self, batch, batch_idx):
             self.training_step_called = True

@@ -318,8 +306,6 @@ def test_accuracy_metric_horovod():
     target = torch.randint(high=2, size=(num_batches, batch_size))
 
     def _compute_batch():
-        import horovod.torch as hvd
-
         trainer = Trainer(
             fast_dev_run=True,
             distributed_backend='horovod',