From 0435e23a64c32ce3888b4d12a8c117eea2d4cf01 Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 8 Jan 2021 21:13:12 +0000 Subject: [PATCH] deprecate enable_pl_optimizer as it is not restored properly (#5244) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update * clean test * still in progress * update test * update * update * resolve flake * add test for zero_grad * update * works without accumulated_grad * update * update * resolve amp * revert back to True * update * clean tests * cleaned out * typo * update test * repair git bug * remove print * update * Fix formatting/optimizer imports * Refactor the test for cleanliness * Add vanilla model to the test, better var names * Fixed var names, let's clean up these mock tests * repair test * update test * resolve flake8 * add manual_optimization * update tests * resolve flake8 * add random accumulate_grad_batches * improve test * Update tests/trainer/optimization/test_parity_automatic_optimization.py Co-authored-by: Jirka Borovec * Update tests/trainer/optimization/test_parity_automatic_optimization.py Co-authored-by: Jirka Borovec * update * clean tests * correct bug * Apply suggestions from code review * format * address comments * update on comments * wip * typo * deprecate enable_pl_optimizer * resolve latest bugs * update * resolve merge * add comment * Update pytorch_lightning/core/lightning.py Co-authored-by: Jirka Borovec * Update tests/deprecated_api/test_remove_1-3.py Co-authored-by: Jirka Borovec * Update pytorch_lightning/trainer/connectors/optimizer_connector.py Co-authored-by: Jirka Borovec * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jirka Borovec * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jirka Borovec * Update tests/trainer/optimization/test_parity_automatic_optimization.py Co-authored-by: Jirka Borovec * update on comments * update restore * add a property * remove setstate as not needed anymore * update test * provide optimizer to on_before_zero_grad * update on comments * update on comments * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Adrian Wälchli * Update tests/trainer/optimization/test_parity_automatic_optimization.py Co-authored-by: Adrian Wälchli * Update tests/trainer/optimization/test_parity_automatic_optimization.py Co-authored-by: Adrian Wälchli * Update tests/trainer/optimization/test_parity_automatic_optimization.py Co-authored-by: Adrian Wälchli * modify import * update changelog * resolve flake8 * update * update * clean doc Co-authored-by: SeanNaren Co-authored-by: Ubuntu Co-authored-by: Jirka Borovec Co-authored-by: Jirka Borovec Co-authored-by: Adrian Wälchli Co-authored-by: Sean Naren (cherry picked from commit f2e99d617f05ec65fded81ccc6d0d59807c47573) --- CHANGELOG.md | 2 + README.md | 3 +- benchmarks/test_sharded_parity.py | 3 +- docs/source/new-project.rst | 3 +- docs/source/optimizers.rst | 31 ++++++++++---- docs/source/trainer.rst | 6 ++- .../accelerators/cpu_accelerator.py | 2 - .../accelerators/ddp2_accelerator.py | 2 - .../accelerators/ddp_accelerator.py | 4 +- .../accelerators/ddp_cpu_spawn_accelerator.py | 2 - .../accelerators/ddp_hpc_accelerator.py | 4 +- .../accelerators/ddp_spawn_accelerator.py | 2 - .../accelerators/dp_accelerator.py | 2 - .../accelerators/gpu_accelerator.py | 2 - .../accelerators/horovod_accelerator.py | 2 - .../accelerators/tpu_accelerator.py | 3 +- pytorch_lightning/core/lightning.py | 7 +++- pytorch_lightning/core/optimizer.py | 16 ++++--
.../plugins/ddp_sequential_plugin.py | 4 +- pytorch_lightning/plugins/native_amp.py | 5 ++- pytorch_lightning/plugins/sharded_plugin.py | 3 +- .../trainer/configuration_validator.py | 17 -------- .../trainer/connectors/optimizer_connector.py | 6 ++- .../trainer/connectors/precision_connector.py | 1 - pytorch_lightning/trainer/optimizers.py | 7 +++- pytorch_lightning/trainer/properties.py | 22 +++++----- pytorch_lightning/trainer/trainer.py | 5 ++- pytorch_lightning/trainer/training_loop.py | 3 ++ tests/callbacks/test_callbacks.py | 2 - tests/checkpointing/test_model_checkpoint.py | 9 +---- tests/checkpointing/test_torch_saving.py | 6 +-- tests/core/test_lightning_module.py | 12 +----- tests/core/test_lightning_optimizer.py | 12 +----- tests/deprecated_api/test_remove_1-3.py | 5 +++ tests/models/test_amp.py | 8 ++-- tests/models/test_cpu.py | 40 ++++++++++++------- tests/models/test_horovod.py | 22 ++++------ tests/models/test_restore.py | 21 ++++------ .../optimization/test_manual_optimization.py | 40 ------------------- tests/trainer/optimization/test_optimizers.py | 5 +-- tests/trainer/test_trainer.py | 19 ++++----- 41 files changed, 156 insertions(+), 214 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0e373ba78..fc7b105354 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -130,6 +130,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed +- Changed deprecated `enable_pl_optimizer=True` ([#5244](https://github.com/PyTorchLightning/pytorch-lightning/pull/5244)) + ### Deprecated diff --git a/README.md b/README.md index 29abbd5aa5..402b494414 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,8 @@ with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as tmpfile: ```python class LitAutoEncoder(pl.LightningModule): def training_step(self, batch, batch_idx, opt_idx): - (opt_a, opt_b) = self.optimizers() + # access your optimizers with use_pl_optimizer=False. Default is True + (opt_a, opt_b) = self.optimizers(use_pl_optimizer=True) loss_a = ... self.manual_backward(loss_a, opt_a) diff --git a/benchmarks/test_sharded_parity.py b/benchmarks/test_sharded_parity.py index 05fde8e115..0197549359 100644 --- a/benchmarks/test_sharded_parity.py +++ b/benchmarks/test_sharded_parity.py @@ -175,7 +175,8 @@ class SeedTrainLoaderModel(BoringModel): class SeedTrainLoaderManualModel(SeedTrainLoaderModel): def training_step(self, batch, batch_idx, optimizer_idx): # manual - (opt_a, opt_b) = self.optimizers() + # access your optimizers with use_pl_optimizer=False. Default is True + (opt_a, opt_b) = self.optimizers(use_pl_optimizer=True) loss_1 = self.step(batch) self.manual_backward(loss_1, opt_a) diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 41af270b79..21c8693d12 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -269,7 +269,8 @@ Now you own the train loop! .. code-block:: python def training_step(self, batch, batch_idx, opt_idx): - (opt_a, opt_b, opt_c) = self.optimizers() + # access your optimizers with use_pl_optimizer=False. Default is True + (opt_a, opt_b, opt_c) = self.optimizers(use_pl_optimizer=True) loss_a = self.generator(batch[0]) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 5e96b5da0d..588bdefb36 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -28,8 +28,15 @@ to manually manage the optimization process. To do so, do the following: ..
code-block:: python def training_step(self, batch, batch_idx, optimizer_idx): - # ignore optimizer_idx - (opt_g, opt_d) = self.optimizers() + + # 1. ignore optimizer_idx + # 2. `use_pl_optimizer=True` means `opt_g` and `opt_d` will be of type `LightningOptimizer` + # `LightningOptimizer` simply wraps your optimizer and behaves the same way! + # When calling `optimizer.step`, `LightningOptimizer` handles TPU, AMP, accumulate_grad_batches, etc. for you. + + # access the raw optimizers with `use_pl_optimizer=False`, or via `optimizer.optimizer` when using use_pl_optimizer=True + # use_pl_optimizer=True is the default + (opt_g, opt_d) = self.optimizers(use_pl_optimizer=True) # do anything you want loss_a = ... @@ -242,19 +249,29 @@ Here we add a learning-rate warm up # update params optimizer.step(closure=closure) -The default ``optimizer_step`` is relying on the internal ``LightningOptimizer`` to properly perform a step. +.. note:: The default ``optimizer_step`` relies on the internal ``LightningOptimizer`` to properly perform a step. It handles TPUs, AMP, accumulate_grad_batches, zero_grad, and much more. .. testcode:: - from pytorch_lightning.core.optimizer import LightningOptimizer + # function hook in LightningModule + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + optimizer.step(closure=closure) + +.. note:: To access the wrapped optimizer from a ``LightningOptimizer``, do as follows. + +.. testcode:: # function hook in LightningModule def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): - if not isinstance(optimizer, LightningOptimizer): - # wraps into LightingOptimizer only for running step - optimizer = LightningOptimizer.to_lightning_optimizer(optimizer, self.trainer) + + # `optimizer` is a ``LightningOptimizer`` wrapping your optimizer. + # To access the wrapped optimizer, do as follows: + optimizer = optimizer.optimizer + + # run the step. However, it won't handle TPU, AMP, etc. optimizer.step(closure=closure) + ---------- Using the closure functions for optimization diff --git a/docs/source/trainer.rst b/docs/source/trainer.rst index ecbe241f9f..e55adf0e43 100644 --- a/docs/source/trainer.rst +++ b/docs/source/trainer.rst @@ -335,7 +335,8 @@ optimizer behavior Example:: def training_step(self, batch, batch_idx): - opt = self.optimizers() + # access your optimizers with use_pl_optimizer=False. Default is True + opt = self.optimizers(use_pl_optimizer=True) loss = ... self.manual_backward(loss, opt) @@ -350,7 +351,8 @@ In the multi-optimizer case, ignore the optimizer_idx flag and use the optimizer Example:: def training_step(self, batch, batch_idx, optimizer_idx): - (opt_a, opt_b) = self.optimizers() + # access your optimizers with use_pl_optimizer=False. Default is True + (opt_a, opt_b) = self.optimizers(use_pl_optimizer=True) gen_loss = ...
self.manual_backward(gen_loss, opt_a) diff --git a/pytorch_lightning/accelerators/cpu_accelerator.py b/pytorch_lightning/accelerators/cpu_accelerator.py index 7c80a4a30d..3c2eac7dbb 100644 --- a/pytorch_lightning/accelerators/cpu_accelerator.py +++ b/pytorch_lightning/accelerators/cpu_accelerator.py @@ -48,8 +48,6 @@ class CPUAccelerator(Accelerator): # allow for lr schedulers as well self.setup_optimizers(model) - self.trainer.convert_to_lightning_optimizers() - self.trainer.model = model def train(self): diff --git a/pytorch_lightning/accelerators/ddp2_accelerator.py b/pytorch_lightning/accelerators/ddp2_accelerator.py index a61db38ea7..0448bf8628 100644 --- a/pytorch_lightning/accelerators/ddp2_accelerator.py +++ b/pytorch_lightning/accelerators/ddp2_accelerator.py @@ -189,8 +189,6 @@ class DDP2Accelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - # device ids change depending on the DDP setup device_ids = self.get_device_ids() diff --git a/pytorch_lightning/accelerators/ddp_accelerator.py b/pytorch_lightning/accelerators/ddp_accelerator.py index 081d66c79e..4f75c6844e 100644 --- a/pytorch_lightning/accelerators/ddp_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_accelerator.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License import os +from os.path import abspath import subprocess import sys -from os.path import abspath from time import sleep from typing import Any, List, Optional, Union @@ -292,8 +292,6 @@ class DDPAccelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - # device ids change depending on the DDP setup device_ids = self.get_device_ids() diff --git a/pytorch_lightning/accelerators/ddp_cpu_spawn_accelerator.py b/pytorch_lightning/accelerators/ddp_cpu_spawn_accelerator.py index afbdeed2b3..2820763a61 100644 --- a/pytorch_lightning/accelerators/ddp_cpu_spawn_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_cpu_spawn_accelerator.py @@ -148,8 +148,6 @@ class DDPCPUSpawnAccelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - # DDP spawn already spawned off each process... 
no need to do anything device_ids = self.get_device_ids() diff --git a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py index c708c5e106..ad953da6d1 100644 --- a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py @@ -14,8 +14,8 @@ from typing import Any, List, Optional, Union import torch -import torch.distributed as torch_distrib import torch.distributed as dist +import torch.distributed as torch_distrib from torch.nn.parallel import DistributedDataParallel from pytorch_lightning import _logger as log @@ -177,8 +177,6 @@ class DDPHPCAccelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - # device ids change depending on the DDP setup device_ids = self.get_device_ids() diff --git a/pytorch_lightning/accelerators/ddp_spawn_accelerator.py b/pytorch_lightning/accelerators/ddp_spawn_accelerator.py index 3cd79700ef..2ff5fa0cc0 100644 --- a/pytorch_lightning/accelerators/ddp_spawn_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_spawn_accelerator.py @@ -163,8 +163,6 @@ class DDPSpawnAccelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - # device ids change depending on the DDP setup device_ids = self.get_device_ids() diff --git a/pytorch_lightning/accelerators/dp_accelerator.py b/pytorch_lightning/accelerators/dp_accelerator.py index 847d156d4f..8eb1b199a6 100644 --- a/pytorch_lightning/accelerators/dp_accelerator.py +++ b/pytorch_lightning/accelerators/dp_accelerator.py @@ -64,8 +64,6 @@ class DataParallelAccelerator(Accelerator): if self.trainer.amp_backend: model = self.__init_half_precision(model) - self.trainer.convert_to_lightning_optimizers() - self.trainer.model = model def __init_torch_data_parallel(self, model): diff --git a/pytorch_lightning/accelerators/gpu_accelerator.py b/pytorch_lightning/accelerators/gpu_accelerator.py index 2fe3b26679..62486f04a5 100644 --- a/pytorch_lightning/accelerators/gpu_accelerator.py +++ b/pytorch_lightning/accelerators/gpu_accelerator.py @@ -53,8 +53,6 @@ class GPUAccelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - self.trainer.model = model def train(self): diff --git a/pytorch_lightning/accelerators/horovod_accelerator.py b/pytorch_lightning/accelerators/horovod_accelerator.py index 150be86210..57f39125c6 100644 --- a/pytorch_lightning/accelerators/horovod_accelerator.py +++ b/pytorch_lightning/accelerators/horovod_accelerator.py @@ -90,8 +90,6 @@ class HorovodAccelerator(Accelerator): # 16-bit model = self.trainer.precision_connector.connect(model) - self.trainer.convert_to_lightning_optimizers() - # Update logger rank info from Horovod to avoid race conditions from different ranks # creating directories / writing files in the same locations. 
self.trainer.global_rank = hvd.rank() diff --git a/pytorch_lightning/accelerators/tpu_accelerator.py b/pytorch_lightning/accelerators/tpu_accelerator.py index 66fc236a2a..74b30eff0b 100644 --- a/pytorch_lightning/accelerators/tpu_accelerator.py +++ b/pytorch_lightning/accelerators/tpu_accelerator.py @@ -30,6 +30,7 @@ from pytorch_lightning.utilities import ( rank_zero_info, rank_zero_only, rank_zero_warn, + TPU_AVAILABLE, ) from pytorch_lightning.utilities.cloud_io import atomic_save from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -229,8 +230,6 @@ class TPUAccelerator(Accelerator): f' global rank: {trainer.tpu_global_core_rank}' f' with XLA_USE_BF16={os.environ.get("XLA_USE_BF16")}') - self.trainer.convert_to_lightning_optimizers() - def backward(self, closure_loss, optimizer, opt_idx, *args, **kwargs): # do backward pass if self.trainer.train_loop.automatic_optimization: diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 15f33d526d..ce01f8ff28 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -104,8 +104,11 @@ class LightningModule( self._current_dataloader_idx = None self._automatic_optimization: bool = True - def optimizers(self): - opts = self.trainer.optimizers + def optimizers(self, use_pl_optimizer: bool = True) -> Union[Optimizer, List[Optimizer], List[LightningOptimizer]]: + if use_pl_optimizer: + opts = list(self.trainer.lightning_optimizers.values()) + else: + opts = self.trainer.optimizers # single optimizer if isinstance(opts, list) and len(opts) == 1 and isinstance(opts[0], Optimizer): diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index acba35d9ae..b7f0c610c3 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -63,6 +63,10 @@ class LightningOptimizer: self._accumulate_grad_batches = accumulate_grad_batches self._optimizer_idx = None + @property + def optimizer(self): + return self._optimizer + @property def defaults(self): return self._optimizer.defaults @@ -103,9 +107,13 @@ class LightningOptimizer: break @classmethod - def to_lightning_optimizer(cls, optimizer, trainer): - optimizer = cls(optimizer) - optimizer._on_trainer_init(trainer) + def _to_lightning_optimizer(cls, optimizer, trainer, opt_idx): + # apex overrides .step function and need to be wrapped on each step + if trainer.amp_backend == AMPType.APEX: + optimizer = cls(optimizer) + optimizer._on_trainer_init(trainer) + else: + optimizer = trainer.lightning_optimizers[opt_idx] return optimizer def _accumulated_batches_reached(self): @@ -147,7 +155,7 @@ class LightningOptimizer: **kwargs ) - trainer.train_loop.on_before_zero_grad(self) + trainer.train_loop.on_before_zero_grad(optimizer) model.optimizer_zero_grad( trainer.current_epoch, diff --git a/pytorch_lightning/plugins/ddp_sequential_plugin.py b/pytorch_lightning/plugins/ddp_sequential_plugin.py index a476e30dc7..13303b02f5 100644 --- a/pytorch_lightning/plugins/ddp_sequential_plugin.py +++ b/pytorch_lightning/plugins/ddp_sequential_plugin.py @@ -15,8 +15,8 @@ import os from typing import Any, List, Optional import torch -import torch.distributed as torch_distrib from torch import nn +import torch.distributed as torch_distrib from torch.nn.parallel import DistributedDataParallel from pytorch_lightning import _logger as log @@ -28,6 +28,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_PIPE_AVAILABLE: import 
fairscale.nn.model_parallel as mpu from fairscale.nn import PipeRPCWrapper + import fairscale.nn.model_parallel as mpu from fairscale.nn.pipe import balance as pipe_balance from fairscale.nn.pipe import rpc as rpc_pipe from fairscale.nn.pipe.pipeline import PipelineStyle @@ -383,7 +384,6 @@ def register_optimizers(ctx, model): model.trainer.optimizers = optimizers model.trainer.lr_schedulers = lr_schedulers model.trainer.optimizer_frequencies = optimizer_frequencies - model.trainer.convert_to_lightning_optimizers() def run_optimizer(ctx, model): diff --git a/pytorch_lightning/plugins/native_amp.py b/pytorch_lightning/plugins/native_amp.py index 4df5d12847..73bf2d8538 100644 --- a/pytorch_lightning/plugins/native_amp.py +++ b/pytorch_lightning/plugins/native_amp.py @@ -52,7 +52,10 @@ class NativeAMPPlugin(PrecisionPlugin): # unscale gradient to allow analyze within `on_after_backward` if not self.trainer.train_loop.should_accumulate() and automatic_optimization: - self.trainer.scaler.unscale_(optimizer) + if isinstance(optimizer, LightningOptimizer): + self.trainer.scaler.unscale_(optimizer.optimizer) + else: + self.trainer.scaler.unscale_(optimizer) return closure_loss diff --git a/pytorch_lightning/plugins/sharded_plugin.py b/pytorch_lightning/plugins/sharded_plugin.py index fbba43a050..53439ebc2a 100644 --- a/pytorch_lightning/plugins/sharded_plugin.py +++ b/pytorch_lightning/plugins/sharded_plugin.py @@ -63,7 +63,7 @@ class DDPShardedPlugin(DDPPlugin): optimizers = trainer.optimizers for x, optimizer in enumerate(optimizers): if is_lightning_optimizer(optimizer): - optimizer = optimizer._optimizer + optimizer = optimizer.optimizer if not isinstance(optimizer, OSS): optim_class = type(optimizer) zero_optimizer = OSS( @@ -73,7 +73,6 @@ class DDPShardedPlugin(DDPPlugin): ) optimizers[x] = zero_optimizer del optimizer - trainer.convert_to_lightning_optimizers() def get_model_from_plugin( self, diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py index 80d4c8952a..12aa27279a 100644 --- a/pytorch_lightning/trainer/configuration_validator.py +++ b/pytorch_lightning/trainer/configuration_validator.py @@ -72,17 +72,7 @@ class ConfigValidator(object): trainer.overriden_optimizer_step = is_overridden('optimizer_step', model) trainer.overriden_optimizer_zero_grad = is_overridden('optimizer_zero_grad', model) - - enable_pl_optimizer = trainer._enable_pl_optimizer automatic_optimization = trainer.train_loop.automatic_optimization - if trainer.overriden_optimizer_step and not enable_pl_optimizer and automatic_optimization: - rank_zero_warn( - "When overriding `LightningModule` optimizer_step with" - " `Trainer(..., enable_pl_optimizer=False, ...)`," - " we won't be calling `.zero_grad` we can't assume when you call your `optimizer.step()`." - " For Lightning to take care of it, please use `Trainer(enable_pl_optimizer=True)`." - ) - going_to_accumulate_grad_batches = trainer.accumulation_scheduler.going_to_accumulate_grad_batches() has_overriden_optimization_functions = trainer.overriden_optimizer_step or trainer.overriden_optimizer_zero_grad @@ -93,13 +83,6 @@ class ConfigValidator(object): ' It ensures optimizer_step or optimizer_zero_grad are called on every batch.' 
) - if (enable_pl_optimizer) and trainer.overriden_optimizer_zero_grad and not automatic_optimization: - raise MisconfigurationException( - 'When overriding `LightningModule` optimizer_zero_grad' - ' and preserving model property `automatic_optimization` as True with' - ' `Trainer(enable_pl_optimizer=True, ...) is not supported' - ) - def __verify_eval_loop_configuration(self, model, eval_loop_name): step_name = f'{eval_loop_name}_step' diff --git a/pytorch_lightning/trainer/connectors/optimizer_connector.py b/pytorch_lightning/trainer/connectors/optimizer_connector.py index 8c352c8e5f..8b23203e42 100644 --- a/pytorch_lightning/trainer/connectors/optimizer_connector.py +++ b/pytorch_lightning/trainer/connectors/optimizer_connector.py @@ -20,7 +20,11 @@ class OptimizerConnector: self.trainer = trainer def on_trainer_init(self, enable_pl_optimizer): - self.trainer._enable_pl_optimizer = enable_pl_optimizer + if enable_pl_optimizer is not None: + rank_zero_warn( + "Trainer argument `enable_pl_optimizer` is deprecated in v1.1.3. It will be removed in v1.3.0", + DeprecationWarning + ) self.trainer.lr_schedulers = [] self.trainer.optimizers = [] self.trainer.optimizer_frequencies = [] diff --git a/pytorch_lightning/trainer/connectors/precision_connector.py b/pytorch_lightning/trainer/connectors/precision_connector.py index 78f1635fb7..4633e328cb 100644 --- a/pytorch_lightning/trainer/connectors/precision_connector.py +++ b/pytorch_lightning/trainer/connectors/precision_connector.py @@ -67,7 +67,6 @@ class PrecisionConnector: self.trainer.amp_backend = AMPType.APEX self.backend = ApexPlugin(self.trainer) log.warn("LightningOptimizer doesn't support Apex") - self.trainer._enable_pl_optimizer = False if not self.trainer.amp_backend: raise ModuleNotFoundError( diff --git a/pytorch_lightning/trainer/optimizers.py b/pytorch_lightning/trainer/optimizers.py index 919042516a..02b694e37a 100644 --- a/pytorch_lightning/trainer/optimizers.py +++ b/pytorch_lightning/trainer/optimizers.py @@ -13,6 +13,7 @@ # limitations under the License. from abc import ABC +from collections import OrderedDict from typing import List, Optional, Tuple import torch @@ -88,8 +89,10 @@ class TrainerOptimizersMixin(ABC): optimizer._on_trainer_init(trainer) return optimizer - if self._enable_pl_optimizer: - self.optimizers = [_convert_to_lightning_optimizer(self, opt) for opt in self.optimizers] + self._lightning_optimizers = { + opt_idx: _convert_to_lightning_optimizer(self, opt) + for opt_idx, opt in enumerate(self.optimizers) + } def configure_schedulers(self, schedulers: list, monitor: Optional[str] = None): # Convert each scheduler into dict structure with relevant information diff --git a/pytorch_lightning/trainer/properties.py b/pytorch_lightning/trainer/properties.py index c32b24458c..84e2192090 100644 --- a/pytorch_lightning/trainer/properties.py +++ b/pytorch_lightning/trainer/properties.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import inspect -import os from abc import ABC from argparse import ArgumentParser, Namespace +import inspect +import os from typing import cast, List, Optional, Type, TypeVar, Union from pytorch_lightning.accelerators.accelerator import Accelerator @@ -66,6 +66,7 @@ class TrainerProperties(ABC): callbacks: List[Callback] num_nodes: int num_processes: int + _lightning_optimizers = None @property def log_dir(self): @@ -267,15 +268,16 @@ class TrainerProperties(ABC): def get_model(self): return self.model_connector.get_model() - def __getstate__(self): - # unwrap optimizer - self.optimizers = [opt._optimizer if is_lightning_optimizer(opt) else opt for opt in self.optimizers] - return self.__dict__ + @property + def lightning_optimizers(self): + if self._lightning_optimizers is None: + self.convert_to_lightning_optimizers() + return self._lightning_optimizers - def __setstate__(self, d): - self.__dict__ = d - # wrap optimizers in enable_pl_optimzer is True - self.convert_to_lightning_optimizers() + def __getstate__(self): + # remove lightning_optimizers + self._lightning_optimizers = None + return self.__dict__ @property def require_distributed_sampler(self): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 02977b1c1d..f1499edb10 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -134,7 +134,7 @@ class Trainer( distributed_backend: Optional[str] = None, automatic_optimization: Optional[bool] = None, move_metrics_to_cpu: bool = False, - enable_pl_optimizer: bool = False, + enable_pl_optimizer: bool = None, # todo: remove in v1.3 multiple_trainloader_mode: str = 'max_size_cycle', ): r""" @@ -285,7 +285,8 @@ class Trainer( enable_pl_optimizer: If True, each optimizer will be wrapped by `pytorch_lightning.core.optimizer.LightningOptimizer`. It allows Lightning to - handle AMP, TPU, accumulated_gradients, etc.. + handle AMP, TPU, accumulated_gradients, etc. + .. warning:: Currently deprecated and it will be removed in v1.3 multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders. In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed, diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 8c5cfce75a..454715437e 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -499,6 +499,9 @@ class TrainLoop: 'native PyTorch amp and lbfgs are not compatible.' 
' To request, please file a Github issue in PyTorch and tag @mcarilli') + + # wraps into LightningOptimizer only for running step + optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer, opt_idx) + + # model hook model_ref.optimizer_step( self.trainer.current_epoch, diff --git a/tests/callbacks/test_callbacks.py b/tests/callbacks/test_callbacks.py index 53d6f80d9d..b12d0c2884 100644 --- a/tests/callbacks/test_callbacks.py +++ b/tests/callbacks/test_callbacks.py @@ -33,8 +33,6 @@ def test_trainer_callback_system(torch_save): limit_train_batches=3, limit_test_batches=2, progress_bar_refresh_rate=0, - # todo: enabled since internally we wrap the model for optimizer step, this should be fixed - enable_pl_optimizer=True ) # no call yet diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index 9e757488b2..34bc657bae 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -561,8 +561,7 @@ def test_checkpointing_with_nan_as_first(tmpdir, mode): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_checkpoint_repeated_strategy(enable_pl_optimizer, tmpdir): +def test_checkpoint_repeated_strategy(tmpdir): """ This test validates that the checkpoint can be called when provided to callbacks list """ @@ -582,7 +581,6 @@ def test_checkpoint_repeated_strategy(enable_pl_optimizer, tmpdir): limit_val_batches=2, limit_test_batches=2, callbacks=[checkpoint_callback], - enable_pl_optimizer=enable_pl_optimizer, weights_summary=None, progress_bar_refresh_rate=0, ) @@ -599,7 +597,6 @@ def test_checkpoint_repeated_strategy(enable_pl_optimizer, tmpdir): limit_val_batches=2, limit_test_batches=2, resume_from_checkpoint=checkpoint_callback.best_model_path, - enable_pl_optimizer=enable_pl_optimizer, weights_summary=None, progress_bar_refresh_rate=0, ) @@ -610,8 +607,7 @@ def test_checkpoint_repeated_strategy(enable_pl_optimizer, tmpdir): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_checkpoint_repeated_strategy_extended(enable_pl_optimizer, tmpdir): +def test_checkpoint_repeated_strategy_extended(tmpdir): """ This test validates checkpoint can be called several times without increasing internally its global step if nothing run.
@@ -656,7 +652,6 @@ def test_checkpoint_repeated_strategy_extended(enable_pl_optimizer, tmpdir): limit_train_batches=limit_train_batches, limit_val_batches=3, limit_test_batches=4, - enable_pl_optimizer=enable_pl_optimizer, callbacks=[checkpoint_cb], ) trainer = pl.Trainer(**trainer_config) diff --git a/tests/checkpointing/test_torch_saving.py b/tests/checkpointing/test_torch_saving.py index a15d425f5a..513eed3594 100644 --- a/tests/checkpointing/test_torch_saving.py +++ b/tests/checkpointing/test_torch_saving.py @@ -22,15 +22,13 @@ from pytorch_lightning.core.optimizer import LightningOptimizer from tests.base import BoringModel -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_model_torch_save(tmpdir, enable_pl_optimizer): +def test_model_torch_save(tmpdir): """Test to ensure torch save does not fail for model and trainer.""" model = BoringModel() num_epochs = 1 trainer = Trainer( default_root_dir=tmpdir, max_epochs=num_epochs, - enable_pl_optimizer=enable_pl_optimizer, ) temp_path = os.path.join(tmpdir, 'temp.pt') trainer.fit(model) @@ -39,8 +37,6 @@ def test_model_torch_save(tmpdir, enable_pl_optimizer): torch.save(trainer.model, temp_path) torch.save(trainer, temp_path) trainer = torch.load(temp_path) - is_lightning_optimizer = isinstance(trainer.optimizers[0], LightningOptimizer) - assert is_lightning_optimizer if enable_pl_optimizer else not is_lightning_optimizer @pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows") diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index 9d45310a1d..9cea8cf28c 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -41,8 +41,7 @@ def test_automatic_optimization(tmpdir): trainer.fit(model) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_automatic_optimization_num_calls(enable_pl_optimizer, tmpdir): +def test_automatic_optimization_num_calls(tmpdir): with patch("torch.optim.SGD.step") as sgd_step, \ patch("torch.optim.SGD.zero_grad") as sgd_zero_grad, \ @@ -71,16 +70,12 @@ def test_automatic_optimization_num_calls(enable_pl_optimizer, tmpdir): if batch_idx % 2 == 0: assert isinstance(optimizer, SGD) optimizer.step(closure=optimizer_closure) - if not enable_pl_optimizer: - optimizer.zero_grad() # update discriminator opt every 4 steps if optimizer_idx == 1: if batch_idx % 4 == 0: assert isinstance(optimizer, Adam) optimizer.step(closure=optimizer_closure) - if not enable_pl_optimizer: - optimizer.zero_grad() model = TestModel() model.training_epoch_end = None @@ -91,7 +86,6 @@ def test_automatic_optimization_num_calls(enable_pl_optimizer, tmpdir): limit_train_batches=8, limit_val_batches=1, accumulate_grad_batches=1, - enable_pl_optimizer=enable_pl_optimizer ) trainer.fit(model) @@ -102,8 +96,7 @@ def test_automatic_optimization_num_calls(enable_pl_optimizer, tmpdir): assert adam_zero_grad.call_count == 2 -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_params_groups_and_state_are_accessible(enable_pl_optimizer, tmpdir): +def test_params_groups_and_state_are_accessible(tmpdir): class TestModel(BoringModel): @@ -136,7 +129,6 @@ def test_params_groups_and_state_are_accessible(enable_pl_optimizer, tmpdir): limit_train_batches=8, limit_val_batches=1, accumulate_grad_batches=1, - enable_pl_optimizer=enable_pl_optimizer ) trainer.fit(model) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 
30330729d1..d32ef1ab69 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -40,13 +40,12 @@ def test_lightning_optimizer(tmpdir): limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) groups = "{'dampening': 0, 'initial_lr': 0.1, 'lr': 0.01, 'momentum': 0, 'nesterov': False, 'weight_decay': 0}" expected = f"LightningSGD(groups=[{groups}])" - assert trainer.optimizers[0].__repr__() == expected + assert trainer._lightning_optimizers[0].__repr__() == expected def test_lightning_optimizer_from_user(tmpdir): @@ -68,13 +67,12 @@ def test_lightning_optimizer_from_user(tmpdir): limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) groups = "{'amsgrad': False, 'betas': (0.9, 0.999), 'eps': 1e-08, 'initial_lr': 0.1, 'lr': 0.01, 'weight_decay': 0}" expected = f"LightningAdam(groups=[{groups}])" - assert trainer.optimizers[0].__repr__() == expected + assert trainer._lightning_optimizers[0].__repr__() == expected @patch("torch.optim.Adam.step", autospec=True) @@ -122,7 +120,6 @@ def test_lightning_optimizer_manual_optimization(mock_sgd_step, mock_adam_step, limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -176,7 +173,6 @@ def test_lightning_optimizer_manual_optimization_and_accumulated_gradients(mock_ max_epochs=1, weights_summary=None, accumulate_grad_batches=2, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -259,7 +255,6 @@ def test_lightning_optimizer_automatic_optimization(tmpdir): limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -312,7 +307,6 @@ def test_lightning_optimizer_automatic_optimization_optimizer_zero_grad(tmpdir): limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -372,7 +366,6 @@ def test_lightning_optimizer_automatic_optimization_optimizer_zero_grad_make_opt limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -425,7 +418,6 @@ def test_lightning_optimizer_automatic_optimization_make_optimizer_step_2(tmpdir limit_val_batches=1, max_epochs=1, weights_summary=None, - enable_pl_optimizer=True, ) trainer.fit(model) diff --git a/tests/deprecated_api/test_remove_1-3.py b/tests/deprecated_api/test_remove_1-3.py index 3deb4e219f..ff442f192c 100644 --- a/tests/deprecated_api/test_remove_1-3.py +++ b/tests/deprecated_api/test_remove_1-3.py @@ -134,3 +134,8 @@ def test_v1_3_0_trainer_cli_profiler(cli_args, expected_parsed_arg, expected_pro assert getattr(args, "profiler") == expected_parsed_arg trainer = Trainer.from_argparse_args(args) assert isinstance(trainer.profiler, expected_profiler) + + +def test_trainer_enable_pl_optimizer(tmpdir): + with pytest.deprecated_call(match='will be removed in v1.3'): + Trainer(enable_pl_optimizer=True) diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py index 55d32cc662..381dada609 100644 --- a/tests/models/test_amp.py +++ b/tests/models/test_amp.py @@ -17,13 +17,13 @@ from unittest import mock import pytest import torch -import tests.base.develop_pipelines as tpipes -import tests.base.develop_utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import _APEX_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import 
EvalModelTemplate +import tests.base.develop_pipelines as tpipes +import tests.base.develop_utils as tutils @pytest.mark.skip(reason='dp + amp not supported currently') # TODO @@ -144,8 +144,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): assert trainer.slurm_connector.resolve_root_node_address('abc[23-24, 45-40, 40]') == 'abc23' -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_cpu_model_with_amp(enable_pl_optimizer, tmpdir): +def test_cpu_model_with_amp(tmpdir): """Make sure model trains on CPU.""" trainer_options = dict( default_root_dir=tmpdir, @@ -154,7 +153,6 @@ def test_cpu_model_with_amp(enable_pl_optimizer, tmpdir): limit_train_batches=0.4, limit_val_batches=0.4, precision=16, - enable_pl_optimizer=enable_pl_optimizer, ) model = EvalModelTemplate() diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index c39e84c21d..e107a127ab 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -11,23 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from distutils.version import LooseVersion import os import platform -from distutils.version import LooseVersion import pytest import torch -import tests.base.develop_pipelines as tpipes -import tests.base.develop_utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import Callback, EarlyStopping, ModelCheckpoint from pytorch_lightning.trainer.states import TrainerState from tests.base import BoringModel +from tests.base import EvalModelTemplate +import tests.base.develop_pipelines as tpipes +import tests.base.develop_utils as tutils - -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): +def test_cpu_slurm_save_load(tmpdir): """Verify model save/load/checkpoint on CPU.""" model = BoringModel() @@ -43,7 +42,6 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): limit_train_batches=0.2, limit_val_batches=0.2, callbacks=[ModelCheckpoint(dirpath=tmpdir)], - enable_pl_optimizer=enable_pl_optimizer, ) trainer.fit(model) real_global_step = trainer.global_step @@ -89,7 +87,6 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): default_root_dir=tmpdir, max_epochs=1, logger=logger, - enable_pl_optimizer=enable_pl_optimizer, callbacks=[_StartCallback(), ModelCheckpoint(dirpath=tmpdir)], ) # by calling fit again, we trigger training, loading weights from the cluster @@ -97,8 +94,7 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): trainer.fit(model) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_early_stopping_cpu_model(enable_pl_optimizer, tmpdir): +def test_early_stopping_cpu_model(tmpdir): class ModelTrainVal(BoringModel): def validation_epoch_end(self, outputs) -> None: val_loss = torch.stack([x["x"] for x in outputs]).mean() @@ -111,7 +107,6 @@ def test_early_stopping_cpu_model(enable_pl_optimizer, tmpdir): gradient_clip_val=1.0, overfit_batches=0.20, track_grad_norm=2, - enable_pl_optimizer=enable_pl_optimizer, progress_bar_refresh_rate=0, accumulate_grad_batches=2, limit_train_batches=0.1, @@ -131,8 +126,7 @@ def test_early_stopping_cpu_model(enable_pl_optimizer, tmpdir): @pytest.mark.skipif((platform.system() == "Darwin" and LooseVersion(torch.__version__) < LooseVersion("1.3.0")), reason="Distributed training is not supported on MacOS before Torch 1.3.0") 
-@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir): +def test_multi_cpu_model_ddp(tmpdir): """Make sure DDP works.""" tutils.set_random_master_port() @@ -145,7 +139,6 @@ def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir): gpus=None, num_processes=2, accelerator='ddp_cpu', - enable_pl_optimizer=enable_pl_optimizer, ) model = BoringModel() @@ -300,6 +293,25 @@ def test_cpu_model(tmpdir): progress_bar_refresh_rate=0, max_epochs=1, limit_train_batches=0.4, + limit_val_batches=0.4 + ) + + model = EvalModelTemplate() + + tpipes.run_model_test(trainer_options, model, on_gpu=False) + + +def test_all_features_cpu_model(tmpdir): + """Test each of the trainer options.""" + trainer_options = dict( + default_root_dir=tmpdir, + gradient_clip_val=1.0, + overfit_batches=0.20, + track_grad_norm=2, + progress_bar_refresh_rate=0, + accumulate_grad_batches=2, + max_epochs=1, + limit_train_batches=0.4, limit_val_batches=0.4, ) diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 7ac7cd235f..b8fc6d3809 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -20,11 +20,9 @@ import sys import numpy as np import pytest -import torch from sklearn.metrics import accuracy_score +import torch -import tests.base.develop_pipelines as tpipes -import tests.base.develop_utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.accelerators.horovod_accelerator import HorovodAccelerator from pytorch_lightning.metrics.classification.accuracy import Accuracy @@ -32,6 +30,8 @@ from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import _APEX_AVAILABLE, _HOROVOD_AVAILABLE, _NATIVE_AMP_AVAILABLE from tests.base import EvalModelTemplate from tests.base.boring_model import BoringModel +import tests.base.develop_pipelines as tpipes +import tests.base.develop_utils as tutils from tests.base.models import BasicGAN if _HOROVOD_AVAILABLE: @@ -69,8 +69,7 @@ def _run_horovod(trainer_options, on_gpu=False): @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_horovod_cpu(enable_pl_optimizer, tmpdir): +def test_horovod_cpu(tmpdir): """Test Horovod running multi-process on CPU.""" trainer_options = dict( default_root_dir=str(tmpdir), @@ -82,14 +81,12 @@ def test_horovod_cpu(enable_pl_optimizer, tmpdir): limit_val_batches=0.2, accelerator='horovod', deterministic=True, - enable_pl_optimizer=enable_pl_optimizer, ) _run_horovod(trainer_options) @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_horovod_cpu_implicit(enable_pl_optimizer, tmpdir): +def test_horovod_cpu_implicit(tmpdir): """Test Horovod without specifying a backend, inferring from env set by `horovodrun`.""" trainer_options = dict( default_root_dir=str(tmpdir), @@ -100,7 +97,6 @@ def test_horovod_cpu_implicit(enable_pl_optimizer, tmpdir): limit_train_batches=0.4, limit_val_batches=0.2, deterministic=True, - enable_pl_optimizer=enable_pl_optimizer, ) _run_horovod(trainer_options) @@ -206,8 +202,7 @@ def test_horovod_transfer_batch_to_gpu(tmpdir): @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_horovod_multi_optimizer(enable_pl_optimizer, 
tmpdir): +def test_horovod_multi_optimizer(tmpdir): model = BasicGAN(**EvalModelTemplate.get_default_hparams()) # fit model @@ -219,7 +214,6 @@ def test_horovod_multi_optimizer(enable_pl_optimizer, tmpdir): limit_val_batches=0.2, deterministic=True, accelerator='horovod', - enable_pl_optimizer=enable_pl_optimizer, ) trainer.fit(model) assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" @@ -241,8 +235,7 @@ def test_horovod_multi_optimizer(enable_pl_optimizer, tmpdir): @pytest.mark.skipif(not _HOROVOD_AVAILABLE, reason="Horovod is unavailable") @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_result_reduce_horovod(enable_pl_optimizer, tmpdir): +def test_result_reduce_horovod(tmpdir): """Make sure result logging works with Horovod. This test mirrors tests/core/test_results.py::_ddp_test_fn @@ -282,7 +275,6 @@ def test_result_reduce_horovod(enable_pl_optimizer, tmpdir): max_epochs=1, log_every_n_steps=1, weights_summary=None, - enable_pl_optimizer=enable_pl_optimizer, ) trainer.fit(model) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index e29afe8e66..2b4ec87980 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -11,22 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from copy import deepcopy import glob import logging as log import os import pickle -from copy import deepcopy import cloudpickle import pytest import torch +from pytorch_lightning import Callback, LightningModule, seed_everything, Trainer +from pytorch_lightning.callbacks import ModelCheckpoint import tests.base.develop_pipelines as tpipes import tests.base.develop_utils as tutils -from pytorch_lightning import Callback, Trainer -from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.trainer.states import TrainerState -from tests.base import BoringModel, EvalModelTemplate, GenericEvalModelTemplate +from tests.base import BoringModel, EvalModelTemplate, GenericEvalModelTemplate, TrialMNIST class ModelTrainerPropertyParity(Callback): @@ -51,8 +51,7 @@ class ModelTrainerPropertyParity(Callback): self._check_properties(trainer, pl_module) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_model_properties_resume_from_checkpoint(enable_pl_optimizer, tmpdir): +def test_model_properties_resume_from_checkpoint(tmpdir): """ Test that properties like `current_epoch` and `global_step` in model and trainer are always the same. """ model = EvalModelTemplate() @@ -61,7 +60,6 @@ def test_model_properties_resume_from_checkpoint(enable_pl_optimizer, tmpdir): default_root_dir=tmpdir, max_epochs=1, logger=False, - enable_pl_optimizer=enable_pl_optimizer, callbacks=[checkpoint_callback, ModelTrainerPropertyParity()], # this performs the assertions ) trainer = Trainer(**trainer_args) @@ -98,8 +96,7 @@ class CaptureCallbacksBeforeTraining(Callback): self.callbacks = deepcopy(trainer.callbacks) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_callbacks_state_resume_from_checkpoint(enable_pl_optimizer, tmpdir): +def test_callbacks_state_resume_from_checkpoint(tmpdir): """ Test that resuming from a checkpoint restores callbacks that persist state. 
""" model = EvalModelTemplate() callback_capture = CaptureCallbacksBeforeTraining() @@ -110,7 +107,6 @@ def test_callbacks_state_resume_from_checkpoint(enable_pl_optimizer, tmpdir): default_root_dir=tmpdir, max_steps=1, logger=False, - enable_pl_optimizer=enable_pl_optimizer, callbacks=[ checkpoint, callback_capture, @@ -137,11 +133,10 @@ def test_callbacks_state_resume_from_checkpoint(enable_pl_optimizer, tmpdir): assert before.best_model_score == after.best_model_score -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_callbacks_references_resume_from_checkpoint(enable_pl_optimizer, tmpdir): +def test_callbacks_references_resume_from_checkpoint(tmpdir): """ Test that resuming from a checkpoint sets references as expected. """ model = EvalModelTemplate() - args = {'default_root_dir': tmpdir, 'max_steps': 1, 'logger': False, "enable_pl_optimizer": enable_pl_optimizer} + args = {'default_root_dir': tmpdir, 'max_steps': 1, 'logger': False} # initial training checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="early_stop_on", save_last=True) diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index 8f2f2126b3..02814ad4dc 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -457,7 +457,6 @@ def test_manual_optimization_and_return_tensor(tmpdir): amp_backend='native', accelerator="ddp_spawn", gpus=2, - enable_pl_optimizer=True ) trainer.fit(model) @@ -576,7 +575,6 @@ def test_manual_optimization_and_accumulated_gradient(tmpdir): amp_backend='native', accumulate_grad_batches=4, gpus=1, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -651,7 +649,6 @@ def test_multiple_optimizers_step(tmpdir): precision=16, amp_backend='native', gpus=1, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -733,7 +730,6 @@ def test_step_with_optimizer_closure(tmpdir): limit_val_batches=2, max_epochs=1, log_every_n_steps=1, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -798,7 +794,6 @@ def test_step_with_optimizer_closure_and_accumulated_grad(tmpdir): max_epochs=1, log_every_n_steps=1, accumulate_grad_batches=2, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -854,7 +849,6 @@ def test_step_with_optimizer_closure_and_extra_arguments(step_mock, tmpdir): max_epochs=1, log_every_n_steps=1, accumulate_grad_batches=2, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -932,7 +926,6 @@ def test_step_with_optimizer_closure_with_different_frequencies(mock_sgd_step, m max_epochs=1, log_every_n_steps=1, accumulate_grad_batches=2, - enable_pl_optimizer=True, ) trainer.fit(model) @@ -1041,7 +1034,6 @@ def test_step_with_optimizer_closure_with_different_frequencies_ddp(mock_sgd_ste max_epochs=1, log_every_n_steps=1, accumulate_grad_batches=2, - enable_pl_optimizer=True, gpus=2, accelerator="ddp", ) @@ -1052,35 +1044,3 @@ def test_step_with_optimizer_closure_with_different_frequencies_ddp(mock_sgd_ste expected_calls = [call(closure=ANY, optim='adam')] * 2 mock_adam_step.assert_has_calls(expected_calls) - - -def test_step_with_misconfiguraiton_error_when_overriding_optimizer_zero_grad(tmpdir): - """ - Tests that `optimizer_zero_grad` in manual_optimization triggers a MisconfigurationException - """ - try: - class TestModel(BoringModel): - - def optimizer_zero_grad(self, *_): - pass - - @property - def automatic_optimization(self) -> bool: - return False - - model = TestModel() - model.val_dataloader = None - model.training_epoch_end = None - - 
limit_train_batches = 8 - Trainer( - default_root_dir=tmpdir, - limit_train_batches=limit_train_batches, - limit_val_batches=2, - max_epochs=1, - log_every_n_steps=1, - accumulate_grad_batches=2, - enable_pl_optimizer=True, - ) - except MisconfigurationException as ex: - assert "`Trainer(enable_pl_optimizer=True, ...) is not supported" in str(ex) diff --git a/tests/trainer/optimization/test_optimizers.py b/tests/trainer/optimization/test_optimizers.py index 8e4e7abf9c..dacdc98848 100644 --- a/tests/trainer/optimization/test_optimizers.py +++ b/tests/trainer/optimization/test_optimizers.py @@ -181,9 +181,8 @@ def test_reducelronplateau_scheduling(tmpdir): ), 'lr scheduler was not correctly converted to dict' -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_optimizer_return_options(enable_pl_optimizer): - trainer = Trainer(enable_pl_optimizer=enable_pl_optimizer) +def test_optimizer_return_options(): + trainer = Trainer() model = EvalModelTemplate() # single optimizer diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index b9723878ad..13ea10c42d 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -11,22 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import math -import os -import pickle -import sys from argparse import Namespace from copy import deepcopy -from distutils.version import LooseVersion +import math +import os from pathlib import Path +import pickle +import sys from unittest.mock import ANY, call, patch import cloudpickle +from omegaconf import OmegaConf import pytest import torch -from omegaconf import OmegaConf -import tests.base.develop_utils as tutils from pytorch_lightning import Callback, LightningModule, Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml, save_hparams_to_tags_csv @@ -38,6 +36,7 @@ from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import BoringModel, EvalModelTemplate +import tests.base.develop_utils as tutils @pytest.fixture @@ -504,16 +503,13 @@ def test_model_checkpoint_only_weights(tmpdir): def test_model_freeze_unfreeze(): - model = EvalModelTemplate() - model.freeze() model.unfreeze() -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) @pytest.mark.parametrize("url_ckpt", [True, False]) -def test_resume_from_checkpoint_epoch_restored(monkeypatch, tmpdir, tmpdir_server, url_ckpt, enable_pl_optimizer): +def test_resume_from_checkpoint_epoch_restored(monkeypatch, tmpdir, tmpdir_server, url_ckpt): """Verify resuming from checkpoint runs the right number of epochs""" # set $TORCH_HOME, which determines torch hub's cache path, to tmpdir monkeypatch.setenv("TORCH_HOME", tmpdir) @@ -541,7 +537,6 @@ def test_resume_from_checkpoint_epoch_restored(monkeypatch, tmpdir, tmpdir_serve callbacks=[ModelCheckpoint(dirpath=tmpdir, monitor='early_stop_on', save_top_k=-1)], default_root_dir=tmpdir, val_check_interval=1.0, - enable_pl_optimizer=enable_pl_optimizer, progress_bar_refresh_rate=0, logger=False, weights_summary=None,
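
For orientation, below is a minimal sketch (not part of the patch) of the user-facing pattern the README and docs hunks above document: manual optimization where `self.optimizers(use_pl_optimizer=True)` returns `LightningOptimizer` wrappers whose `.step()` handles AMP/TPU/gradient accumulation, while the raw `torch.optim` object stays reachable via `.optimizer`, and `Trainer(enable_pl_optimizer=...)` is no longer required. The module, layer sizes, loss, and dataloader are hypothetical placeholders, assuming the 1.1-era API shown in this diff.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


class ManualOptModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.gen = torch.nn.Linear(32, 32)
        self.disc = torch.nn.Linear(32, 1)

    @property
    def automatic_optimization(self) -> bool:
        # opt out of Lightning's automatic loop; we call backward/step ourselves
        return False

    def training_step(self, batch, batch_idx, optimizer_idx):
        # ignore optimizer_idx; use_pl_optimizer=True (the default) yields LightningOptimizer wrappers
        opt_g, opt_d = self.optimizers(use_pl_optimizer=True)
        raw_opt_g = opt_g.optimizer  # the underlying torch optimizer, if ever needed

        (x,) = batch
        loss_g = self.disc(self.gen(x)).mean()
        self.manual_backward(loss_g, opt_g)
        opt_g.step()       # the wrapper takes care of AMP/TPU/accumulation here
        opt_g.zero_grad()

        loss_d = self.disc(x).mean()
        self.manual_backward(loss_d, opt_d)
        opt_d.step()
        opt_d.zero_grad()

    def configure_optimizers(self):
        return (
            torch.optim.Adam(self.gen.parameters(), lr=1e-3),
            torch.optim.Adam(self.disc.parameters(), lr=1e-3),
        )

    def train_dataloader(self):
        return DataLoader(TensorDataset(torch.randn(64, 32)), batch_size=8)


# enable_pl_optimizer is no longer needed; passing it now only emits a DeprecationWarning
trainer = pl.Trainer(max_epochs=1, limit_train_batches=4, weights_summary=None)
trainer.fit(ManualOptModel())
```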