lightning/pytorch_lightning/accelerators/cpu_accelerator.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Optional, Union

import torch

from pytorch_lightning.accelerators.accelerator import Accelerator, ReduceOp
from pytorch_lightning.utilities import AMPType, rank_zero_warn
from pytorch_lightning.utilities.exceptions import MisconfigurationException


class CPUAccelerator(Accelerator):
    """Accelerator that runs the training, validation and test steps on CPU.

    Automatic mixed precision is rejected in :meth:`setup`, and
    :meth:`sync_tensor` hands the tensor back untouched.
    """

    def __init__(self, trainer, cluster_environment=None):
        """
        Runs training on CPU

        Example::

            # default
            trainer = Trainer(accelerator=CPUAccelerator())

        Args:
            trainer: forwarded unchanged to the base ``Accelerator`` initializer.
            cluster_environment: forwarded unchanged to the base ``Accelerator``
                initializer; defaults to ``None``.

        """
        # NOTE(review): the example above omits ``trainer``, which this
        # signature requires positionally -- confirm how the Trainer supplies it.
        super().__init__(trainer, cluster_environment)
        self.nickname = None

    def setup(self, model):
        """Validate the configuration, then prepare ``model`` and its optimizers.

        Args:
            model: passed to the trainer's setup hook and to
                ``setup_optimizers``, then stored as ``self.trainer.model``.

        Raises:
            MisconfigurationException: if ``self.trainer.amp_backend`` is truthy.
        """
        # any AMP backend is refused on CPU; fail before a hook has run
        if self.trainer.amp_backend:
            raise MisconfigurationException('amp + cpu is not supported.  Please use a GPU option')

        # let the trainer invoke its setup hook for this model
        self.trainer.call_setup_hook(model)

        # CHOOSE OPTIMIZER
        # allow for lr schedulers as well
        self.setup_optimizers(model)

        self.trainer.convert_to_lightning_optimizers()

        # publish the model last, once the steps above have succeeded
        self.trainer.model = model

    def train(self):
        """Set up the training routine and return the result of ``train_or_test``."""
        model = self.trainer.model

        # set up training routine
        self.trainer.train_loop.setup_training(model)

        # train or test
        return self.train_or_test()

    def _run_model_step(self, model_step, args):
        """Call ``model_step(*args)`` and return its output.

        Shared by ``training_step``, ``validation_step`` and ``test_step`` so
        that the AMP handling lives in exactly one place.

        Args:
            model_step: the bound step method of the model to invoke.
            args: positional arguments, unpacked into ``model_step``.
        """
        if self.trainer.amp_backend == AMPType.NATIVE:
            # native AMP: run the step inside the autocast context
            with torch.cuda.amp.autocast():
                return model_step(*args)
        return model_step(*args)

    def training_step(self, args):
        """Run the model's ``training_step`` with ``args`` and return its output."""
        return self._run_model_step(self.trainer.model.training_step, args)

    def validation_step(self, args):
        """Run the model's ``validation_step`` with ``args`` and return its output."""
        return self._run_model_step(self.trainer.model.validation_step, args)

    def test_step(self, args):
        """Run the model's ``test_step`` with ``args`` and return its output."""
        return self._run_model_step(self.trainer.model.test_step, args)

    def sync_tensor(self,
                    tensor: torch.Tensor,
                    group: Optional[Any] = None,
                    reduce_op: Optional[Union[ReduceOp, str]] = None) -> torch.Tensor:
        """Return ``tensor`` as is.

        Args:
            tensor: the tensor to return.
            group: accepted but not used by this implementation.
            reduce_op: accepted but not used by this implementation.

        Returns:
            The very same ``tensor`` object that was passed in.
        """
        return tensor
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00			`# Copyright The PyTorch Lightning team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
optimizer clean up (#4658) * add LightningOptimizer * typo * add mock closure * typo * remove logic in optimizer_step * update * update * update * desactivate LightningOptimizer for hovorod * resolve flake * typo * check optimizer name * change name * added backward to LightningOptimizer * remove use_lightning_optimizer * move update * simplify init * resolve comments * resolve bug * update * update * resolve bugs * resolve flake8 * set state * work manual_optimizer_step * add doc * add enable_pl_optimizer * make optimizer_step * add make_optimizer_step * add examples * resolve test * add test_optimizer_return_options_enable_pl_optimizer * add enable_pl_optimizer=True * update * update tests * resolve bugs * update * set Trainer to False * update * resolve bugs * update * remove from doc * resolve bug * typo * update * set to True * simplification * typo * resolve horovod * unwrap horovod * remove Optimizer * resolve horovod * move logic to amp_backend * doesn't seem to be pickable * update * add again * resolve some bugs * cleanup * resolve bug with AMP * change __repr__ * round at -12 * udpate * update * update * remove from horovod * typo * add convert_to_lightning_optimizers in each accelerators * typo * forgot * forgot a convert_to_lightning_optimizers * update * update * update * increase coverage * update * resolve flake8 * update * remove useless code * resolve comments + add support for LightningOptimizer base class * resolve flake * check optimizer get wrapped back * resolve DDPSharded * reduce code * lightningoptimizer * Update pytorch_lightning/core/optimizer.py Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> * Update pytorch_lightning/core/lightning.py * remove reference to step function * Apply suggestions from code review * update on comments * resolve * Update CHANGELOG.md * add back training_step in apex and native_amp * rename optimizer_step Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec 
<Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu> Co-authored-by: Sean Naren <sean.narenthiran@gmail.com> 2020-12-01 00:09:46 +00:00			`from typing import Any, Optional, Union`
Prevent crash if sync_dist=True on CPU (#4626) * Added test/fix for sync_dist raising NotImplementedError * Fixed comments/formatting * Revert base class change, enforce sync tensors across accelerators, added GPU test 2020-11-11 22:04:05 +00:00
training amp scaling refactor (#3135) 2020-08-24 23:59:46 +00:00			`import torch`
disable optimizers setup during testing (#3059) * disable configure_optimizers during testing * minor changes * hvd and ddp * fix precision during testing * fix ddp * fix amp * fix cpu * update dp * simplify optimizers * add test * codefactor * ref optimizer setup * chlog * suggestions * isort * rebased with master 2020-09-28 23:09:04 +00:00
Prevent crash if sync_dist=True on CPU (#4626) * Added test/fix for sync_dist raising NotImplementedError * Fixed comments/formatting * Revert base class change, enforce sync tensors across accelerators, added GPU test 2020-11-11 22:04:05 +00:00			`from pytorch_lightning.accelerators.accelerator import Accelerator, ReduceOp`
ref: inner train loop (intermediate step) 1/n (#3359) 2020-09-05 12:55:22 +00:00			`from pytorch_lightning.utilities import AMPType, rank_zero_warn`
disable optimizers setup during testing (#3059) * disable configure_optimizers during testing * minor changes * hvd and ddp * fix precision during testing * fix ddp * fix amp * fix cpu * update dp * simplify optimizers * add test * codefactor * ref optimizer setup * chlog * suggestions * isort * rebased with master 2020-09-28 23:09:04 +00:00			`from pytorch_lightning.utilities.exceptions import MisconfigurationException`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00

ref: accelerator names (#4066) * ref: accelerator names * docs 2020-10-11 05:05:14 +00:00			`class CPUAccelerator(Accelerator):`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00
ref: adding compute environments (2/n) (#3842) * ref: adding compute environments (2/n) * ref: adding compute environments (2/n) * ref: adding compute environments (2/n) * ref: adding compute environments (2/n) 2020-10-04 12:48:46 +00:00			`def __init__(self, trainer, cluster_environment=None):`
Accelerator docs (#4583) * accelerator docs * accelerator docs 2020-11-08 22:24:41 +00:00			`"""`
			`Runs training on CPU`

			`Example::`

			`# default`
			`trainer = Trainer(accelerator=CPUAccelerator())`

			`"""`
ref: adding compute environments (2/n) (#3842) * ref: adding compute environments (2/n) * ref: adding compute environments (2/n) * ref: adding compute environments (2/n) * ref: adding compute environments (2/n) 2020-10-04 12:48:46 +00:00			`super().__init__(trainer, cluster_environment)`
enable passing in custom accelerators (#4050) * enable custom accelerators * ref: finish decoupling apex, LM and backward * ref: finish decoupling apex, LM and backward * ref: finish decoupling apex, LM and backward 2020-10-10 13:21:08 +00:00			`self.nickname = None`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00
			`def setup(self, model):`
			`# run through amp wrapper`
add apex test (#2921) * add apex test * rename * level * events * wrap * evt * miss * apex * apex * apex * apex * apex * apex * Update tests/models/test_amp.py Co-authored-by: William Falcon <waf2107@columbia.edu> * notes * notes Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-08-13 14:03:13 +00:00			`if self.trainer.amp_backend:`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00			`raise MisconfigurationException('amp + cpu is not supported. Please use a GPU option')`

			`# call setup after the ddp process has connected`
Call DataModule hooks implicitly in trainer (#2755) * :sparkles: call dm hooks in trainer implicitly * :white_check_mark: update tests * :pencil: remove unused stage arg from dm docs * :white_check_mark: update tests * :white_check_mark: update tests * :construction: include stage in datamodule.setup * :pencil: docs * :pencil: docs * added more dm tests * added more dm tests * :bug: call dm.setup everywhere * :fire: pickle tests now implied by accelerator tests * :art: set dm as attr of trainer * :bug: . * :construction: wip * add can prepare test * add can prepare test * verified setup in fit * fixed setup call * fixed setup call * fixed setup call Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-08-02 00:17:57 +00:00			`self.trainer.call_setup_hook(model)`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00
			`# CHOOSE OPTIMIZER`
			`# allow for lr schedulers as well`
disable optimizers setup during testing (#3059) * disable configure_optimizers during testing * minor changes * hvd and ddp * fix precision during testing * fix ddp * fix amp * fix cpu * update dp * simplify optimizers * add test * codefactor * ref optimizer setup * chlog * suggestions * isort * rebased with master 2020-09-28 23:09:04 +00:00			`self.setup_optimizers(model)`

optimizer clean up (#4658) * add LightningOptimizer * typo * add mock closure * typo * remove logic in optimizer_step * update * update * update * desactivate LightningOptimizer for hovorod * resolve flake * typo * check optimizer name * change name * added backward to LightningOptimizer * remove use_lightning_optimizer * move update * simplify init * resolve comments * resolve bug * update * update * resolve bugs * resolve flake8 * set state * work manual_optimizer_step * add doc * add enable_pl_optimizer * make optimizer_step * add make_optimizer_step * add examples * resolve test * add test_optimizer_return_options_enable_pl_optimizer * add enable_pl_optimizer=True * update * update tests * resolve bugs * update * set Trainer to False * update * resolve bugs * update * remove from doc * resolve bug * typo * update * set to True * simplification * typo * resolve horovod * unwrap horovod * remove Optimizer * resolve horovod * move logic to amp_backend * doesn't seem to be pickable * update * add again * resolve some bugs * cleanup * resolve bug with AMP * change __repr__ * round at -12 * udpate * update * update * remove from horovod * typo * add convert_to_lightning_optimizers in each accelerators * typo * forgot * forgot a convert_to_lightning_optimizers * update * update * update * increase coverage * update * resolve flake8 * update * remove useless code * resolve comments + add support for LightningOptimizer base class * resolve flake * check optimizer get wrapped back * resolve DDPSharded * reduce code * lightningoptimizer * Update pytorch_lightning/core/optimizer.py Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> * Update pytorch_lightning/core/lightning.py * remove reference to step function * Apply suggestions from code review * update on comments * resolve * Update CHANGELOG.md * add back training_step in apex and native_amp * rename optimizer_step Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec 
<Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu> Co-authored-by: Sean Naren <sean.narenthiran@gmail.com> 2020-12-01 00:09:46 +00:00			`self.trainer.convert_to_lightning_optimizers()`

ref: ddp backend refactor (3) (#3208) * ddp backend refactor * ddp backend refactor 2020-08-27 00:03:09 +00:00			`self.trainer.model = model`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00
ref: ddp backend refactor (3) (#3208) * ddp backend refactor * ddp backend refactor 2020-08-27 00:03:09 +00:00			`def train(self):`
			`model = self.trainer.model`
ref: move train outside of setup training (#3297) * ref: move train outside of setup training * ref: move train outside of setup training * ref: move train outside of setup training * ref: move train outside of setup training 2020-09-01 00:36:52 +00:00
			`# set up training routine`
ref: organize args 4/n (#3456) 2020-09-11 01:58:47 +00:00			`self.trainer.train_loop.setup_training(model)`
ref: move train outside of setup training (#3297) * ref: move train outside of setup training * ref: move train outside of setup training * ref: move train outside of setup training * ref: move train outside of setup training 2020-09-01 00:36:52 +00:00
			`# train or test`
ref: organize args 4/n (#3456) 2020-09-11 01:58:47 +00:00			`results = self.train_or_test()`
cpu backend (#2712) * cpu backend * cpu backend * cpu backend 2020-07-26 02:55:09 +00:00			`return results`
ref: remove obscure forward call in eval + CPU backend ___step (#3123) * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval 2020-08-24 16:31:40 +00:00
			`def training_step(self, args):`
training amp scaling refactor (#3135) 2020-08-24 23:59:46 +00:00			`if self.trainer.amp_backend == AMPType.NATIVE:`
			`with torch.cuda.amp.autocast():`
			`output = self.trainer.model.training_step(*args)`
			`else:`
			`output = self.trainer.model.training_step(*args)`
			`return output`
ref: remove obscure forward call in eval + CPU backend ___step (#3123) * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval 2020-08-24 16:31:40 +00:00
			`def validation_step(self, args):`
eval step scaling factor (#3136) 2020-08-25 00:26:39 +00:00			`if self.trainer.amp_backend == AMPType.NATIVE:`
			`with torch.cuda.amp.autocast():`
			`output = self.trainer.model.validation_step(*args)`
			`else:`
			`output = self.trainer.model.validation_step(*args)`
			`return output`
ref: remove obscure forward call in eval + CPU backend ___step (#3123) * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval * remove obscure forward call in eval 2020-08-24 16:31:40 +00:00
			`def test_step(self, args):`
eval step scaling factor (#3136) 2020-08-25 00:26:39 +00:00			`if self.trainer.amp_backend == AMPType.NATIVE:`
			`with torch.cuda.amp.autocast():`
			`output = self.trainer.model.test_step(*args)`
			`else:`
			`output = self.trainer.model.test_step(*args)`
			`return output`
Prevent crash if sync_dist=True on CPU (#4626) * Added test/fix for sync_dist raising NotImplementedError * Fixed comments/formatting * Revert base class change, enforce sync tensors across accelerators, added GPU test 2020-11-11 22:04:05 +00:00
			`def sync_tensor(self,`
			`tensor: Union[torch.Tensor],`
			`group: Optional[Any] = None,`
			`reduce_op: Optional[Union[ReduceOp, str]] = None) -> torch.Tensor:`
			`return tensor`