lightning/pytorch_lightning/trainer/trainer.py

1473 lines
56 KiB
Python
Raw Normal View History

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
2019-07-09 00:11:20 +00:00
import os
import warnings
from argparse import ArgumentParser, Namespace
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
2019-07-09 00:11:20 +00:00
2019-03-31 01:45:16 +00:00
import torch
import torch.distributed as torch_distrib
from torch.utils.data import DataLoader
2019-07-09 00:11:20 +00:00
from pytorch_lightning.callbacks import Callback, EarlyStopping, ModelCheckpoint
from pytorch_lightning.core.datamodule import LightningDataModule
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.core.memory import ModelSummary
from pytorch_lightning.core.step_result import EvalResult
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.profiler import BaseProfiler, PassThroughProfiler, SimpleProfiler
from pytorch_lightning.trainer.auto_mix_precision import TrainerAMPMixin
from pytorch_lightning.trainer.callback_config import TrainerCallbackConfigMixin
from pytorch_lightning.trainer.callback_hook import TrainerCallbackHookMixin
from pytorch_lightning.trainer.configuration_validator import ConfigValidator
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_10
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin
from pytorch_lightning.trainer.distrib_parts import (TrainerDPMixin, _parse_gpu_ids, _parse_tpu_cores,
determine_root_gpu_device, pick_multiple_gpus)
from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
from pytorch_lightning.trainer.lr_finder import TrainerLRFinderMixin
from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
from pytorch_lightning.trainer.optimizers import TrainerOptimizersMixin
from pytorch_lightning.trainer.states import TrainerState, trainer_state
Added accumulation of loggers' metrics for the same steps (#1278) * `add_argparse_args` method fixed (argument types added) * autopep8 fixes * --gpus=0 removed from test (for ci tests) * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * test_with_accumulate_grad_batches added * agg_and_log_metrics logic added to the base logger class * small format fix * agg metrics strategies removed (not to complicate stuff) * agg metrics: handle zero step * autopep8 * changelog upd * flake fix * metrics aggregators factored out, metrics_agg.py added + tests * metrics agg default value added * Update pytorch_lightning/loggers/metrics_agg.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * metrics aggregators factored out, metrics_agg.py added + tests * metrics agg default value added * Update pytorch_lightning/loggers/metrics_agg.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * remove .item which causes sync issues (#1254) * remove .item which causes sync issues * fixed gradient acc sched * fixed gradient acc sched * test_metrics_agg.py removed (all tested in doctrings), agg metrics refactored * test_metrics_agg.py removed (all tested in doctrings), agg metrics refactored * autopep8 * loggers base.py types fixed * test * test * metrics aggregation for loggers: each key now has a specific function (or default one) * metrics aggregation for loggers: each key now has a specific function (or default one) * docstrings upd * manual typehints removed from docstrings * batch_size decreased for test `test_with_accumulate_grad_batches` * extend running accum * refactor * fix tests * fix tests * allowed_types generator scoped * trainer.py distutils was imported twice, fixed * TensorRunningAccum refactored * TensorRunningAccum added to change log (Changed) * change log pull link added Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu> Co-authored-by: J. Borovec <jirka.borovec@seznam.cz>
2020-04-08 12:35:47 +00:00
from pytorch_lightning.trainer.supporters import TensorRunningAccum
from pytorch_lightning.trainer.training_io import TrainerIOMixin
from pytorch_lightning.trainer.training_loop import TrainerTrainLoopMixin
from pytorch_lightning.trainer.training_tricks import TrainerTrainingTricksMixin
from pytorch_lightning.utilities import parsing, rank_zero_info, rank_zero_only, rank_zero_warn, AMPType
Structured results (train loop only. val loop separate PR) (PR 2/5) (#2615) * r * r * r * patched optimizer closure with sr * patched optimizer closure with sr * patched optimizer closure with sr * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added autoreduce for train step * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added hooks * added hooks * added hooks * added hooks * added hooks * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * cache * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py * Update pytorch_lightning/core/step_result.py * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * simple * finished tests for structured results on train epoch * simple * simple * revert * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update tests/base/deterministic_model.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * finished tests for structured results on train epoch * docstring typos * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update pytorch_lightning/core/step_result.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Update pytorch_lightning/overrides/data_parallel.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com>
2020-07-20 23:00:20 +00:00
from pytorch_lightning.utilities.debugging import InternalDebugger
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.cloud_io import get_filesystem
from pytorch_lightning.trainer.evaluate_loop import EvaluationLoop
from pytorch_lightning.trainer.data_connector import DataConnector
from pytorch_lightning.accelerators.accelerator_connector import AcceleratorConnector
from pytorch_lightning.trainer.training_loop_temp import TrainLoop
from pytorch_lightning import _logger as log
from pytorch_lightning.utilities.model_utils import is_overridden
# warnings to ignore in trainer
warnings.filterwarnings(
'ignore', message='torch.distributed.reduce_op is deprecated, ' 'please use torch.distributed.ReduceOp instead'
)
2019-05-14 00:40:07 +00:00
try:
from apex import amp
2019-08-05 21:28:04 +00:00
except ImportError:
amp = None
2019-03-31 01:45:16 +00:00
Enable TPU support (#868) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * fix test pkg create (#873) * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Luis Capelo <luiscape@gmail.com> * Fix segmentation example (#876) * removed torchvision model and added custom model * minor fix * Fixed relative imports issue * Fix/typo (#880) * Update greetings.yml * Update greetings.yml * Changelog (#869) * Create CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Update PULL_REQUEST_TEMPLATE.md * Add PR links to Version 0.6.0 in CHANGELOG.md * Add PR links for Unreleased in CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Fixing Function Signatures (#871) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Luis Capelo <luiscape@gmail.com> Co-authored-by: Akshay Kulkarni <akshayk.vnit@gmail.com> Co-authored-by: Ethan Harris <ewah1g13@soton.ac.uk> Co-authored-by: Shikhar Chauhan <xssChauhan@users.noreply.github.com>
2020-02-17 21:01:20 +00:00
try:
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
except ImportError:
XLA_AVAILABLE = False
else:
XLA_AVAILABLE = True
Enable TPU support (#868) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * fix test pkg create (#873) * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Luis Capelo <luiscape@gmail.com> * Fix segmentation example (#876) * removed torchvision model and added custom model * minor fix * Fixed relative imports issue * Fix/typo (#880) * Update greetings.yml * Update greetings.yml * Changelog (#869) * Create CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Update PULL_REQUEST_TEMPLATE.md * Add PR links to Version 0.6.0 in CHANGELOG.md * Add PR links for Unreleased in CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Fixing Function Signatures (#871) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Luis Capelo <luiscape@gmail.com> Co-authored-by: Akshay Kulkarni <akshayk.vnit@gmail.com> Co-authored-by: Ethan Harris <ewah1g13@soton.ac.uk> Co-authored-by: Shikhar Chauhan <xssChauhan@users.noreply.github.com>
2020-02-17 21:01:20 +00:00
try:
import horovod.torch as hvd
except (ModuleNotFoundError, ImportError):
HOROVOD_AVAILABLE = False
else:
HOROVOD_AVAILABLE = True
2019-07-09 00:12:27 +00:00
class Trainer(
TrainerIOMixin,
TrainerCallbackHookMixin,
TrainerModelHooksMixin,
TrainerOptimizersMixin,
TrainerAMPMixin,
TrainerDPMixin,
TrainerDDPMixin,
TrainerLoggingMixin,
TrainerTrainingTricksMixin,
TrainerDataLoadingMixin,
TrainerEvaluationLoopMixin,
TrainerTrainLoopMixin,
TrainerCallbackConfigMixin,
TrainerLRFinderMixin,
TrainerDeprecatedAPITillVer0_10,
):
"""
Example:
>>> import torch
>>> from torch.nn import functional as F
>>> from torch.utils.data import Dataset, DataLoader
>>> # Define model
>>> class SimpleModel(LightningModule):
... def __init__(self):
... super().__init__()
... self.l1 = torch.nn.Linear(in_features=64, out_features=4)
...
... def forward(self, x):
... return torch.relu(self.l1(x.view(x.size(0), -1)))
...
... def training_step(self, batch, batch_nb):
... x, y = batch
... loss = F.cross_entropy(self(x), y)
... return {'loss': loss, 'log': {'train_loss': loss}}
...
... def test_step(self, batch, batch_nb):
... x, y = batch
... loss = F.cross_entropy(self(x), y)
2020-07-20 18:13:56 +00:00
... return {'loss': loss, 'log': {'test_loss': loss}}
...
... def configure_optimizers(self):
... return torch.optim.Adam(self.parameters(), lr=0.02)
...
>>> # Define dataset
>>> class SimpleDataset(Dataset):
... def __init__(self, num_samples=200):
... self.input_seq = torch.randn(num_samples, 64)
... self.output_seq = torch.randint(0, 4, (num_samples,))
...
... def __len__(self):
... return len(self.input_seq)
...
... def __getitem__(self, item):
... return self.input_seq[item], self.output_seq[item]
...
>>> train_loader = DataLoader(SimpleDataset(), batch_size=8)
>>> model = SimpleModel()
>>> # Define Trainer and fit model
>>> trainer = Trainer(max_epochs=1, progress_bar_refresh_rate=0)
>>> trainer.fit(model, train_loader)
1
>>> test_outputs = trainer.test(model, train_loader, verbose=False)
>>> len(test_outputs)
25
"""
def __init__(
self,
logger: Union[LightningLoggerBase, Iterable[LightningLoggerBase], bool] = True,
checkpoint_callback: Union[ModelCheckpoint, bool] = True,
early_stop_callback: Optional[Union[EarlyStopping, bool]] = False,
callbacks: Optional[List[Callback]] = None,
default_root_dir: Optional[str] = None,
gradient_clip_val: float = 0,
process_position: int = 0,
num_nodes: int = 1,
num_processes: int = 1,
gpus: Optional[Union[List[int], str, int]] = None,
auto_select_gpus: bool = False,
tpu_cores: Optional[Union[List[int], str, int]] = None,
log_gpu_memory: Optional[str] = None,
progress_bar_refresh_rate: int = 1,
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
overfit_batches: Union[int, float] = 0.0,
track_grad_norm: Union[int, float, str] = -1,
check_val_every_n_epoch: int = 1,
fast_dev_run: bool = False,
accumulate_grad_batches: Union[int, Dict[int, int], List[list]] = 1,
max_epochs: int = 1000,
min_epochs: int = 1,
max_steps: Optional[int] = None,
min_steps: Optional[int] = None,
limit_train_batches: Union[int, float] = 1.0,
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
limit_val_batches: Union[int, float] = 1.0,
limit_test_batches: Union[int, float] = 1.0,
val_check_interval: Union[int, float] = 1.0,
log_save_interval: int = 100,
row_log_interval: int = 50,
distributed_backend: Optional[str] = None,
sync_batchnorm: bool = False,
precision: int = 32,
weights_summary: Optional[str] = ModelSummary.MODE_DEFAULT,
weights_save_path: Optional[str] = None,
num_sanity_val_steps: int = 2,
truncated_bptt_steps: Optional[int] = None,
resume_from_checkpoint: Optional[str] = None,
profiler: Optional[Union[BaseProfiler, bool]] = None,
benchmark: bool = False,
deterministic: bool = False,
reload_dataloaders_every_epoch: bool = False,
auto_lr_find: Union[bool, str] = False,
replace_sampler_ddp: bool = True,
terminate_on_nan: bool = False,
auto_scale_batch_size: Union[str, bool] = False,
prepare_data_per_node: bool = True,
amp_backend: str = 'native',
2020-06-25 22:54:32 +00:00
amp_level: str = 'O2', # backward compatible, todo: remove in v1.0.0
val_percent_check: float = None, # backward compatible, todo: remove in v0.10.0
test_percent_check: float = None, # backward compatible, todo: remove in v0.10.0
train_percent_check: float = None, # backward compatible, todo: remove in v0.10.0
overfit_pct: float = None, # backward compatible, todo: remove in v1.0.0
):
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
r"""
Customize every aspect of training via flags
Args:
logger: Logger (or iterable collection of loggers) for experiment tracking.
checkpoint_callback: Callback for checkpointing.
resolving documentation warnings (#833) * add more underline * fix LightningMudule import error * remove unneeded blank line * escape asterisk to fix inline emphasis warning * add PULL_REQUEST_TEMPLATE.md * add __init__.py and import imagenet_example * fix duplicate label * add noindex option to fix duplicate object warnings * remove unexpected indent * refer explicit LightningModule * fix minor bug * refer EarlyStopping explicitly * restore exclude patterns * change the way how to refer class * remove unused import * update badges & drop Travis/Appveyor (#826) * drop Travis * drop Appveyor * update badges * fix missing PyPI images & CI badges (#853) * docs - anchor links (#848) * docs - add links * add desc. * add Greeting action (#843) * add Greeting action * Update greetings.yml Co-authored-by: William Falcon <waf2107@columbia.edu> * add pep8speaks (#842) * advanced profiler describe + cleaned up tests (#837) * add py36 compatibility * add test case to capture previous bug * clean up tests * clean up tests * Update lightning_module_template.py * Update lightning.py * respond lint issues * break long line * break more lines * checkout conflicting files from master * shorten url * checkout from upstream/master * remove trailing whitespaces * remove unused import LightningModule * fix sphinx bot warnings * Apply suggestions from code review just to trigger CI * Update .github/workflows/greetings.yml Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-02-27 21:07:51 +00:00
early_stop_callback (:class:`pytorch_lightning.callbacks.EarlyStopping`):
callbacks: Add a list of callbacks.
default_root_dir: Default path for logs and weights when no logger/ckpt_callback passed.
Default: ``os.getcwd()``.
Can be remote file paths such as `s3://mybucket/path` or 'hdfs://path/'
gradient_clip_val: 0 means don't clip.
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
process_position: orders the progress bar when running multiple models on same machine.
num_nodes: number of GPU nodes for distributed training.
gpus: number of gpus to train on (int) or which GPUs to train on (list or str) applied per node
auto_select_gpus:
If enabled and `gpus` is an integer, pick available
gpus automatically. This is especially useful when
GPUs are configured to be in "exclusive mode", such
that only one process at a time can access them.
tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on [1]
log_gpu_memory: None, 'min_max', 'all'. Might slow performance
progress_bar_refresh_rate: How often to refresh progress bar (in steps). Value ``0`` disables progress bar.
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
Ignored when a custom callback is passed to :paramref:`~Trainer.callbacks`.
overfit_batches: Overfit a percent of training data (float) or a set number of batches (int). Default: 0.0
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
overfit_pct:
.. warning:: .. deprecated:: 0.8.0
Use `overfit_batches` instead. Will be removed in 0.10.0.
track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm.
check_val_every_n_epoch: Check val every n train epochs.
fast_dev_run: runs 1 batch of train, test and val to find any bugs (ie: a sort of unit test).
accumulate_grad_batches: Accumulates grads every k batches or as set up in the dict.
max_epochs: Stop training once this number of epochs is reached.
min_epochs: Force training for at least these many epochs
max_steps: Stop training after this number of steps. Disabled by default (None).
min_steps: Force training for at least these number of steps. Disabled by default (None).
limit_train_batches: How much of training dataset to check (floats = percent, int = num_batches)
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
limit_val_batches: How much of validation dataset to check (floats = percent, int = num_batches)
limit_test_batches: How much of test dataset to check (floats = percent, int = num_batches)
train_percent_check:
.. warning:: .. deprecated:: 0.8.0
Use `limit_train_batches` instead. Will remove v0.10.0.
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
val_percent_check:
.. warning:: .. deprecated:: 0.8.0
Use `limit_val_batches` instead. Will remove v0.10.0.
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
test_percent_check:
.. warning:: .. deprecated:: 0.8.0
Use `limit_test_batches` instead. Will remove v0.10.0.
val_check_interval: How often to check the validation set. Use float to check within a training epoch,
use int to check every n steps (batches).
log_save_interval: Writes logs to disk this often
row_log_interval: How often to add logging rows (does not write to disk)
distributed_backend: The distributed backend to use (dp, ddp, ddp2, ddp_spawn, ddp_cpu)
sync_batchnorm: Synchronize batch norm layers between process groups/whole world.
precision: Full precision (32), half precision (16). Can be used on CPU, GPU or TPUs.
weights_summary: Prints a summary of the weights when training begins.
weights_save_path: Where to save weights if specified. Will override default_root_dir
for checkpoints only. Use this if for whatever reason you need the checkpoints
stored in a different place than the logs written in `default_root_dir`.
Can be remote file paths such as `s3://mybucket/path` or 'hdfs://path/'
Defaults to `default_root_dir`.
amp_backend: The mixed precision backend to use ("native" or "apex")
amp_level: The optimization level to use (O1, O2, etc...).
num_sanity_val_steps: Sanity check runs n validation batches before starting the training routine.
Set it to `-1` to run all batches in all validation dataloaders. Default: 2
truncated_bptt_steps: Truncated back prop breaks performs backprop every k steps of much longer
sequence.
resume_from_checkpoint: To resume training from a specific checkpoint pass in the path here.
This can be a URL.
profiler: To profile individual steps during training and assist in identifying bottlenecks.
reload_dataloaders_every_epoch: Set to True to reload dataloaders every epoch.
auto_lr_find: If set to True, will `initially` run a learning rate finder,
trying to optimize initial learning for faster convergence. Sets learning
replace Hparams by init args (#1896) * remove the need for hparams * remove the need for hparams * remove the need for hparams * remove the need for hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * finished moco * basic * testing * todo * recurse * hparams * persist * hparams * chlog * tests * tests * tests * tests * tests * tests * review * saving * tests * tests * tests * docs * finished moco * hparams * review * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * hparams * overwrite * transform * transform * transform * transform * cleaning * cleaning * tests * examples * examples * examples * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * chp key * tests * Apply suggestions from code review * class * updated docs * updated docs * updated docs * updated docs * save * wip * fix * flake8 Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
2020-05-24 22:59:08 +00:00
rate in self.lr or self.learning_rate in the LightningModule.
To use a different key, set a string instead of True with the key name.
replace_sampler_ddp: Explicitly enables or disables sampler replacement. If not specified this
will toggled automatically when DDP is used. By default it will add ``shuffle=True`` for
train sampler and ``shuffle=False`` for val/test sampler. If you want to customize it,
you can set ``replace_sampler_ddp=False`` and add your own distributed sampler.
benchmark: If true enables cudnn.benchmark.
deterministic: If true enables cudnn.deterministic.
Option to provide seed to random generators to ensure reproducibility (#1572) * Option to provide seed to random generators to ensure reproducibility I added small function in utilities which imports torch, numpy, python random and sets seed for all of the libraries to ensure reproducibility of results. * Apply recommendations from core contributors on seeding 1. Moved the seeding code to another file 2. Make deterministic as a parameter for trainer class 3. Add assertions for seeding numpy 4. Added warnings 5. torch.manual_seed should be enough for seeding torch * Revert "Apply recommendations from core contributors on seeding" This reverts commit a213c8e6882eec8a9e7408b9418926d2db7c5461. * Revert "Revert "Apply recommendations from core contributors on seeding"" This reverts commit 59b2da53c62878de7aab0aa3feb3115e105eea06. * Change in test, for correct seeding * Allow seed equal to 0 * Allow seed to be uint32.max * Added deterministic to benchmarks * Cuda manual seed as in benchmark seeding * Seeding should be done before model initialization * cuda manual_seed is not necessary * Fixing seed test_cpu_lbfgs On some seeds seems like lbfgs doesn't converge. So I fixed the seed during testing. * rebasing issue with old reproducibility.py * Improved documentation and ability to seed before initializing Train class * Change in docs * Removed seed from trainer, update for documentation * Typo in the docs * Added seed_everything to _all_ * Fixing old changes * Model initialization should be earlier then Trainer * Update pytorch_lightning/trainer/__init__.py From Example to testcode Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Fixing according to the contributors suggestions * Moving horovod deterministic to Trainer class * deterministic flag affects horovod docs update * Improved static typing * Added deterministic to test runners of horovod It is failing on some versions, not very predictable * static seeds for horovod tests * Change for reset_seed function in tests * Seeding horovod using reset_seed from tutils * Update pytorch_lightning/trainer/__init__.py * chlog * Update trainer.py * change "testcode" to "Example" in trainer init documentation * Update pytorch_lightning/trainer/seed.py, first line in comment Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jirka <jirka.borovec@seznam.cz> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-05-12 11:53:20 +00:00
terminate_on_nan: If set to True, will terminate training (by raising a `ValueError`) at the
end of each training batch, if any of the parameters or the loss are NaN or +/-inf.
auto_scale_batch_size: If set to True, will `initially` run a batch size
finder trying to find the largest batch size that fits into memory.
replace Hparams by init args (#1896) * remove the need for hparams * remove the need for hparams * remove the need for hparams * remove the need for hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * replace self.hparams * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * fixed * finished moco * basic * testing * todo * recurse * hparams * persist * hparams * chlog * tests * tests * tests * tests * tests * tests * review * saving * tests * tests * tests * docs * finished moco * hparams * review * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * hparams * overwrite * transform * transform * transform * transform * cleaning * cleaning * tests * examples * examples * examples * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * chp key * tests * Apply suggestions from code review * class * updated docs * updated docs * updated docs * updated docs * save * wip * fix * flake8 Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
2020-05-24 22:59:08 +00:00
The result will be stored in self.batch_size in the LightningModule.
Additionally, can be set to either `power` that estimates the batch size through
a power search or `binsearch` that estimates the batch size through a binary search.
prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.
Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data
2019-07-18 16:04:19 +00:00
"""
super().__init__()
2019-07-18 16:04:19 +00:00
Option to provide seed to random generators to ensure reproducibility (#1572) * Option to provide seed to random generators to ensure reproducibility I added small function in utilities which imports torch, numpy, python random and sets seed for all of the libraries to ensure reproducibility of results. * Apply recommendations from core contributors on seeding 1. Moved the seeding code to another file 2. Make deterministic as a parameter for trainer class 3. Add assertions for seeding numpy 4. Added warnings 5. torch.manual_seed should be enough for seeding torch * Revert "Apply recommendations from core contributors on seeding" This reverts commit a213c8e6882eec8a9e7408b9418926d2db7c5461. * Revert "Revert "Apply recommendations from core contributors on seeding"" This reverts commit 59b2da53c62878de7aab0aa3feb3115e105eea06. * Change in test, for correct seeding * Allow seed equal to 0 * Allow seed to be uint32.max * Added deterministic to benchmarks * Cuda manual seed as in benchmark seeding * Seeding should be done before model initialization * cuda manual_seed is not necessary * Fixing seed test_cpu_lbfgs On some seeds seems like lbfgs doesn't converge. So I fixed the seed during testing. * rebasing issue with old reproducibility.py * Improved documentation and ability to seed before initializing Train class * Change in docs * Removed seed from trainer, update for documentation * Typo in the docs * Added seed_everything to _all_ * Fixing old changes * Model initialization should be earlier then Trainer * Update pytorch_lightning/trainer/__init__.py From Example to testcode Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Fixing according to the contributors suggestions * Moving horovod deterministic to Trainer class * deterministic flag affects horovod docs update * Improved static typing * Added deterministic to test runners of horovod It is failing on some versions, not very predictable * static seeds for horovod tests * Change for reset_seed function in tests * Seeding horovod using reset_seed from tutils * Update pytorch_lightning/trainer/__init__.py * chlog * Update trainer.py * change "testcode" to "Example" in trainer init documentation * Update pytorch_lightning/trainer/seed.py, first line in comment Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jirka <jirka.borovec@seznam.cz> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-05-12 11:53:20 +00:00
self.deterministic = deterministic
torch.backends.cudnn.deterministic = self.deterministic
if self.deterministic:
# fixing non-deterministic part of horovod
# https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
os.environ["HOROVOD_FUSION_THRESHOLD"] = str(0)
# init the default rank if exists
# we need to call this here or NVIDIA flags and other messaging in init will show on all ranks
# this way we only show it on rank 0
if 'LOCAL_RANK' in os.environ:
rank_zero_only.rank = int(os.environ['LOCAL_RANK'])
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
# training bookeeping
self.total_batch_idx = 0
self.running_loss = TensorRunningAccum(window_length=20)
self.batch_idx = 0
self.progress_bar_metrics = {}
self.callback_metrics = {}
self.logged_metrics = {}
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
self.num_training_batches = 0
self.num_val_batches = []
self.num_sanity_val_batches = []
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
self.num_test_batches = []
self.train_dataloader = None
self.test_dataloaders = None
self.val_dataloaders = None
# when true, prints test results
self.verbose_test = True
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
# when .test() is called, it sets this
self.tested_ckpt_path = None
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
# training state
self.model = None
self.datamodule = None
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
self.testing = False
self.prepare_data_per_node = prepare_data_per_node
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
self.lr_schedulers = []
self.optimizers = None
self.optimizer_frequencies = []
self.global_step = 0
self.current_epoch = 0
self.interrupted = False
self.should_stop = False
self.running_sanity_check = False
self._state = TrainerState.INITIALIZING
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
self._default_root_dir = default_root_dir or os.getcwd()
self._weights_save_path = weights_save_path or self._default_root_dir
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
# init callbacks
self.callbacks = callbacks or []
Continue Jeremy's early stopping PR #1504 (#2391) * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * cannot pass an int as default_save_path * refactor log message * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * add state_dict for early stopping * move best attr after monitor_op defined * improve early stopping and model checkpoint callbacks * fix formatting * fix attr init order * clean up setting of default_root_dir attr * logger needs default root dir set first * reorg trainer init * remove direct references to checkpoint callback * more fixes * more bugfixes * run callbacks at epoch end * update tests to use on epoch end * PR cleanup * address failing tests * refactor for homogeneity * fix merge conflict * separate tests * tests for early stopping bug regressions * small fixes * revert model checkpoint change * typo fix * fix tests * update train loop * fix test case * appease the linter * fix some doctests * move config to callback * fixes from rebase * fixes from rebase * chlog * docs * reformat * formatting * fix * fix * fixes from rebase * add new test for patience * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update tests/callbacks/test_early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix formatting * remove enable_early_stop attribute * fix test with new epoch indexing * fix progress bar totals * fix off by one error (see #2289) epoch starts at 0 now * added missing imports * fix hpc_save folderpath * fix formatting * fix tests * small fixes from a rebase * fix * tmpdir * tmpdir * tmpdir * wandb * fix merge conflict * add back evaluation after training * test_resume_early_stopping_from_checkpoint TODO * undo the horovod check * update changelog * remove a duplicate test from merge error * try fix dp_resume test * add the logger fix from master * try remove default_root_dir * try mocking numpy * try import numpy in docs test * fix wandb test * pep 8 fix * skip if no amp * dont mock when doctesting * install extra * fix the resume ES test * undo conf.py changes * revert remove comet pickle from test * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update weights_loading.rst * Update weights_loading.rst * Update weights_loading.rst * renamed flag * renamed flag * revert the None check in logger experiment name/version * add the old comments * _experiment * test chckpointing on DDP * skip the ddp test on windows * cloudpickle * renamed flag * renamed flag * parentheses for clarity * apply suggestion max epochs Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <jtjordan@ncsu.edu> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-06-29 01:36:46 +00:00
# configure early stop callback
# creates a default one if none passed in
early_stop_callback = self.configure_early_stopping(early_stop_callback)
if early_stop_callback:
self.callbacks.append(early_stop_callback)
# configure checkpoint callback
# it is important that this is the last callback to run
# pass through the required args to figure out defaults
checkpoint_callback = self.configure_checkpoint_callback(checkpoint_callback)
if checkpoint_callback:
self.callbacks.append(checkpoint_callback)
# TODO refactor codebase (tests) to not directly reach into these callbacks
self.checkpoint_callback = checkpoint_callback
self.early_stop_callback = early_stop_callback
self.on_init_start()
# benchmarking
self.benchmark = benchmark
torch.backends.cudnn.benchmark = self.benchmark
2019-03-31 01:45:16 +00:00
# Transfer params
self.num_nodes = num_nodes
self.log_gpu_memory = log_gpu_memory
# sync-bn backend
self.sync_batchnorm = sync_batchnorm
self.gradient_clip_val = gradient_clip_val
2019-03-31 01:45:16 +00:00
self.check_val_every_n_epoch = check_val_every_n_epoch
if not isinstance(track_grad_norm, (int, float)) and track_grad_norm != 'inf':
raise MisconfigurationException("track_grad_norm can be an int, a float or 'inf' (infinity norm).")
self.track_grad_norm = float(track_grad_norm)
self.tpu_cores = _parse_tpu_cores(tpu_cores)
self.on_tpu = self.tpu_cores is not None
self.tpu_id = self.tpu_cores[0] if isinstance(self.tpu_cores, list) else None
Enable TPU support (#868) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * fix test pkg create (#873) * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Luis Capelo <luiscape@gmail.com> * Fix segmentation example (#876) * removed torchvision model and added custom model * minor fix * Fixed relative imports issue * Fix/typo (#880) * Update greetings.yml * Update greetings.yml * Changelog (#869) * Create CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Update PULL_REQUEST_TEMPLATE.md * Add PR links to Version 0.6.0 in CHANGELOG.md * Add PR links for Unreleased in CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Fixing Function Signatures (#871) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Luis Capelo <luiscape@gmail.com> Co-authored-by: Akshay Kulkarni <akshayk.vnit@gmail.com> Co-authored-by: Ethan Harris <ewah1g13@soton.ac.uk> Co-authored-by: Shikhar Chauhan <xssChauhan@users.noreply.github.com>
2020-02-17 21:01:20 +00:00
if num_processes != 1 and distributed_backend != "ddp_cpu":
rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.")
self.num_processes = num_processes
self.weights_summary = weights_summary
self.max_epochs = max_epochs
self.min_epochs = min_epochs
self.max_steps = max_steps
self.min_steps = min_steps
if num_sanity_val_steps == -1:
self.num_sanity_val_steps = float('inf')
else:
self.num_sanity_val_steps = num_sanity_val_steps
self.reload_dataloaders_every_epoch = reload_dataloaders_every_epoch
self.auto_lr_find = auto_lr_find
self.auto_scale_batch_size = auto_scale_batch_size
self._is_data_prepared = False
self.replace_sampler_ddp = replace_sampler_ddp
self.truncated_bptt_steps = truncated_bptt_steps
self.resume_from_checkpoint = resume_from_checkpoint
self.terminate_on_nan = terminate_on_nan
self.shown_warnings = set()
2019-07-08 13:42:13 +00:00
self.fast_dev_run = fast_dev_run
if self.fast_dev_run:
limit_train_batches = 1
limit_val_batches = 1
limit_test_batches = 1
self.num_sanity_val_steps = 0
self.max_epochs = 1
rank_zero_info(
'Running in fast_dev_run mode: will run a full train,' ' val and test loop using a single batch'
)
# configure profiler
if profiler is True:
profiler = SimpleProfiler()
self.profiler = profiler or PassThroughProfiler()
2019-09-06 04:29:38 +00:00
# accumulated grads
self.accumulate_grad_batches = accumulate_grad_batches
self.configure_accumulated_gradients(accumulate_grad_batches)
2019-09-06 04:29:38 +00:00
# override with environment flag
gpus = os.environ.get('PL_TRAINER_GPUS', gpus)
# for gpus allow int, string and gpu list
if auto_select_gpus and isinstance(gpus, int):
self.gpus = pick_multiple_gpus(gpus)
else:
self.gpus = gpus
self.data_parallel_device_ids = _parse_gpu_ids(self.gpus)
parse_gpu_ids fix (#382) * Unit tests for num_gpu property as proxy for __parse_gpu_ids. * Refactoring __parse_gpu_ids * Moved the function outside the class as it is an utility function and did not depend on class in any way. * Added unit tests for it. * Mocked torch.cuda.device_count function in tests. This allows the tests to be run on machines that do not have gpus. * Fixed the parse_gpu_ids function to handle -1 case. Function now handles -1 the same way as it does for '-1'. * Unit tests for root_gpu added. Added backend as a parameter as currently depending on backend set or not, code fails with exception in certain circumstances, before giving a wrong answer. * Moved __set_root_gpu function out of the class. This function does not depend on the class and can be tested more easily this way. Also added unit tests for this function. They simply reuse data for the root_gpu property. * determine_root_gpu_device passes unit tests. * num_gpus passes unit tests. Also added a None test for this function. * parse_gpu_ids tests changed to reflect desired state after refactoring. Planning to refactor parse_gpu_ids to always return list of ints. This will simplify code that use output of this function. * * parse_gpu_ids always returns lists * parse_gpu_ids checks given ids against available ids * parse_gpu_ids raises exception for non existant ids * parse_gpu_ids returns None when no gpus are available * cleaned up determine_root_gpu_device * cleaned up num_gpus property * Updated unit tests to reflect changes in the functions * Flake8 fixes * Moved fixture code up before where it is used. * Updated documentation. * Changed tests to match the API: * gpus=-1 or gpus='-1' should use all available gpu devices * gpus=N * N=0: no gpus should be used. * N>0: N gpus should be used * gpus=list of ints or a comma separated string of numbers: Use the gpus indicated by the list or the string. * Fixed code to pass all the changed tests for parsing gpus param. * Refactoring parse_gpu_ids function. * flake8 fixes. * Updating documentation. * flake8 fixes. * flake8 fixes. * flake8 fixes * Update trainer.py * Update dp_mixin.py * Make reduce_distributed_output a stand alone function. Fix imports. Fix flake8. * Add comet_ml dependency to tests requirements.txt * Revert "Make reduce_distributed_output a stand alone function. Fix imports. Fix flake8." This reverts commit eac0338 * Merge with master.
2019-10-23 09:05:09 +00:00
self.root_gpu = determine_root_gpu_device(self.data_parallel_device_ids)
self.root_device = torch.device("cpu")
2019-09-06 04:29:38 +00:00
self.on_gpu = True if (self.data_parallel_device_ids and torch.cuda.is_available()) else False
Enable TPU support (#868) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * fix test pkg create (#873) * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Luis Capelo <luiscape@gmail.com> * Fix segmentation example (#876) * removed torchvision model and added custom model * minor fix * Fixed relative imports issue * Fix/typo (#880) * Update greetings.yml * Update greetings.yml * Changelog (#869) * Create CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Update PULL_REQUEST_TEMPLATE.md * Add PR links to Version 0.6.0 in CHANGELOG.md * Add PR links for Unreleased in CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Fixing Function Signatures (#871) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Luis Capelo <luiscape@gmail.com> Co-authored-by: Akshay Kulkarni <akshayk.vnit@gmail.com> Co-authored-by: Ethan Harris <ewah1g13@soton.ac.uk> Co-authored-by: Shikhar Chauhan <xssChauhan@users.noreply.github.com>
2020-02-17 21:01:20 +00:00
# tpu state flags
self.use_tpu = False
self.tpu_local_core_rank = None
self.tpu_global_core_rank = None
2019-09-06 04:29:38 +00:00
# distributed backend choice
self.distributed_backend = distributed_backend
self.set_distributed_mode(distributed_backend)
2019-09-06 04:29:38 +00:00
Enable TPU support (#868) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * fix test pkg create (#873) * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Luis Capelo <luiscape@gmail.com> * Fix segmentation example (#876) * removed torchvision model and added custom model * minor fix * Fixed relative imports issue * Fix/typo (#880) * Update greetings.yml * Update greetings.yml * Changelog (#869) * Create CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Update PULL_REQUEST_TEMPLATE.md * Add PR links to Version 0.6.0 in CHANGELOG.md * Add PR links for Unreleased in CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Fixing Function Signatures (#871) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Luis Capelo <luiscape@gmail.com> Co-authored-by: Akshay Kulkarni <akshayk.vnit@gmail.com> Co-authored-by: Ethan Harris <ewah1g13@soton.ac.uk> Co-authored-by: Shikhar Chauhan <xssChauhan@users.noreply.github.com>
2020-02-17 21:01:20 +00:00
# override dist backend when using tpus
if self.on_tpu:
self.distributed_backend = 'tpu'
Enable TPU support (#868) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * fix test pkg create (#873) * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Luis Capelo <luiscape@gmail.com> * Fix segmentation example (#876) * removed torchvision model and added custom model * minor fix * Fixed relative imports issue * Fix/typo (#880) * Update greetings.yml * Update greetings.yml * Changelog (#869) * Create CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Update PULL_REQUEST_TEMPLATE.md * Add PR links to Version 0.6.0 in CHANGELOG.md * Add PR links for Unreleased in CHANGELOG.md * Update PULL_REQUEST_TEMPLATE.md * Fixing Function Signatures (#871) * added tpu docs * added tpu flags * add tpu docs + init training call * amp * amp * amp * amp * optimizer step * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added auto data transfer to TPU * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print * added test return and print Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Luis Capelo <luiscape@gmail.com> Co-authored-by: Akshay Kulkarni <akshayk.vnit@gmail.com> Co-authored-by: Ethan Harris <ewah1g13@soton.ac.uk> Co-authored-by: Shikhar Chauhan <xssChauhan@users.noreply.github.com>
2020-02-17 21:01:20 +00:00
self.init_tpu()
# init flags for SLURM+DDP to work
2019-09-06 04:29:38 +00:00
self.world_size = 1
Replaces ddp .spawn with subprocess (#2029) * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * replace ddp spawn with subprocess * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix * hot fix
2020-06-01 15:00:32 +00:00
self.interactive_ddp_procs = []
self.configure_slurm_ddp(self.num_nodes)
self.node_rank = self.determine_ddp_node_rank()
self.local_rank = self.determine_local_rank()
self.global_rank = 0
# NVIDIA setup
self.set_nvidia_flags(self.is_slurm_managing_tasks, self.data_parallel_device_ids)
2019-09-06 04:29:38 +00:00
self._progress_bar_callback = self.configure_progress_bar(progress_bar_refresh_rate, process_position)
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
2019-09-06 04:29:38 +00:00
# logging
self.configure_logger(logger)
2019-09-06 04:29:38 +00:00
self.log_save_interval = log_save_interval
self.row_log_interval = row_log_interval
2019-09-06 04:29:38 +00:00
# how much of the data to use
# TODO: remove in 0.10.0
if overfit_pct is not None:
rank_zero_warn(
"Argument `overfit_pct` is now set by `overfit_batches` since v0.8.0"
" and this argument will be removed in v0.10.0",
DeprecationWarning,
)
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
overfit_batches = overfit_pct
# TODO: remove in 0.10.0
if val_percent_check is not None:
rank_zero_warn(
"Argument `val_percent_check` is now set by `limit_val_batches` since v0.8.0"
" and this argument will be removed in v0.10.0",
DeprecationWarning,
)
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
limit_val_batches = val_percent_check
# TODO: remove in 0.10.0
if test_percent_check is not None:
rank_zero_warn(
"Argument `test_percent_check` is now set by `limit_test_batches` since v0.8.0"
" and this argument will be removed in v0.10.0",
DeprecationWarning,
)
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
limit_test_batches = test_percent_check
# TODO: remove in 0.10.0
if train_percent_check is not None:
rank_zero_warn(
"Argument `train_percent_check` is now set by `limit_train_batches` since v0.8.0"
" and this argument will be removed in v0.10.0",
DeprecationWarning,
)
limit_train_batches = train_percent_check
[WIP] Rename overfit_pct to overfit_batches (and fix) and val_percent_check and test_percent_check (and fix) (#2213) * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * fixed percent check for val/test * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * overfit_pct now uses train loaders for val and test and does not shuffle * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks * add on fit_start on fit_end hooks Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-06-17 12:03:28 +00:00
self.limit_train_batches = _determine_batch_limits(limit_train_batches, 'limit_train_batches')
self.limit_val_batches = _determine_batch_limits(limit_val_batches, 'limit_val_batches')
self.limit_test_batches = _determine_batch_limits(limit_test_batches, 'limit_test_batches')
self.val_check_interval = _determine_batch_limits(val_check_interval, 'val_check_interval')
self.overfit_batches = _determine_batch_limits(overfit_batches, 'overfit_batches')
self.determine_data_use_amount(self.overfit_batches)
2019-09-06 04:29:38 +00:00
# AMP init
# These are the only lines needed after v0.8.0
# we wrap the user's forward with autocast and give it back at the end of fit
self.autocast_original_forward = None
self.precision = precision
self.scaler = None
2019-09-06 04:29:38 +00:00
self.amp_level = amp_level
self.init_amp(amp_backend)
2019-09-06 04:29:38 +00:00
self.on_colab_kaggle = os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE')
Structured results (train loop only. val loop separate PR) (PR 2/5) (#2615) * r * r * r * patched optimizer closure with sr * patched optimizer closure with sr * patched optimizer closure with sr * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added autoreduce for train step * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added hooks * added hooks * added hooks * added hooks * added hooks * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * cache * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py * Update pytorch_lightning/core/step_result.py * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * simple * finished tests for structured results on train epoch * simple * simple * revert * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update tests/base/deterministic_model.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * finished tests for structured results on train epoch * docstring typos * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update pytorch_lightning/core/step_result.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Update pytorch_lightning/overrides/data_parallel.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com>
2020-07-20 23:00:20 +00:00
# tracks internal state for debugging
self.dev_debugger = InternalDebugger(self)
self.config_validator = ConfigValidator(self)
self.data_connector = DataConnector(self)
self.accelerator_connector = AcceleratorConnector(self)
self.accelerator_backend = None
Structured results (train loop only. val loop separate PR) (PR 2/5) (#2615) * r * r * r * patched optimizer closure with sr * patched optimizer closure with sr * patched optimizer closure with sr * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added train step structured result * added autoreduce for train step * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added auto reduce on train * added hooks * added hooks * added hooks * added hooks * added hooks * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * cache * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * Update pytorch_lightning/callbacks/model_checkpoint.py * Update pytorch_lightning/core/step_result.py * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Apply suggestions from code review Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * simple * finished tests for structured results on train epoch * simple * simple * revert * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update tests/base/deterministic_model.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * finished tests for structured results on train epoch * docstring typos * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * finished tests for structured results on train epoch * Update pytorch_lightning/core/step_result.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Update pytorch_lightning/overrides/data_parallel.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com>
2020-07-20 23:00:20 +00:00
# loops
self.evaluation_loop = EvaluationLoop(self)
self.train_loop = TrainLoop(self)
# Callback system
self.on_init_end()
@property
def state(self) -> TrainerState:
return self._state
@property
def is_global_zero(self) -> bool:
return self.global_rank == 0
@property
def slurm_job_id(self) -> Optional[int]:
try:
job_id = os.environ['SLURM_JOB_ID']
job_id = int(job_id)
2020-04-25 20:01:15 +00:00
# in interactive mode, don't make logs use the same job id
in_slurm_interactive_mode = os.environ['SLURM_JOB_NAME'] == 'bash'
if in_slurm_interactive_mode:
job_id = None
except Exception:
job_id = None
return job_id
@classmethod
def default_attributes(cls):
init_signature = inspect.signature(Trainer)
args = {}
for param_name in init_signature.parameters:
value = init_signature.parameters[param_name].default
args[param_name] = value
return args
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
@classmethod
def get_init_arguments_and_types(cls) -> List[Tuple[str, Tuple, Any]]:
r"""Scans the Trainer signature and returns argument names, types and default values.
Returns:
List with tuples of 3 values:
(argument name, set with argument types, argument default value).
Examples:
>>> args = Trainer.get_init_arguments_and_types()
>>> import pprint
>>> pprint.pprint(sorted(args)) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
[('accumulate_grad_batches',
(<class 'int'>, typing.Dict[int, int], typing.List[list]),
1),
...
('callbacks',
(typing.List[pytorch_lightning.callbacks.base.Callback],
<class 'NoneType'>),
None),
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
('check_val_every_n_epoch', (<class 'int'>,), 1),
...
('max_epochs', (<class 'int'>,), 1000),
...
('precision', (<class 'int'>,), 32),
('prepare_data_per_node', (<class 'bool'>,), True),
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
('process_position', (<class 'int'>,), 0),
('profiler',
(<class 'pytorch_lightning.profiler.profilers.BaseProfiler'>,
<class 'bool'>,
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
<class 'NoneType'>),
None),
...
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
"""
trainer_default_params = inspect.signature(cls).parameters
name_type_default = []
for arg in trainer_default_params:
arg_type = trainer_default_params[arg].annotation
arg_default = trainer_default_params[arg].default
try:
arg_types = tuple(arg_type.__args__)
except AttributeError:
arg_types = (arg_type,)
name_type_default.append((arg, arg_types, arg_default))
return name_type_default
@classmethod
def get_deprecated_arg_names(cls) -> List:
"""Returns a list with deprecated Trainer arguments."""
depr_arg_names = []
for name, val in cls.__dict__.items():
if name.startswith('DEPRECATED') and isinstance(val, (tuple, list)):
depr_arg_names.extend(val)
return depr_arg_names
@classmethod
def add_argparse_args(cls, parent_parser: ArgumentParser) -> ArgumentParser:
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
r"""Extends existing argparse by default `Trainer` attributes.
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
Args:
parent_parser:
The custom cli arguments parser, which will be extended by
the Trainer default arguments.
Only arguments of the allowed types (str, float, int, bool) will
extend the `parent_parser`.
Examples:
>>> import argparse
>>> import pprint
>>> parser = argparse.ArgumentParser()
>>> parser = Trainer.add_argparse_args(parser)
>>> args = parser.parse_args([])
>>> pprint.pprint(vars(args)) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
{...
'check_val_every_n_epoch': 1,
'checkpoint_callback': True,
'default_root_dir': None,
2020-05-12 21:06:38 +00:00
'deterministic': False,
'distributed_backend': None,
'early_stop_callback': False,
...
'logger': True,
'max_epochs': 1000,
'max_steps': None,
'min_epochs': 1,
'min_steps': None,
...
'profiler': None,
'progress_bar_refresh_rate': 1,
...}
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
"""
parser = ArgumentParser(parents=[parent_parser], add_help=False,)
blacklist = ['kwargs']
depr_arg_names = cls.get_deprecated_arg_names() + blacklist
Custom argparser extension with Trainer arguments (argument types added) (#1147) * `add_argparse_args` method fixed (argument types added) * CHANGELOG.md upd * autopep8 fixes * --gpus=0 removed from test (for ci tests) * typo fixed * reduce on plateau scheduler fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Trainer cli related tests moved to test_trainer_cli.py * refactored: get_init_arguments_and_types is a public classmethod of the Trainer now * test_get_init_arguments_and_types added * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * autopep8 fixes * Trainer cli related tests moved to test_trainer_cli.py * Trainer cli related tests moved to test_trainer_cli.py * test_get_init_arguments_and_types added * autopep8 fixes * autopep8 fixes * Apply suggestions from code review * cosmetics * cosmetics * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * `Trainer.get_init_arguments_and_types` now returns arg types wrapped in tuples (not in sets) * deprecated args are now ignored in argparser * get_deprecated_arg_names small refactor * get_deprecated_arg_names bug fixed * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> * Update pytorch_lightning/trainer/trainer.py Co-Authored-By: Joe Davison <joe@huggingface.co> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Joe Davison <joe@huggingface.co> Co-authored-by: William Falcon <waf2107@columbia.edu>
2020-03-24 18:55:27 +00:00
2020-08-14 01:44:55 +00:00
allowed_types = (str, int, float, bool)
# TODO: get "help" from docstring :)
for arg, arg_types, arg_default in (
at for at in cls.get_init_arguments_and_types() if at[0] not in depr_arg_names
):
arg_types = [at for at in allowed_types if at in arg_types]
if not arg_types:
# skip argument with not supported type
continue
arg_kwargs = {}
if bool in arg_types:
arg_kwargs.update(nargs="?", const=True)
# if the only arg type is bool
if len(arg_types) == 1:
use_type = parsing.str_to_bool
# if only two args (str, bool)
elif len(arg_types) == 2 and set(arg_types) == {str, bool}:
use_type = parsing.str_to_bool_or_str
else:
# filter out the bool as we need to use more general
use_type = [at for at in arg_types if at is not bool][0]
else:
use_type = arg_types[0]
if arg == 'gpus' or arg == 'tpu_cores':
use_type = Trainer._gpus_allowed_type
arg_default = Trainer._gpus_arg_default
# hack for types in (int, float)
if len(arg_types) == 2 and int in set(arg_types) and float in set(arg_types):
use_type = Trainer._int_or_float_type
# hack for track_grad_norm
if arg == 'track_grad_norm':
use_type = float
parser.add_argument(
f'--{arg}',
dest=arg,
default=arg_default,
type=use_type,
help='autogenerated by pl.Trainer',
**arg_kwargs,
)
return parser
def _gpus_allowed_type(x) -> Union[int, str]:
2020-04-27 11:08:34 +00:00
if ',' in x:
return str(x)
else:
return int(x)
def _gpus_arg_default(x) -> Union[int, str]:
2020-04-27 11:08:34 +00:00
if ',' in x:
return str(x)
else:
return int(x)
def _int_or_float_type(x) -> Union[int, float]:
if '.' in str(x):
return float(x)
else:
return int(x)
@classmethod
def parse_argparser(cls, arg_parser: Union[ArgumentParser, Namespace]) -> Namespace:
"""Parse CLI arguments, required for custom bool types."""
args = arg_parser.parse_args() if isinstance(arg_parser, ArgumentParser) else arg_parser
types_default = {
arg: (arg_types, arg_default) for arg, arg_types, arg_default in cls.get_init_arguments_and_types()
}
modified_args = {}
for k, v in vars(args).items():
if k in types_default and v is None:
# We need to figure out if the None is due to using nargs="?" or if it comes from the default value
arg_types, arg_default = types_default[k]
if bool in arg_types and isinstance(arg_default, bool):
# Value has been passed as a flag => It is currently None, so we need to set it to True
# We always set to True, regardless of the default value.
# Users must pass False directly, but when passing nothing True is assumed.
# i.e. the only way to disable somthing that defaults to True is to use the long form:
# "--a_default_true_arg False" becomes False, while "--a_default_false_arg" becomes None,
# which then becomes True here.
v = True
modified_args[k] = v
return Namespace(**modified_args)
@classmethod
def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs) -> 'Trainer':
"""
Create an instance from CLI arguments.
Args:
args: The parser or namespace to take arguments from. Only known arguments will be
parsed and passed to the :class:`Trainer`.
**kwargs: Additional keyword arguments that may override ones in the parser or namespace.
These must be valid Trainer arguments.
Example:
>>> parser = ArgumentParser(add_help=False)
>>> parser = Trainer.add_argparse_args(parser)
>>> parser.add_argument('--my_custom_arg', default='something') # doctest: +SKIP
>>> args = Trainer.parse_argparser(parser.parse_args(""))
>>> trainer = Trainer.from_argparse_args(args, logger=False)
"""
if isinstance(args, ArgumentParser):
args = cls.parse_argparser(args)
params = vars(args)
# we only want to pass in valid Trainer args, the rest may be user specific
valid_kwargs = inspect.signature(cls.__init__).parameters
trainer_kwargs = dict((name, params[name]) for name in valid_kwargs if name in params)
trainer_kwargs.update(**kwargs)
return cls(**trainer_kwargs)
@property
def num_gpus(self) -> int:
gpus = self.data_parallel_device_ids
if gpus is None:
return 0
Resolve some codefactor issues (#756) * remove unnecessary pass statements * use isinstance for type checks * remove unnecessary else/elif after return * remove unnecessary return statements * move doc string to top * merge isinstance calls * remove unnecessary else/elif after raise * use list comprehension * do not use len without comparison * add missing shebang * revert isinstance check back to type broke tests, because bool is actually subclass of int * add missing period to doc string * remove unnecessary pass statements * use isinstance for type checks * remove unnecessary else/elif after return * remove unnecessary return statements * move doc string to top * merge isinstance calls * remove unnecessary else/elif after raise * use list comprehension * do not use len without comparison * add missing shebang * revert isinstance check back to type broke tests, because bool is actually subclass of int * add missing period to doc string * Fix default ckpt path when logger exists (#771) * rename logging -> loggers (#767) * move logging >> loggers * add warning * fix tests * logging alias * formatting * formatting * use isinstance for type checks * revert isinstance check back to type broke tests, because bool is actually subclass of int * add more detail to tbptt example (#755) * add more detail to tbptt example * warn user about new arg in training_step Co-authored-by: Vadim Bereznyuk <kuynzereb@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-02-01 23:44:05 +00:00
return len(gpus)
2019-07-18 15:08:48 +00:00
@property
def data_parallel(self) -> bool:
return self.use_dp or self.use_ddp or self.use_ddp2
2019-07-18 15:08:48 +00:00
@property
def progress_bar_callback(self):
return self._progress_bar_callback
@property
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
def progress_bar_dict(self) -> dict:
""" Read-only for progress bar metrics. """
ref_model = self.model if not self.data_parallel else self.model.module
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
return dict(**ref_model.get_progress_bar_dict(), **self.progress_bar_metrics)
@property
def disable_validation(self) -> bool:
""" Check if validation is disabled during training. """
return not self.enable_validation
@property
def enable_validation(self) -> bool:
""" Check if we should run validation during training. """
val_loop_enabled = is_overridden('validation_step', self.get_model()) and self.limit_val_batches > 0
return val_loop_enabled or self.fast_dev_run
@property
def default_root_dir(self) -> str:
"""
The default location to save artifacts of loggers, checkpoints etc.
It is used as a fallback if logger or checkpoint callback do not define specific save paths.
"""
if get_filesystem(self._default_root_dir).protocol == "file":
return os.path.normpath(self._default_root_dir)
return self._default_root_dir
@property
def weights_save_path(self) -> str:
"""
The default root location to save weights (checkpoints), e.g., when the
:class:`~pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint` does not define a file path.
"""
if get_filesystem(self._weights_save_path).protocol == "file":
return os.path.normpath(self._weights_save_path)
return self._weights_save_path
def tune(
self,
model: LightningModule,
train_dataloader: Optional[DataLoader] = None,
val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None,
datamodule: Optional[LightningDataModule] = None,
):
# TODO: temporary, need to decide if tune or separate object
# setup data, etc...
self.setup_fit(model, train_dataloader, val_dataloaders, datamodule)
# hook
self.call_hook('on_fit_start', model)
# hook
self.data_connector.prepare_data(model)
# Run auto batch size scaling
if self.auto_scale_batch_size:
if isinstance(self.auto_scale_batch_size, bool):
self.auto_scale_batch_size = 'power'
self.scale_batch_size(
model,
mode=self.auto_scale_batch_size,
train_dataloader=train_dataloader,
val_dataloaders=val_dataloaders,
datamodule=datamodule,
)
model.logger = self.logger # reset logger binding
# Run learning rate finder:
if self.auto_lr_find:
self._run_lr_finder_internally(model)
model.logger = self.logger # reset logger binding
2019-03-31 01:45:16 +00:00
# -----------------------------
# MODEL TRAINING
# -----------------------------
@trainer_state(entering=TrainerState.RUNNING, exiting=TrainerState.FINISHED)
def fit(
self,
model: LightningModule,
train_dataloader: Optional[DataLoader] = None,
val_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None,
datamodule: Optional[LightningDataModule] = None,
):
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
r"""
Runs the full optimization routine.
Args:
model: Model to fit.
train_dataloader: A Pytorch
DataLoader with training samples. If the model has
a predefined train_dataloader method this will be skipped.
Clean up dataloader logic (#926) * added get dataloaders directly using a getter * deleted decorator * added prepare_data hook * refactored dataloader init * refactored dataloader init * added dataloader reset flag and main loop * added dataloader reset flag and main loop * added dataloader reset flag and main loop * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixes #909 * fixes #909 * bug fix * Fixes #902
2020-02-25 03:23:25 +00:00
val_dataloaders: Either a single
Pytorch Dataloader or a list of them, specifying validation samples.
Clean up dataloader logic (#926) * added get dataloaders directly using a getter * deleted decorator * added prepare_data hook * refactored dataloader init * refactored dataloader init * added dataloader reset flag and main loop * added dataloader reset flag and main loop * added dataloader reset flag and main loop * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * made changes * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed bad loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixed error in .fit with loaders * fixes #909 * fixes #909 * bug fix * Fixes #902
2020-02-25 03:23:25 +00:00
If the model has a predefined val_dataloaders method this will be skipped
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
Example::
# Option 1,
# Define the train_dataloader() and val_dataloader() fxs
# in the lightningModule
# RECOMMENDED FOR MOST RESEARCH AND APPLICATIONS TO MAINTAIN READABILITY
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
trainer = Trainer()
model = LightningModule()
trainer.fit(model)
# Option 2
# in production cases we might want to pass different datasets to the same model
# Recommended for PRODUCTION SYSTEMS
train, val = DataLoader(...), DataLoader(...)
trainer = Trainer()
model = LightningModule()
trainer.fit(model, train_dataloader=train, val_dataloaders=val)
# Option 1 & 2 can be mixed, for example the training set can be
# defined as part of the model, and validation can then be feed to .fit()
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
"""
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
results = None
# setup data, etc...
self.setup_fit(model, train_dataloader, val_dataloaders, datamodule)
# hook
self.call_hook('on_fit_start', model)
# hook
self.data_connector.prepare_data(model)
2020-08-02 12:13:31 +00:00
# set testing if set in environ
self.testing = os.environ.get('PL_TESTING_MODE', self.testing)
# -------------------------
# TRAIN
# -------------------------
self.accelerator_backend = self.accelerator_connector.select_accelerator()
2020-08-27 01:29:10 +00:00
self.accelerator_backend.setup(model)
results = self.accelerator_backend.train()
self.accelerator_backend.teardown()
# -------------------------
# POST-Training
# -------------------------
2020-08-27 01:29:10 +00:00
# hook
self.call_hook('on_fit_end')
# hook
self.teardown('fit')
if self.is_function_implemented('teardown'):
model.teardown('fit')
# return 1 when finished
# used for testing or when we need to know that training succeeded
return results or 1
def setup_fit(self, model, train_dataloader, val_dataloaders, datamodule):
# bind logger and other properties
self.copy_trainer_model_properties(model)
# clean hparams
if hasattr(model, 'hparams'):
parsing.clean_namespace(model.hparams)
# links data to the trainer
self.data_connector.attach_data(model, train_dataloader, val_dataloaders, datamodule)
# check that model is configured correctly
self.config_validator.verify_loop_configurations(model)
def setup_training(self, model: LightningModule):
"""Sanity check a few things before starting actual training.
Args:
model: The model to run sanity test on.
2019-07-03 19:09:49 +00:00
"""
# --------------------------
# Setup??
# --------------------------
2019-07-08 21:38:57 +00:00
ref_model = model
2019-07-14 02:21:17 +00:00
if self.data_parallel:
2019-07-08 21:38:57 +00:00
ref_model = model.module
Expectopatronum implement #89 (#182) * rename validate -> evaluate; implement test logic; allow multiple test_loaders * add test_step and test_end to LightningModule * add in_test_mode to pretraining to implement case 2 (test pretrained model) * fix code style issues * LightningTestModel: add optional second test set, implement test_step and test_end * implemented test for multiple test_dataloaders; fixed typo * add two test cases for #89 * add documentation for test_step, test_end; fix computation of loss in validation_step example * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Added proper dp ddp routing calls for test mode * Update trainer.py * Update test_models.py * Update trainer.py * Update trainer.py * Update override_data_parallel.py * Update test_models.py * Update test_models.py * Update trainer.py * Update trainer.py * Update trainer.py * Update test_models.py * Update test_models.py * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * Update trainer.py * Update override_data_parallel.py * Update debug.py * Update lm_test_module.py * Update test_models.py
2019-08-30 22:56:09 +00:00
# give model convenience properties
2019-07-08 22:55:05 +00:00
ref_model.trainer = self
2019-07-08 21:15:26 +00:00
# set local properties on the model
self.copy_trainer_model_properties(ref_model)
2019-07-08 21:15:26 +00:00
# init amp. Must be done here instead of __init__ to allow ddp to work
if self.amp_backend == AMPType.NATIVE and self.precision == 16 and not self.use_tpu:
self.scaler = torch.cuda.amp.GradScaler()
# log hyper-parameters
if self.logger is not None:
# save exp to get started
self.logger.log_hyperparams(ref_model.hparams)
self.logger.log_graph(ref_model)
self.logger.save()
if self.use_ddp or self.use_ddp2:
torch_distrib.barrier()
2020-02-22 01:39:12 +00:00
# wait for all models to restore weights
if self.on_tpu and XLA_AVAILABLE:
# wait for all processes to catch up
torch_xla.core.xla_model.rendezvous("pl.Trainer.setup_training")
2020-02-22 01:39:12 +00:00
elif self.use_horovod:
# wait for all processes to catch up
hvd.join()
# register auto-resubmit when on SLURM
self.register_slurm_signal_handlers()
# --------------------------
# Pre-train
# --------------------------
# on pretrain routine start
self.on_pretrain_routine_start(ref_model)
if self.is_function_implemented('on_pretrain_routine_start'):
ref_model.on_pretrain_routine_start()
2019-07-08 21:15:26 +00:00
# print model summary
if self.is_global_zero and self.weights_summary is not None and not self.testing:
if self.weights_summary in ModelSummary.MODES:
ref_model.summarize(mode=self.weights_summary)
else:
raise MisconfigurationException("weights_summary can be None, " + ", ".join(ModelSummary.MODES))
2019-07-08 21:15:26 +00:00
2019-07-27 02:57:49 +00:00
# track model now.
# if cluster resets state, the model will update with the saved weights
self.model = model
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
# restore training and model before hpc is called
self.restore_weights(model)
2019-03-31 01:45:16 +00:00
# on pretrain routine end
self.on_pretrain_routine_end(ref_model)
if self.is_function_implemented('on_pretrain_routine_end'):
ref_model.on_pretrain_routine_end()
def train(self):
self.run_sanity_check(self.get_model())
# enable train mode
model = self.get_model()
model.train()
torch.set_grad_enabled(True)
# reload data when needed
self.train_loop.reset_train_val_dataloaders(model)
# hook
self.train_loop.on_train_start()
try:
# run all epochs
for epoch in range(self.current_epoch, self.max_epochs):
# reset train dataloader
if self.reload_dataloaders_every_epoch:
self.reset_train_dataloader(model)
# hook
self.train_loop.on_train_epoch_start(epoch)
# run train epoch
self.run_training_epoch()
if self.max_steps and self.max_steps <= self.global_step:
# hook
self.train_loop.on_train_end()
return
# update LR schedulers
self.update_learning_rates(interval='epoch')
# early stopping
met_min_epochs = epoch >= self.min_epochs - 1
met_min_steps = self.global_step >= self.min_steps if self.min_steps else True
if self.should_stop:
if (met_min_epochs and met_min_steps):
self.train_loop.on_train_end()
return
else:
log.info('Trainer was signaled to stop but required minimum epochs'
f' ({self.min_epochs}) or minimum steps ({self.min_steps}) has'
' not been met. Training will continue...')
# hook
self.train_loop.on_train_end()
except KeyboardInterrupt:
rank_zero_warn('Detected KeyboardInterrupt, attempting graceful shutdown...')
# user could press ctrl+c many times... only shutdown once
if not self.interrupted:
self.interrupted = True
self._state = TrainerState.INTERRUPTED
self.on_keyboard_interrupt()
# hook
self.train_loop.on_train_end()
def run_test(self):
# only load test dataloader for testing
# self.reset_test_dataloader(ref_model)
eval_loop_results, _ = self.run_evaluation(test_mode=True)
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
if len(eval_loop_results) == 0:
return 1
Expectopatronum implement #89 (#182) * rename validate -> evaluate; implement test logic; allow multiple test_loaders * add test_step and test_end to LightningModule * add in_test_mode to pretraining to implement case 2 (test pretrained model) * fix code style issues * LightningTestModel: add optional second test set, implement test_step and test_end * implemented test for multiple test_dataloaders; fixed typo * add two test cases for #89 * add documentation for test_step, test_end; fix computation of loss in validation_step example * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Added proper dp ddp routing calls for test mode * Update trainer.py * Update test_models.py * Update trainer.py * Update trainer.py * Update override_data_parallel.py * Update test_models.py * Update test_models.py * Update trainer.py * Update trainer.py * Update trainer.py * Update test_models.py * Update test_models.py * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * Update trainer.py * Update override_data_parallel.py * Update debug.py * Update lm_test_module.py * Update test_models.py
2019-08-30 22:56:09 +00:00
# remove the tensors from the eval results
for i, result in enumerate(eval_loop_results):
if isinstance(result, dict):
for k, v in result.items():
if isinstance(v, torch.Tensor):
result[k] = v.cpu().item()
return eval_loop_results
def train_or_test(self):
if self.testing:
results = self.run_test()
else:
results = self.train()
return results
def run_sanity_check(self, ref_model):
using_val_step = ref_model.val_dataloader is not None and is_overridden('validation_step', ref_model)
should_sanity_check = using_val_step and self.num_sanity_val_steps > 0 and self.limit_val_batches > 0
Expectopatronum implement #89 (#182) * rename validate -> evaluate; implement test logic; allow multiple test_loaders * add test_step and test_end to LightningModule * add in_test_mode to pretraining to implement case 2 (test pretrained model) * fix code style issues * LightningTestModel: add optional second test set, implement test_step and test_end * implemented test for multiple test_dataloaders; fixed typo * add two test cases for #89 * add documentation for test_step, test_end; fix computation of loss in validation_step example * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Update trainer.py * Added proper dp ddp routing calls for test mode * Update trainer.py * Update test_models.py * Update trainer.py * Update trainer.py * Update override_data_parallel.py * Update test_models.py * Update test_models.py * Update trainer.py * Update trainer.py * Update trainer.py * Update test_models.py * Update test_models.py * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * Update trainer.py * Update override_data_parallel.py * Update debug.py * Update lm_test_module.py * Update test_models.py
2019-08-30 22:56:09 +00:00
# run tiny validation (if validation defined)
# to make sure program won't crash during val
if should_sanity_check:
self.reset_val_dataloader(ref_model)
self.num_sanity_val_batches = [
min(self.num_sanity_val_steps, val_batches) for val_batches in self.num_val_batches
]
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
# hook and callback
self.running_sanity_check = True
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
self.on_sanity_check_start()
# run eval step
_, eval_results = self.run_evaluation(test_mode=False, max_batches=self.num_sanity_val_batches)
# allow no returns from eval
if eval_results is not None and len(eval_results) > 0:
# when we get a list back, used only the last item
if isinstance(eval_results, list):
eval_results = eval_results[-1]
if isinstance(eval_results, EvalResult):
callback_metrics = eval_results.callback_metrics
else:
_, _, _, callback_metrics, _ = self.process_output(eval_results)
self.callback_metrics = callback_metrics
2019-08-07 11:51:55 +00:00
Progress bar callback (#1450) * squash and rebase sanity check hooks sanity check callback hook finish moved core progress bar functionality into callback wip remove duplicate merge clean up imports docs sanity check progress bar main sanity move callback calls init progrss bar callback configuration and docs changelog rate decorator pass process_position disable on rank > 0 position index is_enabled remove decorator refactor init tqdm bars callback method ordering cannot reset when disabled sequence -> list default values fix has no attr _time() move on_val_end to proper place fix the pickle issue update warning properties check for None remove old comment switch order pull out non-tqdm functionality into base class documentation for the base class docs fix refresh rate issue in validation restrict type hint of trainer arg more docs update trainer docs rst docs fix lines too long fix test add missing type hints fix typo move docstring to __init__ solves doctest failures remove doctest :(( can't fix the pickle error fix example simplify by saving trainer reference fix docs errors move docstring initial value multiple val checks per epoch simpler handling of inf dataset sizes update inf docs renamed training_tqdm_dict rename get_tqdm_dict rename occurences of tqdm update changelog fix doctest fix formatting errors added callback tests progress bar on off test more tests for progress bar weird test fix? add ignored property disable default progress bar in LR finder change enable/disable behavior trying doctest in CI again undo doctest pickle error undo doctest pickle error :(( remove progress_bar_callback Trainer arg and fix tests restore progress bar after auto lr find update docs fix rebase fix wrong negation * fix fast dev run total * more thorough testing * remove old args * fix merge * fix merge * separate tests * type hint total batches * reduce if Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_disabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * is_enabled Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> * rename enabled/disabled * move deprecated api * remove duplicated test from merge * fix rename is_disabled * newline * test also testprogress for fast dev run Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-04-24 00:46:18 +00:00
self.on_sanity_check_end()
self.running_sanity_check = False
@trainer_state(entering=TrainerState.RUNNING, exiting=TrainerState.FINISHED)
def test(
self,
model: Optional[LightningModule] = None,
test_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None,
ckpt_path: Optional[str] = 'best',
verbose: bool = True,
datamodule: Optional[LightningDataModule] = None,
):
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
r"""
Separates from fit to make sure you never run on your test set until you want to.
Args:
CI: Force docs warnings to be raised as errors (+ fix all) (#1191) * add argument to force warn * fix automodule error * fix permalink error * fix indentation warning * fix warning * fix import warnings * fix duplicate label warning * fix bullet point indentation warning * fix duplicate label warning * fix "import not top level" warning * line too long * fix indentation * fix bullet points indentation warning * fix hooks warnings * fix reference problem with excluded test_tube * fix indentation in print * change imports for trains logger * remove pandas type annotation * Update pytorch_lightning/core/lightning.py * include bullet points inside note * remove old quick start guide (unused) * fix unused warning * fix formatting * fix duplicate label issue * fix duplicate label warning (replaced by class ref) * fix tick * fix indentation warnings * docstring ticks * remove obsolete docstring typing * Revert "remove old quick start guide (unused)" This reverts commit d51bb40695442c8fa11bc9df74f6db56264f7509. * added old quick start guide to navigation * remove unused tutorials file * ignore some modules that got deprecated and are not used anymore * fix duplicate label warning * move examples doc and exclude pl_examples from autodoc * fix formatting for configure_optimizer * fix no blank line warnings * fix "see also" labels and add paramref extension * fix more reference problems * fix multi-gpu reference * fix weird warning * fix indentation and unrecognized characters in code block * fix warning "... not included in toctree" * fix PIL import error * fix duplicate target "here" warning * fix broken link * revert accidentally moved pl_examples * changelog * stdout * note some things to know Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: J. Borovec <jirka.borovec@seznam.cz> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-03-20 19:49:01 +00:00
model: The model to test.
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
test_dataloaders: Either a single
Pytorch Dataloader or a list of them, specifying validation samples.
ckpt_path: Either ``best`` or path to the checkpoint you wish to test.
If ``None``, use the weights from the last epoch to test. Default to ``best``.
verbose: If True, prints the test results
Returns:
The final test result dictionary. If no test_epoch_end is defined returns a list of dictionaries
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
Example::
# Option 1
# run test with the best checkpoint from ``ModelCheckpoint`` after fitting.
test = DataLoader(...)
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
trainer = Trainer()
model = LightningModule()
trainer.fit(model)
trainer.test(test_dataloaders=test)
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
# Option 2
# run test with the specified checkpoint after fitting
test = DataLoader(...)
trainer = Trainer()
model = LightningModule()
trainer.fit(model)
trainer.test(test_dataloaders=test, ckpt_path='path/to/checkpoint.ckpt')
# Option 3
# run test with the weights from the end of training after fitting
test = DataLoader(...)
trainer = Trainer()
model = LightningModule()
trainer.fit(model)
trainer.test(test_dataloaders=test, ckpt_path=None)
# Option 4
# run test from a loaded model. ``ckpt_path`` is ignored in this case.
test = DataLoader(...)
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt')
trainer = Trainer()
trainer.test(model, test_dataloaders=test)
clean v2 docs (#691) * updated gitignore * Update README.md * updated gitignore * updated links in ninja file * updated docs * Update README.md * Update README.md * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * fixing TensorBoard (#687) * flake8 * fix typo * fix tensorboardlogger drop test_tube dependence * formatting * fix tensorboard & tests * upgrade Tensorboard * test formatting separately * try to fix JIT issue * add tests for 1.4 * added direct links to docs * updated gitignore * updated links in ninja file * updated docs * finished callbacks * finished callbacks * finished callbacks * fixed left menu * added callbacks to menu * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * added direct links to docs * finished rebase * making private members * making private members * making private members * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * set auto dp if no backend * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * working on trainer docs * fixed lightning import * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * cleared spaces * finished lightning module * finished lightning module * finished lightning module * finished lightning module * added callbacks * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * set auto dp if no backend * added loggers * added loggers * added loggers * added loggers * added loggers * added loggers * flake 8 * flake 8 Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
2020-01-17 11:03:31 +00:00
"""
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
# --------------------
# SETUP HOOK
# --------------------
self.verbose_test = verbose
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
if self.global_rank != 0:
return
# If you supply a datamodule you can't supply train_dataloader or val_dataloaders
if test_dataloaders and datamodule:
raise MisconfigurationException(
'You cannot pass test_dataloaders to trainer.test if you supply a datamodule'
)
# Attach datamodule to get setup/prepare_data added to model before the call to it below
self.data_connector.attach_datamodule(model or self.get_model(), datamodule, 'test')
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
if model is not None:
results = self.__test_given_model(model, test_dataloaders)
else:
results = self.__test_using_best_weights(ckpt_path, test_dataloaders)
self.teardown('test')
return results
def __test_using_best_weights(self, ckpt_path, test_dataloaders):
model = self.get_model()
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
# if user requests the best checkpoint but we don't have it, error
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
if ckpt_path == 'best' and self.checkpoint_callback.save_top_k <= 0:
raise MisconfigurationException(
'ckpt_path is "best", but ModelCheckpoint is not configured to save the best model.'
)
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
# load best weights
if ckpt_path is not None:
# ckpt_path is 'best' so load the best model
if ckpt_path == 'best':
ckpt_path = self.checkpoint_callback.best_model_path
if len(ckpt_path) == 0:
rank_zero_warn(
f'.test() found no path for the best weights, {ckpt_path}. Please '
f'specify a path for a checkpoint .test(ckpt_path=PATH)'
)
return {}
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
ckpt = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
model.load_state_dict(ckpt['state_dict'])
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
# attach dataloaders
if test_dataloaders is not None:
self.data_connector.attach_dataloaders(model, test_dataloaders=test_dataloaders)
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
# run tests
self.tested_ckpt_path = ckpt_path
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
self.testing = True
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
os.environ['PL_TESTING_MODE'] = '1'
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
self.model = model
results = self.fit(model)
2020-03-06 11:57:14 +00:00
self.testing = False
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
del os.environ['PL_TESTING_MODE']
2020-03-06 11:57:14 +00:00
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
# teardown
if self.is_function_implemented('teardown'):
2020-06-25 15:10:17 +00:00
model_ref = self.get_model()
model_ref.teardown('test')
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
return results
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
def __test_given_model(self, model, test_dataloaders):
# attach data
if test_dataloaders is not None:
self.data_connector.attach_dataloaders(model, test_dataloaders=test_dataloaders)
Fixes .test() for ddp (#2570) * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint * enable none checkpoint
2020-07-09 22:36:36 +00:00
# run test
# sets up testing so we short circuit to eval
self.testing = True
self.model = model
results = self.fit(model)
self.testing = False
# teardown
if self.is_function_implemented('teardown'):
model.teardown('test')
return results
def barrier(self, name):
if self.use_ddp or self.use_ddp2:
Fix ddp tests + .test() (#2512) * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * fix deprecation warnings * added base tests for tpu * added base tests for tpu * Update pytorch_lightning/trainer/trainer.py Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com> * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu * added base tests for tpu Co-authored-by: Jirka <jirka@pytorchlightning.ai> Co-authored-by: Jeremy Jordan <13970565+jeremyjordan@users.noreply.github.com>
2020-07-07 16:24:56 +00:00
pass
# torch_distrib.barrier()
if self.on_tpu and XLA_AVAILABLE:
# wait for all processes to catch up
torch_xla.core.xla_model.rendezvous(f'pl.Trainer.{name}')
def call_setup_hook(self, model):
# call setup after the ddp process has connected
stage_name = 'test' if self.testing else 'fit'
if self.datamodule is not None:
called = self.datamodule.has_setup_test if self.testing else self.datamodule.has_setup_fit
if not called:
self.datamodule.setup(stage_name)
self.setup(stage_name)
model.setup(stage_name)
def init_amp(self, amp_type: str):
assert self.precision in (16, 32), 'only 32 or 16 bit precision supported'
self.amp_backend = None
self._setup_amp_backend(amp_type)
def call_hook(self, hook_name, *args, **kwargs):
# always profile hooks
with self.profiler.profile(hook_name):
# first call trainer hook
if hasattr(self, hook_name):
trainer_hook = getattr(self, hook_name)
trainer_hook(*args, **kwargs)
# next call hook in lightningModule
output = None
model_ref = self.get_model()
if is_overridden(hook_name, model_ref):
hook_fx = getattr(model_ref, hook_name)
output = hook_fx(*args, **kwargs)
# if the PL module doesn't have the hook then call the accelator
# used to auto-reduce things for the user with Results obj
elif hasattr(self.accelerator_backend, hook_name):
accelerator_hook = getattr(self.accelerator_backend, hook_name)
output = accelerator_hook(*args, **kwargs)
return output
def _determine_batch_limits(batches: Union[int, float], name: str) -> Union[int, float]:
if 0 <= batches <= 1:
return batches
elif batches > 1 and batches % 1.0 == 0:
return int(batches)
else:
raise MisconfigurationException(
f'You have passed invalid value {batches} for {name}, it has to be in [0.0, 1.0] or an int.'
)