fixing misleading tested acc values (#5876)

* fixing tested values

* .

* tests

* yapf

* softmax

* hvd

* rename

* lr

* duplicate

* drop

* classif

* rm EvalModel

* Revert "rm EvalModel"

This reverts commit 6c3fb39ebe.

* update tests

* fix

* azure

* azure

* self

* cpu

* Apply suggestions from code review

Co-authored-by: rohitgr7 <rohitgr1998@gmail.com>
Jirka Borovec 2021-02-23 23:08:46 +01:00 committed by GitHub
parent ebabe56f4e
commit 1c851b89e1
15 changed files with 207 additions and 167 deletions

View File

@ -20,7 +20,8 @@ from argparse import ArgumentParser
import torch
from pytorch_lightning import seed_everything, Trainer
from tests.base import EvalModelTemplate
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.simple_models import ClassificationModel
def main():
@ -35,24 +36,28 @@ def main():
parser.set_defaults(accelerator="ddp")
args = parser.parse_args()
model = EvalModelTemplate()
dm = ClassifDataModule()
model = ClassificationModel()
trainer = Trainer.from_argparse_args(args)
result = {}
if args.trainer_method == 'fit':
trainer.fit(model)
result = {'status': 'complete', 'method': args.trainer_method, 'result': None}
if args.trainer_method == 'test':
result = trainer.test(model)
result = {'status': 'complete', 'method': args.trainer_method, 'result': result}
if args.trainer_method == 'fit_test':
trainer.fit(model)
result = trainer.test(model)
result = {'status': 'complete', 'method': args.trainer_method, 'result': result}
trainer.fit(model, datamodule=dm)
result = None
elif args.trainer_method == 'test':
result = trainer.test(model, datamodule=dm)
elif args.trainer_method == 'fit_test':
trainer.fit(model, datamodule=dm)
result = trainer.test(model, datamodule=dm)
else:
raise ValueError(f'Unsupported: {args.trainer_method}')
if len(result) > 0:
file_path = os.path.join(args.tmpdir, 'ddp.result')
torch.save(result, file_path)
result_ext = {
'status': 'complete',
'method': args.trainer_method,
'result': result,
}
file_path = os.path.join(args.tmpdir, 'ddp.result')
torch.save(result_ext, file_path)
if __name__ == '__main__':
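For reference, a minimal sketch (assuming the same tmpdir and dict keys used above) of how a caller can read back the result the script saves:

import os
import torch

def load_ddp_result(tmpdir):
    # the DDP script persists a single dict via torch.save under 'ddp.result'
    result = torch.load(os.path.join(tmpdir, 'ddp.result'))
    assert result['status'] == 'complete'
    return result['result']  # None for 'fit', the trainer.test() output otherwise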

View File

@ -23,14 +23,13 @@ from tests.accelerators import ddp_model, DDPLauncher
from tests.helpers.boring_model import BoringModel
from tests.utilities.distributed import call_training_script
CLI_ARGS = '--max_epochs 1 --gpus 2 --accelerator ddp'
@pytest.mark.parametrize('cli_args', [
pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
def test_multi_gpu_model_ddp_fit_only(tmpdir):
# call the script
std, err = call_training_script(ddp_model, cli_args, 'fit', tmpdir, timeout=120)
call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120)
# load the results of the script
result_path = os.path.join(tmpdir, 'ddp.result')
@ -40,13 +39,10 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
assert result['status'] == 'complete'
@pytest.mark.parametrize('cli_args', [
pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
def test_multi_gpu_model_ddp_test_only(tmpdir):
# call the script
call_training_script(ddp_model, cli_args, 'test', tmpdir)
call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir)
# load the results of the script
result_path = os.path.join(tmpdir, 'ddp.result')
@ -56,13 +52,10 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
assert result['status'] == 'complete'
@pytest.mark.parametrize('cli_args', [
pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
])
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
def test_multi_gpu_model_ddp_fit_test(tmpdir):
# call the script
call_training_script(ddp_model, cli_args, 'fit_test', tmpdir, timeout=20)
call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20)
# load the results of the script
result_path = os.path.join(tmpdir, 'ddp.result')
@ -73,7 +66,7 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
model_outs = result['result']
for out in model_outs:
assert out['test_acc'] > 0.90
assert out['test_acc'] > 0.7
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")

View File

@ -20,7 +20,9 @@ from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.core import memory
from pytorch_lightning.trainer import Trainer
from pytorch_lightning.trainer.states import TrainerState
from tests.base import EvalModelTemplate
from tests.helpers import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.simple_models import ClassificationModel
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@ -29,7 +31,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
trainer_options = dict(
default_root_dir=tmpdir,
callbacks=[EarlyStopping()],
callbacks=[EarlyStopping(monitor='train_acc')],
max_epochs=50,
limit_train_batches=10,
limit_val_batches=10,
@ -37,8 +39,9 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
accelerator='ddp_spawn',
)
model = EvalModelTemplate()
tpipes.run_model_test(trainer_options, model)
dm = ClassifDataModule()
model = ClassificationModel()
tpipes.run_model_test(trainer_options, model, dm)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@ -55,7 +58,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
progress_bar_refresh_rate=0,
)
model = EvalModelTemplate()
model = BoringModel()
tpipes.run_model_test(trainer_options, model)
@ -68,7 +71,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
"""Make sure DDP works with dataloaders passed to fit()"""
tutils.set_random_master_port()
model = EvalModelTemplate()
model = BoringModel()
fit_options = dict(train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader())
trainer = Trainer(

View File

@ -11,27 +11,61 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock
import pytest
import torch
import torch.nn.functional as F
import pytorch_lightning as pl
import tests.helpers.pipelines as tpipes
import tests.helpers.utils as tutils
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.core import memory
from tests.base import EvalModelTemplate
from tests.helpers import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.simple_models import ClassificationModel
PRETEND_N_OF_GPUS = 16
class CustomClassificationModelDP(ClassificationModel):
def _step(self, batch, batch_idx):
x, y = batch
logits = self(x)
return {'logits': logits, 'y': y}
def training_step(self, batch, batch_idx):
out = self._step(batch, batch_idx)
loss = F.cross_entropy(out['logits'], out['y'])
return loss
def validation_step(self, batch, batch_idx):
return self._step(batch, batch_idx)
def test_step(self, batch, batch_idx):
return self._step(batch, batch_idx)
def validation_step_end(self, outputs):
self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
def test_step_end(self, outputs):
self.log('test_acc', self.test_acc(outputs['logits'], outputs['y']))
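The split between *_step and *_step_end matters under DP: each replica's step returns its shard of logits and targets, and Lightning gathers them onto the root device before calling *_step_end, so the accuracy metric sees the full batch. A purely illustrative sketch (shard sizes and class count are made up):

import torch

# outputs of two DP replicas, as gathered before *_step_end
shard_a = {'logits': torch.randn(8, 3), 'y': torch.randint(0, 3, (8,))}
shard_b = {'logits': torch.randn(8, 3), 'y': torch.randint(0, 3, (8,))}
gathered = {k: torch.cat([shard_a[k], shard_b[k]]) for k in shard_a}

# roughly what self.test_acc(outputs['logits'], outputs['y']) computes on the full batch
preds = gathered['logits'].argmax(dim=1)
acc = (preds == gathered['y']).float().mean()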
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_multi_gpu_early_stop_dp(tmpdir):
"""Make sure DDP works. with early stopping"""
tutils.set_random_master_port()
dm = ClassifDataModule()
model = CustomClassificationModelDP()
trainer_options = dict(
default_root_dir=tmpdir,
callbacks=[EarlyStopping()],
callbacks=[EarlyStopping(monitor='val_acc')],
max_epochs=50,
limit_train_batches=10,
limit_val_batches=10,
@ -39,8 +73,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
accelerator='dp',
)
model = EvalModelTemplate()
tpipes.run_model_test(trainer_options, model)
tpipes.run_model_test(trainer_options, model, dm)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@ -57,7 +90,7 @@ def test_multi_gpu_model_dp(tmpdir):
progress_bar_refresh_rate=0,
)
model = EvalModelTemplate()
model = BoringModel()
tpipes.run_model_test(trainer_options, model)
@ -65,14 +98,13 @@ def test_multi_gpu_model_dp(tmpdir):
memory.get_memory_profile('min_max')
@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_dp_test(tmpdir):
tutils.set_random_master_port()
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
model = EvalModelTemplate()
dm = ClassifDataModule()
model = CustomClassificationModelDP()
trainer = pl.Trainer(
default_root_dir=tmpdir,
max_epochs=2,
@ -81,17 +113,17 @@ def test_dp_test(tmpdir):
gpus=[0, 1],
accelerator='dp',
)
trainer.fit(model)
trainer.fit(model, datamodule=dm)
assert 'ckpt' in trainer.checkpoint_callback.best_model_path
results = trainer.test()
results = trainer.test(datamodule=dm)
assert 'test_acc' in results[0]
old_weights = model.c_d1.weight.clone().detach().cpu()
old_weights = model.layer_0.weight.clone().detach().cpu()
results = trainer.test(model)
results = trainer.test(model, datamodule=dm)
assert 'test_acc' in results[0]
# make sure weights didn't change
new_weights = model.c_d1.weight.clone().detach().cpu()
new_weights = model.layer_0.weight.clone().detach().cpu()
assert torch.all(torch.eq(old_weights, new_weights))

View File

@ -111,7 +111,7 @@ class EvalModelTemplate(
x = self.c_d1_drop(x)
x = self.c_d2(x)
logits = F.log_softmax(x, dim=1)
logits = F.softmax(x, dim=1)
return logits
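The switch from log_softmax to softmax changes only the scale of the outputs, not which class ranks highest, so accuracy-style checks are unaffected; a quick sanity sketch:

import torch
import torch.nn.functional as F

x = torch.randn(4, 10)
# log is monotonic, so argmax (and any top-k ordering) is identical
assert torch.equal(F.softmax(x, dim=1).argmax(dim=1), F.log_softmax(x, dim=1).argmax(dim=1))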

View File

@ -385,9 +385,8 @@ def test_full_loop_dp(tmpdir):
return {'logits': logits, 'y': y}
def training_step(self, batch, batch_idx):
_, y = batch
out = self._step(batch, batch_idx)
loss = F.cross_entropy(out['logits'], y)
loss = F.cross_entropy(out['logits'], out['y'])
return loss
def validation_step(self, batch, batch_idx):

View File

@ -13,39 +13,41 @@
# limitations under the License.
import torch
from pytorch_lightning import LightningDataModule, Trainer
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
from pytorch_lightning.metrics.functional import accuracy
from pytorch_lightning.trainer.states import TrainerState
from pytorch_lightning.utilities import DistributedType
from tests.helpers import BoringModel
from tests.helpers.utils import get_default_logger, load_model_from_checkpoint, reset_seed
def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50):
def run_model_test_without_loggers(
trainer_options: dict, model: LightningModule, data: LightningDataModule = None, min_acc: float = 0.50
):
reset_seed()
# fit model
trainer = Trainer(**trainer_options)
trainer.fit(model)
trainer.fit(model, datamodule=data)
# correct result and ok accuracy
assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
pretrained_model = load_model_from_checkpoint(
trainer.logger, trainer.checkpoint_callback.best_model_path, type(model)
)
model2 = load_model_from_checkpoint(trainer.logger, trainer.checkpoint_callback.best_model_path, type(model))
# test new model accuracy
test_loaders = model.test_dataloader()
test_loaders = model2.test_dataloader() if not data else data.test_dataloader()
if not isinstance(test_loaders, list):
test_loaders = [test_loaders]
for dataloader in test_loaders:
run_prediction(pretrained_model, dataloader, min_acc=min_acc)
if not isinstance(model2, BoringModel):
for dataloader in test_loaders:
run_prediction_eval_model_template(model2, dataloader, min_acc=min_acc)
def run_model_test(
trainer_options,
model,
model: LightningModule,
data: LightningDataModule = None,
on_gpu: bool = True,
version=None,
@ -76,8 +78,9 @@ def run_model_test(
if not isinstance(test_loaders, list):
test_loaders = [test_loaders]
for dataloader in test_loaders:
run_prediction(pretrained_model, dataloader, min_acc=min_acc)
if not isinstance(model, BoringModel):
for dataloader in test_loaders:
run_prediction_eval_model_template(model, dataloader, min_acc=min_acc)
if with_hpc:
if trainer._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2):
@ -92,50 +95,17 @@ def run_model_test(
trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu)
def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25):
if isinstance(trained_model, BoringModel):
return _boring_model_run_prediction(trained_model, dataloader, min_acc)
else:
return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc=min_acc)
def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50):
# run prediction on 1 batch
batch = next(iter(dataloader))
x, y = batch
x = x.view(x.size(0), -1)
if dp:
with torch.no_grad():
output = trained_model(batch, 0)
acc = output['val_acc']
acc = torch.mean(acc).item()
else:
with torch.no_grad():
y_hat = trained_model(x)
y_hat = y_hat.cpu()
# acc
labels_hat = torch.argmax(y_hat, dim=1)
y = y.cpu()
acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
acc = torch.tensor(acc)
acc = acc.item()
assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})"
# TODO: This test compares a loss value with a min accuracy - complete non-sense!
# create BoringModels that make actual predictions!
def _boring_model_run_prediction(trained_model, dataloader, min_acc=0.25):
@torch.no_grad()
def run_prediction_eval_model_template(trained_model, dataloader, min_acc=0.50):
# run prediction on 1 batch
trained_model.cpu()
trained_model.eval()
batch = next(iter(dataloader))
x, y = batch
x = x.flatten(1)
with torch.no_grad():
output = trained_model(batch)
y_hat = trained_model(x)
acc = accuracy(y_hat.cpu(), y.cpu(), top_k=2).item()
acc = trained_model.loss(batch, output)
assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}"
assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})"
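A hedged usage sketch of the helper above (the prepare_data/setup calls are assumed from the standard LightningDataModule API, not shown in this diff):

import tests.helpers.pipelines as tpipes
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.simple_models import ClassificationModel

model = ClassificationModel()
dm = ClassifDataModule()
dm.prepare_data()
dm.setup()
# min_acc=0.0 so the assert cannot trip on an untrained model; the tests in this diff use 0.1-0.5
tpipes.run_prediction_eval_model_template(model, dm.test_dataloader(), min_acc=0.0)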

View File

@ -0,0 +1,30 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tests.models.data.horovod.train_default_model import run_test_from_config
def test_horovod_model_script(tmpdir):
"""This just for testing/debugging horovod script without horovod..."""
trainer_options = dict(
default_root_dir=str(tmpdir),
weights_save_path=str(tmpdir),
gradient_clip_val=1.0,
progress_bar_refresh_rate=0,
max_epochs=1,
limit_train_batches=0.4,
limit_val_batches=0.2,
deterministic=True,
)
run_test_from_config(trainer_options, check_size=False, on_gpu=False)

View File

@ -37,7 +37,6 @@ else:
print('You requested to import Horovod which is missing or not supported for your OS.')
from tests.helpers import BoringModel # noqa: E402
from tests.helpers.pipelines import run_prediction # noqa: E402
from tests.helpers.utils import reset_seed, set_random_master_port # noqa: E402
parser = argparse.ArgumentParser()
@ -45,7 +44,7 @@ parser.add_argument('--trainer-options', required=True)
parser.add_argument('--on-gpu', action='store_true', default=False)
def run_test_from_config(trainer_options):
def run_test_from_config(trainer_options, on_gpu, check_size=True):
"""Trains the default model with the given config."""
set_random_master_port()
reset_seed()
@ -60,7 +59,8 @@ def run_test_from_config(trainer_options):
assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
# Horovod should be initialized following training. If not, this will raise an exception.
assert hvd.size() == 2
if check_size:
assert hvd.size() == 2
if trainer.global_rank > 0:
return
@ -74,15 +74,16 @@ def run_test_from_config(trainer_options):
test_loaders = [test_loaders]
for dataloader in test_loaders:
run_prediction(pretrained_model, dataloader)
batch = next(iter(dataloader))
pretrained_model(batch)
# test HPC saving
trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger)
# test HPC loading
checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(ckpt_path)
trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=args.on_gpu)
trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu)
if args.on_gpu:
if on_gpu:
trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
# Test the root_gpu property
assert trainer.root_gpu == hvd.local_rank()
@ -90,4 +91,4 @@ def run_test_from_config(trainer_options):
if __name__ == "__main__":
args = parser.parse_args()
run_test_from_config(json.loads(args.trainer_options))
run_test_from_config(json.loads(args.trainer_options), args.on_gpu)
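A hedged sketch of the two ways the script can now be driven, either imported directly (as in the new CPU-only test above) or via its CLI (flag names taken from the argparse definition above):

# direct import, no Horovod required
run_test_from_config(dict(max_epochs=1, limit_train_batches=0.4), check_size=False, on_gpu=False)

# CLI equivalent (illustrative; the GPU tests launch the script in a separate process)
#   python tests/models/data/horovod/train_default_model.py --trainer-options '{"max_epochs": 1}'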

View File

@ -24,6 +24,8 @@ from pytorch_lightning import Trainer
from pytorch_lightning.utilities import device_parser
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.simple_models import ClassificationModel
PRETEND_N_OF_GPUS = 16
@ -41,8 +43,9 @@ def test_multi_gpu_none_backend(tmpdir):
gpus=2,
)
model = BoringModel()
tpipes.run_model_test(trainer_options, model, min_acc=0.20)
dm = ClassifDataModule()
model = ClassificationModel()
tpipes.run_model_test(trainer_options, model, dm)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")

View File

@ -85,6 +85,28 @@ class GenericValTestLossBoringModel(GenericParentValTestLossBoringModel[int]):
pass
class CustomClassificationModelDP(ClassificationModel):
def _step(self, batch, batch_idx):
x, y = batch
logits = self(x)
return {'logits': logits, 'y': y}
def training_step(self, batch, batch_idx):
out = self._step(batch, batch_idx)
loss = F.cross_entropy(out['logits'], out['y'])
return loss
def validation_step(self, batch, batch_idx):
return self._step(batch, batch_idx)
def test_step(self, batch, batch_idx):
return self._step(batch, batch_idx)
def validation_step_end(self, outputs):
self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
def test_model_properties_resume_from_checkpoint(tmpdir):
"""
Test that properties like `current_epoch` and `global_step`
@ -198,28 +220,6 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
tutils.set_random_master_port()
class CustomClassificationModelDP(ClassificationModel):
def _step(self, batch, batch_idx):
x, y = batch
logits = self(x)
return {'logits': logits, 'y': y}
def training_step(self, batch, batch_idx):
_, y = batch
out = self._step(batch, batch_idx)
loss = F.cross_entropy(out['logits'], y)
return loss
def validation_step(self, batch, batch_idx):
return self._step(batch, batch_idx)
def test_step(self, batch, batch_idx):
return self._step(batch, batch_idx)
def validation_step_end(self, outputs):
self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
dm = ClassifDataModule()
model = CustomClassificationModelDP(lr=0.1)
@ -259,7 +259,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
dataloaders = [dataloaders]
for dataloader in dataloaders:
tpipes.run_prediction(pretrained_model, dataloader)
tpipes.run_prediction_eval_model_template(pretrained_model, dataloader)
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@ -307,7 +307,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
dataloaders = [dataloaders]
for dataloader in dataloaders:
tpipes.run_prediction(pretrained_model, dataloader, min_acc=0.1)
tpipes.run_prediction_eval_model_template(pretrained_model, dataloader, min_acc=0.1)
def test_running_test_pretrained_model_cpu(tmpdir):
@ -398,7 +398,8 @@ def test_load_model_from_checkpoint(tmpdir, model_template):
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_dp_resume(tmpdir):
"""Make sure DP continues training correctly."""
model = BoringModel()
model = CustomClassificationModelDP(lr=0.1)
dm = ClassifDataModule()
trainer_options = dict(max_epochs=1, gpus=2, accelerator='dp', default_root_dir=tmpdir)
@ -416,7 +417,7 @@ def test_dp_resume(tmpdir):
# fit model
trainer = Trainer(**trainer_options)
trainer.is_slurm_managing_tasks = True
trainer.fit(model)
trainer.fit(model, datamodule=dm)
# track epoch before saving. Increment since we finished the current epoch, don't want to rerun
real_global_epoch = trainer.current_epoch + 1
@ -439,7 +440,7 @@ def test_dp_resume(tmpdir):
trainer_options['max_epochs'] = 1
new_trainer = Trainer(**trainer_options)
class CustomModel(BoringModel):
class CustomModel(CustomClassificationModelDP):
def __init__(self):
super().__init__()
@ -451,19 +452,17 @@ def test_dp_resume(tmpdir):
# if model and state loaded correctly, predictions will be good even though we
# haven't trained with the new loaded model
dp_model = new_trainer.model
dp_model.eval()
new_trainer._running_stage = RunningStage.EVALUATING
dataloader = self.train_dataloader()
tpipes.run_prediction(self.trainer.lightning_module, dataloader)
tpipes.run_prediction_eval_model_template(self.trainer.lightning_module, dataloader=dataloader)
self.on_train_start_called = True
# new model
model = CustomModel()
# fit new model which should load hpc weights
new_trainer.fit(model)
new_trainer.fit(model, datamodule=dm)
assert model.on_train_start_called
# test freeze on gpu

View File

@ -223,12 +223,19 @@ def test_tpu_grad_norm(tmpdir):
@pl_multi_process_test
def test_dataloaders_passed_to_fit(tmpdir):
"""Test if dataloaders passed to trainer works on TPU"""
tutils.reset_seed()
model = BoringModel()
trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8)
trainer.fit(model, train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader())
trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=1,
tpu_cores=8,
)
trainer.fit(
model,
train_dataloader=model.train_dataloader(),
val_dataloaders=model.val_dataloader(),
)
assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"

View File

@ -130,7 +130,7 @@ def test_multiple_val_dataloader(tmpdir):
# make sure predictions are good for each val set
for dataloader in trainer.val_dataloaders:
tpipes.run_prediction(trained_model=model, dataloader=dataloader)
tpipes.run_prediction_eval_model_template(trained_model=model, dataloader=dataloader)
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
@ -153,8 +153,8 @@ def test_multiple_test_dataloader(tmpdir, ckpt_path):
trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=1,
limit_val_batches=0.1,
limit_train_batches=0.2,
limit_val_batches=10,
limit_train_batches=100,
)
trainer.fit(model)
if ckpt_path == 'specific':
@ -162,12 +162,11 @@ def test_multiple_test_dataloader(tmpdir, ckpt_path):
trainer.test(ckpt_path=ckpt_path)
# verify there are 2 test loaders
assert len(trainer.test_dataloaders) == 2, \
'Multiple test_dataloaders not initiated properly'
assert len(trainer.test_dataloaders) == 2, 'Multiple test_dataloaders not initiated properly'
# make sure predictions are good for each test set
for dataloader in trainer.test_dataloaders:
tpipes.run_prediction(trainer.model, dataloader)
tpipes.run_prediction_eval_model_template(trainer.model, dataloader)
# run the test method
trainer.test(ckpt_path=ckpt_path)

View File

@ -229,8 +229,8 @@ def test_accumulation_and_early_stopping(tmpdir):
def test_suggestion_parameters_work(tmpdir):
""" Test that default skipping does not alter results in basic case """
hparams = EvalModelTemplate.get_default_hparams()
model = EvalModelTemplate(**hparams)
dm = ClassifDataModule()
model = ClassificationModel()
# logger file to get meta
trainer = Trainer(
@ -238,12 +238,11 @@ def test_suggestion_parameters_work(tmpdir):
max_epochs=3,
)
lrfinder = trainer.tuner.lr_find(model)
lrfinder = trainer.tuner.lr_find(model, datamodule=dm)
lr1 = lrfinder.suggestion(skip_begin=10) # default
lr2 = lrfinder.suggestion(skip_begin=80) # way too high, should have an impact
lr2 = lrfinder.suggestion(skip_begin=150) # way too high, should have an impact
assert lr1 != lr2, \
'Skipping parameter did not influence learning rate'
assert lr1 != lr2, 'Skipping parameter did not influence learning rate'
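For context, a hedged sketch of the lr_find flow this test exercises (suggestion() is the Lightning LR finder API; treating the learning rate as a plain `lr` attribute on ClassificationModel is an assumption):

lrfinder = trainer.tuner.lr_find(model, datamodule=dm)
new_lr = lrfinder.suggestion()   # steepest point of the loss curve; skip_begin defaults to 10
model.lr = new_lr                # assumption: ClassificationModel stores its learning rate as `lr`
trainer.fit(model, datamodule=dm)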
def test_suggestion_with_non_finite_values(tmpdir):

View File

@ -113,8 +113,8 @@ def test_lightning_getattr(tmpdir):
for m in models:
with pytest.raises(
AttributeError,
match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
AttributeError,
match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
):
lightning_getattr(m, "this_attr_not_exist")
@ -140,7 +140,7 @@ def test_lightning_setattr(tmpdir):
for m in models:
with pytest.raises(
AttributeError,
match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
AttributeError,
match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
):
lightning_setattr(m, "this_attr_not_exist", None)
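For reference, a minimal sketch of the lookup these helpers perform, which is exactly what the error message above enumerates (import path assumed; the attribute name is illustrative):

from pytorch_lightning.utilities.parsing import lightning_getattr, lightning_setattr

# searches the model itself, then model.hparams, then the attached datamodule
lr = lightning_getattr(model, 'learning_rate')
lightning_setattr(model, 'learning_rate', 0.01)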