# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
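"""Tests for optimizer and learning-rate scheduler configuration through ``configure_optimizers``."""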
from unittest import mock

import pytest
import torch
from torch import optim

from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf


def test_optimizer_with_scheduling(tmpdir):
    """Verify that learning rate scheduling is working."""

    model = BoringModel()
    trainer = Trainer(
        default_root_dir=tmpdir, max_epochs=1, limit_val_batches=0.1, limit_train_batches=0.2, val_check_interval=0.5
    )
    trainer.fit(model)
    assert trainer.state.finished, f"Training failed with {trainer.state}"
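
    # BoringModel's default ``configure_optimizers`` is assumed to pair SGD(lr=0.1) with
    # StepLR(step_size=1), whose default gamma of 0.1 scales the learning rate by 0.1 after the epoch.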
    init_lr = 0.1
    adjusted_lr = [pg["lr"] for pg in trainer.optimizers[0].param_groups]

    assert len(trainer.lr_schedulers) == 1
    assert all(a == adjusted_lr[0] for a in adjusted_lr)
    assert init_lr * 0.1 == adjusted_lr[0]


def test_multi_optimizer_with_scheduling(tmpdir):
    """Verify that learning rate scheduling is working with multiple optimizers."""

    class TestModel(BoringModel):
        init_lr = 5e-4

        def training_step(self, batch, batch_idx, optimizer_idx):
            return super().training_step(batch, batch_idx)

        def configure_optimizers(self):
            optimizer1 = optim.Adam(self.parameters(), lr=self.init_lr)
            optimizer2 = optim.Adam(self.parameters(), lr=self.init_lr)
            lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, step_size=1)
            lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=1)
            return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2]

    model = TestModel()
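    # Disable the default epoch-end hook: with two optimizers the step outputs are grouped per
    # optimizer, which BoringModel's single-optimizer ``training_epoch_end`` does not expect.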
    model.training_epoch_end = None
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, limit_val_batches=0.1, limit_train_batches=0.2)
    trainer.fit(model)
    assert trainer.state.finished, f"Training failed with {trainer.state}"

    adjusted_lr1 = [pg["lr"] for pg in trainer.optimizers[0].param_groups]
    adjusted_lr2 = [pg["lr"] for pg in trainer.optimizers[1].param_groups]

    assert len(trainer.lr_schedulers) == 2
    assert all(a == adjusted_lr1[0] for a in adjusted_lr1)
    assert all(a == adjusted_lr2[0] for a in adjusted_lr2)
    assert model.init_lr * 0.1 == adjusted_lr1[0]
    assert model.init_lr * 0.1 == adjusted_lr2[0]


def test_reducelronplateau_with_no_monitor_raises(tmpdir):
    """Test exception when a ReduceLROnPlateau is used with no monitor."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: ([optimizer], [optim.lr_scheduler.ReduceLROnPlateau(optimizer)])
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(
        MisconfigurationException, match="`configure_optimizers` must include a monitor when a `ReduceLROnPlateau`"
    ):
        trainer.fit(model)


def test_reducelronplateau_with_no_monitor_in_lr_scheduler_dict_raises(tmpdir):
    """Test exception when lr_scheduler dict has a ReduceLROnPlateau with no monitor."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.ReduceLROnPlateau(optimizer)},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match="must include a monitor when a `ReduceLROnPlateau`"):
        trainer.fit(model)


def test_reducelronplateau_scheduling(tmpdir):
    class TestModel(BoringModel):
        def training_step(self, batch, batch_idx):
            self.log("foo", batch_idx)
            return super().training_step(batch, batch_idx)

        def configure_optimizers(self):
            optimizer = optim.Adam(self.parameters())
            return {
                "optimizer": optimizer,
                "lr_scheduler": optim.lr_scheduler.ReduceLROnPlateau(optimizer),
                "monitor": "foo",
            }

    model = TestModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model)
    assert trainer.state.finished, f"Training failed with {trainer.state}"
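
    # The Trainer is expected to expand the bare scheduler and top-level "monitor" key into a full
    # lr-scheduler config dict, filling in the remaining fields with their defaults.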
    lr_scheduler = trainer.lr_schedulers[0]
    assert lr_scheduler == dict(
        scheduler=lr_scheduler["scheduler"],
        monitor="foo",
        interval="epoch",
        frequency=1,
        reduce_on_plateau=True,
        strict=True,
        opt_idx=None,
        name=None,
    )


def test_optimizer_return_options(tmpdir):
    trainer = Trainer(default_root_dir=tmpdir)
    model = BoringModel()

    # optimizers and schedulers reused across the cases below
    opt_a = optim.Adam(model.parameters(), lr=0.002)
    opt_b = optim.SGD(model.parameters(), lr=0.002)
    scheduler_a = optim.lr_scheduler.StepLR(opt_a, 10)
    scheduler_b = optim.lr_scheduler.StepLR(opt_b, 10)

    # single optimizer
    model.configure_optimizers = lambda: opt_a
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == 1 and len(lr_sched) == len(freq) == 0

    # opt tuple
    model.configure_optimizers = lambda: (opt_a, opt_b)
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert opt == [opt_a, opt_b]
    assert len(lr_sched) == len(freq) == 0

    # opt list
    model.configure_optimizers = lambda: [opt_a, opt_b]
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert opt == [opt_a, opt_b]
    assert len(lr_sched) == len(freq) == 0
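
    # Reference config the Trainer is expected to build around a bare scheduler.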
    ref_lr_sched = dict(
        scheduler=scheduler_a,
        interval="epoch",
        frequency=1,
        reduce_on_plateau=False,
        monitor=None,
        strict=True,
        name=None,
        opt_idx=None,
    )

    # opt tuple of 2 lists
    model.configure_optimizers = lambda: ([opt_a], [scheduler_a])
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt tuple of 1 list
    model.configure_optimizers = lambda: ([opt_a], scheduler_a)
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt single dictionary
    model.configure_optimizers = lambda: {"optimizer": opt_a, "lr_scheduler": scheduler_a}
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt multiple dictionaries with frequencies
    model.configure_optimizers = lambda: (
        {"optimizer": opt_a, "lr_scheduler": scheduler_a, "frequency": 1},
        {"optimizer": opt_b, "lr_scheduler": scheduler_b, "frequency": 5},
    )
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == len(freq) == 2
    assert opt[0] == opt_a
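    # With multiple optimizers, each scheduler config records the index of the optimizer it belongs to.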
ref_lr_sched["opt_idx"] = 0
|
|
assert lr_sched[0] == ref_lr_sched
|
|
ref_lr_sched["scheduler"] = scheduler_b
|
|
ref_lr_sched["opt_idx"] = 1
|
|
assert lr_sched[1] == ref_lr_sched
|
|
assert freq == [1, 5]
|
|
|
|
|
|


def test_none_optimizer(tmpdir):
    model = BoringModel()
    model.configure_optimizers = lambda: None
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, limit_val_batches=0.1, limit_train_batches=0.2)
    with pytest.warns(UserWarning, match="will run with no optimizer"):
        trainer.fit(model)
    assert trainer.state.finished, f"Training failed with {trainer.state}"


def test_configure_optimizer_from_dict(tmpdir):
    """Tests that `configure_optimizers` can return a dictionary with only the `optimizer` field."""

    class TestModel(BoringModel):
        def configure_optimizers(self):
            config = {"optimizer": optim.SGD(params=self.parameters(), lr=1e-03)}
            return config

    model = TestModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model)
    assert trainer.state.finished, f"Training failed with {trainer.state}"
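

# For example, with frequencies (3, 2) and 5 training batches in a single epoch, optimizer1 steps on the
# first 3 batches and optimizer2 on the remaining 2, so the "step"-interval schedulers are expected to end
# with `_step_count` values of 4 and 3 (one extra from the implicit step at scheduler construction).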
@pytest.mark.parametrize(
    "schedulers, kwargs, intervals, frequencies, expected_steps, max_epochs",
    [
        (
            (optim.lr_scheduler.OneCycleLR, optim.lr_scheduler.OneCycleLR),
            (dict(max_lr=0.01, total_steps=3), dict(max_lr=0.01, total_steps=2)),
            ("step", "step"),
            (3, 2),
            (4, 3),
            1,
        ),
        (
            (optim.lr_scheduler.OneCycleLR, optim.lr_scheduler.OneCycleLR),
            (dict(max_lr=0.01, total_steps=5), dict(max_lr=0.01, total_steps=5)),
            ("step", "step"),
            (None, None),
            (6, 6),
            1,
        ),
        (
            (optim.lr_scheduler.StepLR, optim.lr_scheduler.CosineAnnealingLR),
            (dict(step_size=5), dict(T_max=2)),
            ("epoch", "epoch"),
            (5, 10),
            (2, 3),
            3,
        ),
    ],
)
def test_step_scheduling_for_multiple_optimizers_with_frequency(
    tmpdir, schedulers, kwargs, intervals, frequencies, expected_steps, max_epochs
):
    """Test that step LR schedulers for multiple optimizers follow the optimizer frequencies when the
    corresponding frequency is set."""

    class DummyModel(BoringModel):
        def training_step(self, batch, batch_idx, optimizer_idx):
            return super().training_step(batch, batch_idx)

        def training_epoch_end(self, outputs) -> None:
            pass

        def configure_optimizers(self):
            optimizer1 = optim.Adam(self.parameters(), lr=0.01)
            optimizer2 = optim.Adam(self.parameters(), lr=0.01)

            lr_scheduler_config_1 = {"scheduler": schedulers[0](optimizer1, **kwargs[0]), "interval": intervals[0]}
            lr_scheduler_config_2 = {"scheduler": schedulers[1](optimizer2, **kwargs[1]), "interval": intervals[1]}

            return [
                {"optimizer": optimizer1, "frequency": frequencies[0], "lr_scheduler": lr_scheduler_config_1},
                {"optimizer": optimizer2, "frequency": frequencies[1], "lr_scheduler": lr_scheduler_config_2},
            ]

    model = DummyModel()

    trainer = Trainer(default_root_dir=tmpdir, limit_val_batches=1, limit_train_batches=5, max_epochs=max_epochs)
    trainer.fit(model)
    assert trainer.state.finished, f"Training failed with {trainer.state}"

    assert trainer.lr_schedulers[0]["opt_idx"] == 0
    assert trainer.lr_schedulers[1]["opt_idx"] == 1
    # Step count is 1 greater than the number of trainer-driven steps because scheduler.step()
    # is called once during initialization
    assert trainer.lr_schedulers[0]["scheduler"]._step_count == expected_steps[0]
    assert trainer.lr_schedulers[1]["scheduler"]._step_count == expected_steps[1]
@pytest.mark.parametrize("fn", ("validate", "test"))
|
|
def test_init_optimizers_during_evaluation(tmpdir, fn):
|
|
"""Test that optimizers is an empty list during evaluation."""
|
|
|
|
class TestModel(BoringModel):
|
|
def configure_optimizers(self):
|
|
optimizer1 = optim.Adam(self.parameters(), lr=0.1)
|
|
optimizer2 = optim.Adam(self.parameters(), lr=0.1)
|
|
lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, step_size=1)
|
|
lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=1)
|
|
return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2]
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, limit_val_batches=10, limit_test_batches=10)
|
|
validate_or_test = getattr(trainer, fn)
|
|
validate_or_test(TestModel(), ckpt_path=None)
|
|
|
|
assert len(trainer.lr_schedulers) == 0
|
|
assert len(trainer.optimizers) == 0
|
|
assert len(trainer.optimizer_frequencies) == 0
|
|
|
|
|
|


def test_multiple_optimizers_callbacks(tmpdir):
    """Tests that multiple optimizers can be used with callbacks."""

    class CB(Callback):
        def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
            pass

        def on_train_epoch_start(self, trainer, pl_module):
            pass

    class TestModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.layer_1 = torch.nn.Linear(32, 2)
            self.layer_2 = torch.nn.Linear(32, 2)

        def training_step(self, batch, batch_idx, optimizer_idx):
            if optimizer_idx == 0:
                a = batch[0]
                acc = self.layer_1(a)
            else:
                a = batch[0]
                acc = self.layer_2(a)

            acc = self.loss(acc, acc)
            return acc

        def configure_optimizers(self):
            a = optim.RMSprop(self.layer_1.parameters(), 1e-2)
            b = optim.RMSprop(self.layer_2.parameters(), 1e-2)
            return a, b

    model = TestModel()
    model.training_epoch_end = None
    trainer = Trainer(
        callbacks=[CB()],
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=2,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)
@pytest.mark.parametrize("complete_epoch", [True, False])
|
|
@mock.patch("torch.optim.lr_scheduler.ReduceLROnPlateau.step")
|
|
def test_lr_scheduler_strict(step_mock, tmpdir, complete_epoch):
|
|
"""Test "strict" support in lr_scheduler dict."""
|
|
model = BoringModel()
|
|
optimizer = optim.Adam(model.parameters())
|
|
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)
|
|
max_epochs = 1 if complete_epoch else None
|
|
max_steps = None if complete_epoch else 1
|
|
trainer = Trainer(default_root_dir=tmpdir, max_epochs=max_epochs, max_steps=max_steps)
|
|
|
|
model.configure_optimizers = lambda: {
|
|
"optimizer": optimizer,
|
|
"lr_scheduler": {"scheduler": scheduler, "monitor": "giraffe", "strict": True},
|
|
}
|
|
|
|
if complete_epoch:
|
|
with pytest.raises(
|
|
MisconfigurationException,
|
|
match=r"ReduceLROnPlateau conditioned on metric .* which is not available\. Available metrics are:",
|
|
):
|
|
trainer.fit(model)
|
|
else:
|
|
trainer.fit(model)
|
|
|
|
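
    # The scheduler must not have stepped: the monitored metric "giraffe" is never logged.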
    step_mock.assert_not_called()

    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "monitor": "giraffe", "strict": False},
    }

    if complete_epoch:
        with pytest.warns(
            RuntimeWarning, match=r"ReduceLROnPlateau conditioned on metric .* which is not available but strict"
        ):
            trainer.fit(model)

    step_mock.assert_not_called()


def test_unknown_configure_optimizers_raises(tmpdir):
    """Test exception with an unsupported configure_optimizers return."""
    model = BoringModel()
    model.configure_optimizers = lambda: 1
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match="Unknown configuration for model optimizers"):
        trainer.fit(model)


def test_lr_scheduler_with_unknown_interval_raises(tmpdir):
    """Test exception when the lr_scheduler dict has an unknown interval value."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1), "interval": "incorrect_unknown_value"},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match=r'The "interval" key in lr scheduler dict must be'):
        trainer.fit(model)


def test_lr_scheduler_with_extra_keys_warns(tmpdir):
    """Test warning when lr_scheduler dict has extra keys."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1), "foo": 1, "bar": 2},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the lr scheduler dict: \[.+\]"):
        trainer.fit(model)


def test_lr_scheduler_with_no_actual_scheduler_raises(tmpdir):
    """Test exception when lr_scheduler dict has no scheduler."""
    model = BoringModel()
    model.configure_optimizers = lambda: {"optimizer": optim.Adam(model.parameters()), "lr_scheduler": {}}
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match='The lr scheduler dict must have the key "scheduler"'):
        trainer.fit(model)


def test_invalid_optimizer_in_scheduler(tmpdir):
    """Test exception when the optimizer attached to an lr_scheduler wasn't returned."""

    class InvalidOptimizerModel(BoringModel):
        def configure_optimizers(self):
            opt1 = optim.SGD(self.layer.parameters(), lr=0.1)
            opt2 = optim.SGD(self.layer.parameters(), lr=0.1)
            lr_scheduler = optim.lr_scheduler.StepLR(opt2, step_size=1)
            return [opt1], [lr_scheduler]

    model = InvalidOptimizerModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
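    # "attatched" is intentional: the match string mirrors the spelling in the raised error message.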
with pytest.raises(MisconfigurationException, match="attatched with an optimizer that wasn't returned"):
|
|
trainer.fit(model)
|
|
|
|
|
|


def test_invalid_optimizer_dict_raises(tmpdir):
    """Test exception when `configure_optimizers` returns an invalid mix of a dict and a bare optimizer."""

    class DummyModel(BoringModel):
        def configure_optimizers(self):
            return [{"optimizer": optim.Adam(self.parameters())}, optim.Adam(self.parameters())]

    model = DummyModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match="Unknown configuration for model optimizers"):
        trainer.fit(model)


def test_warn_invalid_scheduler_key_in_manual_optimization(tmpdir):
    """Test warning when invalid scheduler keys are provided in manual optimization."""

    class TestModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.automatic_optimization = False

        def configure_optimizers(self):
            opt = optim.SGD(self.layer.parameters(), lr=0.1)
            sch = optim.lr_scheduler.StepLR(opt, step_size=1)
            return [opt], [{"scheduler": sch, "interval": "epoch"}]

    model = TestModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.warns(RuntimeWarning, match="the keys will be ignored"):
        trainer.fit(model)


@RunIf(min_gpus=2, special=True)
def test_optimizer_state_on_device(tmpdir):
    """Test that optimizers that create state at instantiation still end up with the state on the GPU."""

    class TestModel(BoringModel):
        def configure_optimizers(self):
            # Adagrad creates state tensors immediately, model is not yet on GPU.
            return optim.Adagrad(self.parameters())

        def on_train_start(self, *args, **kwargs):
            opt = self.optimizers()
            _, state = next(iter(opt.state.items()))
            assert state["sum"].device == torch.device("cuda", self.local_rank) == self.device

    model = TestModel()
    trainer = Trainer(default_root_dir=tmpdir, gpus=2, accelerator="ddp", fast_dev_run=True)
    trainer.fit(model)
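

# BoringModel's scheduler is assumed to use the default "epoch" interval, so it should step once per
# epoch regardless of how often validation runs (`check_val_every_n_epoch` is parametrized below).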
@pytest.mark.parametrize("check_val_every_n_epoch", [1, 2])
|
|
@mock.patch("torch.optim.lr_scheduler.StepLR.step")
|
|
def test_lr_scheduler_epoch_step_frequency(mocked_sched, check_val_every_n_epoch, tmpdir):
|
|
epochs = 4
|
|
expected_steps = epochs + 1 # every LRScheduler gets called once at init
|
|
|
|
model = BoringModel()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
limit_train_batches=2,
|
|
limit_val_batches=2,
|
|
check_val_every_n_epoch=check_val_every_n_epoch,
|
|
max_epochs=epochs,
|
|
)
|
|
trainer.fit(model)
|
|
assert mocked_sched.call_count == expected_steps
|
|
|
|
|
|
@pytest.mark.parametrize("every_n_train_steps, epoch_interval", [(None, True), (2, False), (2, True)])
|
|
def test_lr_scheduler_state_updated_before_saving(tmpdir, every_n_train_steps, epoch_interval):
|
|
batches = 2
|
|
max_epochs = 1
|
|
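    # lr=1 with gamma=10 keeps the check simple: after N scheduler steps the learning rate is 10 ** N.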
    lr, gamma = 1, 10
    trainer = Trainer(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        logger=False,
        max_epochs=max_epochs,
        limit_train_batches=batches,
        limit_val_batches=1,
        callbacks=[ModelCheckpoint(dirpath=tmpdir, every_n_train_steps=every_n_train_steps)],
    )

    class TestModel(BoringModel):
        def configure_optimizers(self):
            optimizer = torch.optim.SGD(self.parameters(), lr=lr)
            lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=gamma)
            lr_scheduler_config = {"scheduler": lr_scheduler}
            if not epoch_interval:
                lr_scheduler_config["interval"] = "step"
            return [optimizer], [lr_scheduler_config]

        def on_save_checkpoint(self, checkpoint):
            lr_scheduler_config = checkpoint["lr_schedulers"][0]
            # 2 batches ran; when the lr_scheduler_config interval is `step`, the step count should be 2
            assert self.trainer.global_step + 1 == batches  # the global step hasn't been increased yet
            compare_to = max_epochs if epoch_interval else batches
            assert lr_scheduler_config["_step_count"] - 1 == compare_to  # step count starts at 1
            assert lr_scheduler_config["_last_lr"] == [lr * gamma ** compare_to]
            self.on_save_checkpoint_called = True

    model = TestModel()
    trainer.fit(model)
    assert model.on_save_checkpoint_called
@pytest.mark.parametrize("save_on_train_epoch_end", (False, True))
|
|
def test_plateau_scheduler_lr_step_interval_updated_after_saving(tmpdir, save_on_train_epoch_end):
|
|
batches = 4
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
progress_bar_refresh_rate=0,
|
|
logger=False,
|
|
max_epochs=1,
|
|
limit_train_batches=batches,
|
|
limit_val_batches=1,
|
|
callbacks=[ModelCheckpoint(dirpath=tmpdir, save_on_train_epoch_end=save_on_train_epoch_end)],
|
|
)
|
|
|
|
class TestModel(BoringModel):
|
|
def training_step(self, batch, batch_idx, optimizer_idx):
|
|
self.log("foo", batch_idx)
|
|
return super().training_step(batch, batch_idx)
|
|
|
|
def configure_optimizers(self):
|
|
optimizer_1 = torch.optim.Adam(self.parameters())
|
|
optimizer_2 = torch.optim.Adam(self.parameters())
|
|
|
|
lr_scheduler1 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_1)
|
|
lr_scheduler_config_1 = {"scheduler": lr_scheduler1, "interval": "step", "monitor": "foo"}
|
|
|
|
lr_scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer_2, step_size=1)
|
|
lr_scheduler_config_2 = {"scheduler": lr_scheduler2, "interval": "step"}
|
|
return [optimizer_1, optimizer_2], [lr_scheduler_config_1, lr_scheduler_config_2]
|
|
|
|
def on_save_checkpoint(self, checkpoint):
|
|
lr_scheduler_config_1 = checkpoint["lr_schedulers"][0]
|
|
last_epoch = lr_scheduler_config_1["last_epoch"]
|
|
assert last_epoch == batches - (not save_on_train_epoch_end) # last epoch starts at 0
|
|
|
|
lr_scheduler_config_2 = checkpoint["lr_schedulers"][1]
|
|
assert lr_scheduler_config_2["_step_count"] - 1 == batches # step count starts at 1
|
|
|
|
self.on_save_checkpoint_called = True
|
|
|
|
model = TestModel()
|
|
model.training_epoch_end = None
|
|
trainer.fit(model)
|
|
assert model.on_save_checkpoint_called
|