lightning/tests/checkpointing/test_legacy_checkpoints.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os
import sys
import threading
from unittest.mock import patch

import pytest
import torch

import pytorch_lightning as pl
from pytorch_lightning import Callback, Trainer
from tests import _PATH_LEGACY, _PROJECT_ROOT

LEGACY_CHECKPOINTS_PATH = os.path.join(_PATH_LEGACY, "checkpoints")
CHECKPOINT_EXTENSION = ".ckpt"
# load list of all back compatible versions
with open(os.path.join(_PROJECT_ROOT, "legacy", "back-compatible-versions.txt")) as fp:
    LEGACY_BACK_COMPATIBLE_PL_VERSIONS = [ln.strip() for ln in fp.readlines()]


@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
def test_load_legacy_checkpoints(tmpdir, pl_version: str):
    PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
    with patch("sys.path", [PATH_LEGACY] + sys.path):
        from simple_classif_training import ClassifDataModule, ClassificationModel

        path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))
        assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'
        path_ckpt = path_ckpts[-1]

        model = ClassificationModel.load_from_checkpoint(path_ckpt)
        trainer = Trainer(default_root_dir=str(tmpdir))
        dm = ClassifDataModule()
        res = trainer.test(model, datamodule=dm)
        assert res[0]["test_loss"] <= 0.7
        assert res[0]["test_acc"] >= 0.85
        print(res)


class LimitNbEpochs(Callback):
    def __init__(self, nb: int):
        self.limit = nb
        self._count = 0

    def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        self._count += 1
        if self._count >= self.limit:
            trainer.should_stop = True


@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
def test_legacy_ckpt_threading(tmpdir, pl_version: str):
    def load_model():
        import torch

        from pytorch_lightning.utilities.migration import pl_legacy_patch

        with pl_legacy_patch():
            _ = torch.load(PATH_LEGACY)

    PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
    with patch("sys.path", [PATH_LEGACY] + sys.path):
        t1 = threading.Thread(target=load_model)
        t2 = threading.Thread(target=load_model)

        t1.start()
        t2.start()

        t1.join()
        t2.join()


@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
def test_resume_legacy_checkpoints(tmpdir, pl_version: str):
    PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
    with patch("sys.path", [PATH_LEGACY] + sys.path):
        from simple_classif_training import ClassifDataModule, ClassificationModel

        path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))
        assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'
        path_ckpt = path_ckpts[-1]

        dm = ClassifDataModule()
        model = ClassificationModel()
        stop = LimitNbEpochs(1)

        trainer = Trainer(
            default_root_dir=str(tmpdir),
            accelerator="auto",
            devices=1,
            precision=(16 if torch.cuda.is_available() else 32),
            callbacks=[stop],
            max_epochs=21,
            accumulate_grad_batches=2,
        )
        torch.backends.cudnn.deterministic = True
        trainer.fit(model, datamodule=dm, ckpt_path=path_ckpt)
        res = trainer.test(model, datamodule=dm)
        assert res[0]["test_loss"] <= 0.7
        assert res[0]["test_acc"] >= 0.85
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00			`# Copyright The PyTorch Lightning team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`import glob`
			`import os`
			`import sys`
Threading support for legacy loading of checkpoints (#12814) 2022-04-28 20:37:58 +00:00			`import threading`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`from unittest.mock import patch`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00
			`import pytest`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`import torch`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`import pytorch_lightning as pl`
			`from pytorch_lightning import Callback, Trainer`
			`from tests import _PATH_LEGACY, _PROJECT_ROOT`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00
Replace `yapf` with `black` (#7783) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2021-07-26 11:37:35 +00:00			`LEGACY_CHECKPOINTS_PATH = os.path.join(_PATH_LEGACY, "checkpoints")`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00			`CHECKPOINT_EXTENSION = ".ckpt"`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`# load list of all back compatible versions`
			`with open(os.path.join(_PROJECT_ROOT, "legacy", "back-compatible-versions.txt")) as fp:`
			`LEGACY_BACK_COMPATIBLE_PL_VERSIONS = [ln.strip() for ln in fp.readlines()]`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00

Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)`
			`def test_load_legacy_checkpoints(tmpdir, pl_version: str):`
			`PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)`
			`with patch("sys.path", [PATH_LEGACY] + sys.path):`
			`from simple_classif_training import ClassifDataModule, ClassificationModel`

			`path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))`
			`assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'`
			`path_ckpt = path_ckpts[-1]`

			`model = ClassificationModel.load_from_checkpoint(path_ckpt)`
			`trainer = Trainer(default_root_dir=str(tmpdir))`
			`dm = ClassifDataModule()`
			`res = trainer.test(model, datamodule=dm)`
			`assert res[0]["test_loss"] <= 0.7`
			`assert res[0]["test_acc"] >= 0.85`
			`print(res)`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00

Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`class LimitNbEpochs(Callback):`
			`def __init__(self, nb: int):`
			`self.limit = nb`
			`self._count = 0`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00
Deprecate `on_epoch_start/on_epoch_end` hook (#11578) 2022-02-07 14:15:27 +00:00			`def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`self._count += 1`
			`if self._count >= self.limit:`
			`trainer.should_stop = True`


Threading support for legacy loading of checkpoints (#12814) 2022-04-28 20:37:58 +00:00			`@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)`
			`def test_legacy_ckpt_threading(tmpdir, pl_version: str):`
			`def load_model():`
			`import torch`

			`from pytorch_lightning.utilities.migration import pl_legacy_patch`

			`with pl_legacy_patch():`
			`_ = torch.load(PATH_LEGACY)`

			`PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)`
			`with patch("sys.path", [PATH_LEGACY] + sys.path):`
			`t1 = threading.Thread(target=load_model)`
			`t2 = threading.Thread(target=load_model)`

			`t1.start()`
			`t2.start()`

			`t1.join()`
			`t2.join()`


Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)`
			`def test_resume_legacy_checkpoints(tmpdir, pl_version: str):`
			`PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)`
			`with patch("sys.path", [PATH_LEGACY] + sys.path):`
			`from simple_classif_training import ClassifDataModule, ClassificationModel`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))`
			`assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'`
			`path_ckpt = path_ckpts[-1]`
tests for legacy checkpoints (#5223) * wip * generate * clean * tests * copy * download * download * download * download * download * download * download * download * download * download * download * flake8 * extend * aws * extension * pull * pull * pull * pull * pull * pull * pull * try * try * try * got it * Apply suggestions from code review (cherry picked from commit 72525f0a8396ae6dce5cf78ddf71e75fbba2dbfc) 2021-01-08 15:36:49 +00:00
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`dm = ClassifDataModule()`
			`model = ClassificationModel()`
			`stop = LimitNbEpochs(1)`
Update `tests/checkpointing/*.py` to use `devices` instead of `gpus` or `ipus` (#11408) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-01-12 05:47:01 +00:00
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`trainer = Trainer(`
			`default_root_dir=str(tmpdir),`
Update `tests/checkpointing/*.py` to use `devices` instead of `gpus` or `ipus` (#11408) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-01-12 05:47:01 +00:00			`accelerator="auto",`
			`devices=1,`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`precision=(16 if torch.cuda.is_available() else 32),`
Deprecate `on_epoch_start/on_epoch_end` hook (#11578) 2022-02-07 14:15:27 +00:00			`callbacks=[stop],`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`max_epochs=21,`
			`accumulate_grad_batches=2,`
			`)`
Add support for `torch.use_deterministic_algorithms` (#9121) * re-add changes * Update test_data_parallel.py * Update CHANGELOG.md * Update test_legacy_checkpoints.py * Update test_horovod.py * Update test_horovod.py * Update accelerator_connector.py * update tests 2021-09-30 04:40:09 +00:00			`torch.backends.cudnn.deterministic = True`
Unify checkpoint load paths [redo #9693] (#10061) 2021-10-25 19:05:31 +00:00			`trainer.fit(model, datamodule=dm, ckpt_path=path_ckpt)`
Legacy: simple classif training (#8535) * simple_classif_training * fix test * pt1.6 * automate Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: thomas chaton <thomas@grid.ai> 2021-08-10 08:13:31 +00:00			`res = trainer.test(model, datamodule=dm)`
			`assert res[0]["test_loss"] <= 0.7`
			`assert res[0]["test_acc"] >= 0.85`