# lightning/tests/core/test_results.py

import sys
from pathlib import Path

import pytest
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult
import tests.base.develop_utils as tutils

from tests.base import EvalModelTemplate
from tests.base.datamodules import TrialMNISTDataModule


def _setup_ddp(rank, worldsize):
    """Join the ``gloo`` process group as process ``rank`` out of ``worldsize``."""
    from os import environ

    # the rendezvous address is always the local machine
    environ["MASTER_ADDR"] = "localhost"

    dist.init_process_group(backend="gloo", rank=rank, world_size=worldsize)


def _ddp_test_fn(rank, worldsize, result_cls: Result):
    """Per-process body: log a 1.0 tensor with SUM syncing and check the reduced value.

    Args:
        rank: index of this process within the group.
        worldsize: total number of processes in the group.
        result_cls: the result class to instantiate and log into.
    """
    _setup_ddp(rank, worldsize)

    logged = result_cls()
    logged.log(
        "test_tensor",
        torch.tensor([1.0]),
        sync_dist=True,
        sync_dist_op=torch.distributed.ReduceOp.SUM,
    )

    # each process logs 1.0, so the value is compared against the group size
    reduced_value = logged["test_tensor"].item()
    assert reduced_value == dist.get_world_size(), "Result-Log does not work properly with DDP and Tensors"


@pytest.mark.parametrize("result_cls", [Result, TrainResult, EvalResult])
@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")
def test_result_reduce_ddp(result_cls):
    """Spawn two worker processes and run the DDP logging check for ``result_cls`` in each."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    num_processes = 2
    # `mp.spawn` passes the process index as the first argument of the target
    mp.spawn(_ddp_test_fn, nprocs=num_processes, args=(num_processes, result_cls))


@pytest.mark.parametrize(
    "test_option,do_train,gpus",
    [
        pytest.param(0, True, 0, id='full_loop'),
        pytest.param(0, False, 0, id='test_only'),
        # NOTE(review): `match` is not among the documented `pytest.mark.xfail` arguments — confirm it is honored
        pytest.param(
            1, False, 0,
            id='test_only_mismatching_tensor',
            marks=pytest.mark.xfail(raises=ValueError, match="Mism.*"),
        ),
        pytest.param(2, False, 0, id='mix_of_tensor_dims'),
        pytest.param(3, False, 0, id='string_list_predictions'),
        pytest.param(4, False, 0, id='int_list_predictions'),
        pytest.param(5, False, 0, id='nested_list_predictions'),
        pytest.param(6, False, 0, id='dict_list_predictions'),
        pytest.param(
            0, True, 1,
            id='full_loop_single_gpu',
            marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="test requires single-GPU machine"),
        ),
    ],
)
def test_result_obj_predictions(tmpdir, test_option, do_train, gpus):
    """Run the test loop (optionally after fitting) and check the saved predictions file.

    ``test_option`` is stored on the model as ``model.test_option``; ``do_train`` selects
    whether ``trainer.fit`` runs before ``trainer.test``; ``gpus`` is handed to the ``Trainer``.
    """
    tutils.reset_seed()

    dm = TrialMNISTDataModule(tmpdir)
    prediction_file = Path(tmpdir) / 'predictions.pt'

    model = EvalModelTemplate()
    model.test_option = test_option
    model.prediction_file = prediction_file.as_posix()
    model.test_step = model.test_step_result_preds
    for hook_name in ('test_step_end', 'test_epoch_end', 'test_end'):
        setattr(model, hook_name, None)

    # drop any stale file so the existence checks below are meaningful
    if prediction_file.exists():
        prediction_file.unlink()

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=3,
        weights_summary=None,
        deterministic=True,
        gpus=gpus,
    )
    trainer = Trainer(**trainer_kwargs)

    # nothing has run yet, so no predictions can have been written
    assert not prediction_file.exists()

    if not do_train:
        trainer.test(model, datamodule=dm)
    else:
        fit_status = trainer.fit(model, dm)
        assert fit_status == 1
        metrics = trainer.test(datamodule=dm)[0]
        assert metrics['test_loss'] < 0.6
        assert metrics['test_acc'] > 0.8

    # the file must now exist and hold as many entries as the test dataset
    assert prediction_file.exists()
    saved_predictions = torch.load(prediction_file)
    assert len(saved_predictions) == len(dm.mnist_test)


@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
def test_result_obj_predictions_ddp_spawn(tmpdir):
    """Fit and test with ``ddp_spawn`` on GPUs 0 and 1, then check the two per-rank prediction files."""
    import os

    seed_everything(4321)

    # NOTE(review): this value is never restored, so it stays set for anything that runs later in the process
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    dm = TrialMNISTDataModule(tmpdir)
    out_dir = Path(tmpdir)

    model = EvalModelTemplate(learning_rate=0.005)
    model.test_option = 0
    model.prediction_file = (out_dir / 'predictions.pt').as_posix()
    model.test_step = model.test_step_result_preds
    for hook_name in ('test_step_end', 'test_epoch_end', 'test_end'):
        setattr(model, hook_name, None)

    # files this test expects to find after `trainer.test`; remove stale copies first
    rank_files = [out_dir / file_name for file_name in ('predictions_rank_0.pt', 'predictions_rank_1.pt')]
    for rank_file in rank_files:
        if rank_file.exists():
            rank_file.unlink()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=3,
        weights_summary=None,
        deterministic=True,
        distributed_backend='ddp_spawn',
        gpus=[0, 1],
    )

    # NOTE(review): there is no pre-run assertion here that the prediction files are absent

    fit_status = trainer.fit(model, dm)
    assert fit_status == 1
    metrics = trainer.test(datamodule=dm)[0]
    assert metrics['test_loss'] < 0.6
    assert metrics['test_acc'] > 0.8

    dm.setup('test')

    # the entries across both files together must add up to the test dataset size
    total_predictions = 0
    for rank_file in rank_files:
        assert rank_file.exists()
        total_predictions += len(torch.load(rank_file))
    assert total_predictions == len(dm.mnist_test)


def test_result_gather_stack():
    """ Check that three equally shaped (4, 5) tensors are gathered into one (12, 5) tensor. """
    outputs = [{"foo": torch.zeros(4, 5)} for _ in range(3)]
    gathered = Result.gather(outputs)
    foo = gathered["foo"]
    assert isinstance(foo, torch.Tensor)
    assert tuple(foo.shape) == (12, 5)


def test_result_gather_concatenate():
    """ Check that tensors differing only in their first dimension are joined along it. """
    outputs = [{"foo": torch.zeros(rows, 5)} for rows in (4, 8, 3)]
    gathered = Result.gather(outputs)
    foo = gathered["foo"]
    assert isinstance(foo, torch.Tensor)
    # 4 + 8 + 3 rows in total
    assert tuple(foo.shape) == (15, 5)


def test_result_gather_scalar():
    """ Check that three 0-dim tensors are gathered into a single tensor of shape (3,). """
    outputs = [{"foo": torch.tensor(value)} for value in (1, 2, 3)]
    gathered = Result.gather(outputs)
    foo = gathered["foo"]
    assert isinstance(foo, torch.Tensor)
    assert tuple(foo.shape) == (3,)


def test_result_gather_different_shapes():
    """ Test that tensors of varying shape get gathered into a list. """
    outputs = [
        {"foo": torch.tensor(1)},
        {"foo": torch.zeros(2, 3)},
        {"foo": torch.zeros(1, 2, 3)},
    ]
    result = Result.gather(outputs)
    expected = [torch.tensor(1), torch.zeros(2, 3), torch.zeros(1, 2, 3)]
    assert isinstance(result["foo"], list)
    # `zip` stops at the shorter sequence, so pin the length first; otherwise a
    # truncated (or empty) result would pass the element-wise check vacuously
    assert len(result["foo"]) == len(expected)
    # `torch.equal` requires identical shape as well as values (no broadcasting)
    assert all(torch.equal(r, e) for r, e in zip(result["foo"], expected))


def test_result_gather_mixed_types():
    """ Check that a float, a list and a tensor logged under one key come back as a list. """
    values = [1.2, ["bar", None], torch.tensor(1)]
    gathered = Result.gather([{"foo": value} for value in values])
    foo = gathered["foo"]
    assert isinstance(foo, list)
    # compare against freshly built objects rather than the inputs themselves
    assert foo == [1.2, ["bar", None], torch.tensor(1)]


def test_result_retrieve_last_logged_item():
    """ Check that a value logged with on_step and on_epoch is readable under all three keys. """
    logged = Result()
    logged.log('a', 5., on_step=True, on_epoch=True)
    for key in ('epoch_a', 'step_a', 'a'):
        assert logged[key] == 5.
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00			`import sys`
			`from pathlib import Path`

add ddp sync for logging in result step (#2822) * add ddp sync for logging in result step * pep8 * pep8 * make ddp tests run also on cpu (except windowws) * create class instance in ddp test * revert automated formatting * pep8 2020-08-06 00:42:09 +00:00			`import pytest`
			`import torch`
			`import torch.distributed as dist`
			`import torch.multiprocessing as mp`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00			`from pytorch_lightning import Trainer, seed_everything`
add ddp sync for logging in result step (#2822) * add ddp sync for logging in result step * pep8 * pep8 * make ddp tests run also on cpu (except windowws) * create class instance in ddp test * revert automated formatting * pep8 2020-08-06 00:42:09 +00:00			`from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult`
			`import tests.base.develop_utils as tutils`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00
			`from tests.base import EvalModelTemplate`
			`from tests.base.datamodules import TrialMNISTDataModule`
add ddp sync for logging in result step (#2822) * add ddp sync for logging in result step * pep8 * pep8 * make ddp tests run also on cpu (except windowws) * create class instance in ddp test * revert automated formatting * pep8 2020-08-06 00:42:09 +00:00

			`def _setup_ddp(rank, worldsize):`
			`import os`

			`os.environ["MASTER_ADDR"] = "localhost"`

			`# initialize the process group`
			`dist.init_process_group("gloo", rank=rank, world_size=worldsize)`


			`def _ddp_test_fn(rank, worldsize, result_cls: Result):`
			`_setup_ddp(rank, worldsize)`
			`tensor = torch.tensor([1.0])`

			`res = result_cls()`
document lightiningmodule better (#2920) * updated docs 2020-08-11 23:39:43 +00:00			`res.log("test_tensor", tensor, sync_dist=True, sync_dist_op=torch.distributed.ReduceOp.SUM)`
add ddp sync for logging in result step (#2822) * add ddp sync for logging in result step * pep8 * pep8 * make ddp tests run also on cpu (except windowws) * create class instance in ddp test * revert automated formatting * pep8 2020-08-06 00:42:09 +00:00
			`assert res["test_tensor"].item() == dist.get_world_size(), "Result-Log does not work properly with DDP and Tensors"`


			`@pytest.mark.parametrize("result_cls", [Result, TrainResult, EvalResult])`
			`@pytest.mark.skipif(sys.platform == "win32", reason="DDP not available on windows")`
			`def test_result_reduce_ddp(result_cls):`
			`"""Make sure result logging works with DDP"""`
			`tutils.reset_seed()`
			`tutils.set_random_master_port()`

			`worldsize = 2`
			`mp.spawn(_ddp_test_fn, args=(worldsize, result_cls), nprocs=worldsize)`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00

			`@pytest.mark.parametrize(`
			`"test_option,do_train,gpus",`
			`[`
			`pytest.param(`
			`0, True, 0, id='full_loop'`
			`),`
			`pytest.param(`
			`0, False, 0, id='test_only'`
			`),`
			`pytest.param(`
			`1, False, 0, id='test_only_mismatching_tensor', marks=pytest.mark.xfail(raises=ValueError, match="Mism.*")`
			`),`
			`pytest.param(`
			`2, False, 0, id='mix_of_tensor_dims'`
			`),`
			`pytest.param(`
			`3, False, 0, id='string_list_predictions'`
			`),`
			`pytest.param(`
			`4, False, 0, id='int_list_predictions'`
			`),`
			`pytest.param(`
			`5, False, 0, id='nested_list_predictions'`
			`),`
			`pytest.param(`
			`6, False, 0, id='dict_list_predictions'`
			`),`
			`pytest.param(`
			`0, True, 1, id='full_loop_single_gpu', marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="test requires single-GPU machine")`
			`)`
			`]`
			`)`
			`def test_result_obj_predictions(tmpdir, test_option, do_train, gpus):`
			`tutils.reset_seed()`

			`dm = TrialMNISTDataModule(tmpdir)`
use tmpdir in tests when writing predictions to disk (#3561) * save to tmpdir * path 2020-09-23 11:44:15 +00:00			`prediction_file = Path(tmpdir) / 'predictions.pt'`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00
			`model = EvalModelTemplate()`
			`model.test_option = test_option`
			`model.prediction_file = prediction_file.as_posix()`
			`model.test_step = model.test_step_result_preds`
			`model.test_step_end = None`
			`model.test_epoch_end = None`
			`model.test_end = None`

			`if prediction_file.exists():`
			`prediction_file.unlink()`

			`trainer = Trainer(`
			`default_root_dir=tmpdir,`
			`max_epochs=3,`
			`weights_summary=None,`
			`deterministic=True,`
			`gpus=gpus`
			`)`

			`# Prediction file shouldn't exist yet because we haven't done anything`
			`assert not prediction_file.exists()`

			`if do_train:`
			`result = trainer.fit(model, dm)`
			`assert result == 1`
			`result = trainer.test(datamodule=dm)`
			`result = result[0]`
			`assert result['test_loss'] < 0.6`
			`assert result['test_acc'] > 0.8`
			`else:`
			`result = trainer.test(model, datamodule=dm)`

			`# check prediction file now exists and is of expected length`
			`assert prediction_file.exists()`
			`predictions = torch.load(prediction_file)`
			`assert len(predictions) == len(dm.mnist_test)`


			`@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")`
			`def test_result_obj_predictions_ddp_spawn(tmpdir):`
ref: refactored gpu backend __step (#3120) * refactored gpu backend __step * refactored gpu backend __step * refactored gpu backend __step * refactored gpu backend __step 2020-08-24 13:22:05 +00:00			`seed_everything(4321)`

Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00			`distributed_backend = 'ddp_spawn'`
			`option = 0`

			`import os`
			`os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'`

			`dm = TrialMNISTDataModule(tmpdir)`

use tmpdir in tests when writing predictions to disk (#3561) * save to tmpdir * path 2020-09-23 11:44:15 +00:00			`prediction_file = Path(tmpdir) / 'predictions.pt'`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00
ref: inner train loop (intermediate step) 17/n (#3376) * ref: inner train loop (intermediate step) 17/n * ref: inner train loop (intermediate step) 17/n * ref: inner train loop (intermediate step) 17/n 2020-09-07 13:31:42 +00:00			`model = EvalModelTemplate(learning_rate=0.005)`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00			`model.test_option = option`
			`model.prediction_file = prediction_file.as_posix()`
			`model.test_step = model.test_step_result_preds`
			`model.test_step_end = None`
			`model.test_epoch_end = None`
			`model.test_end = None`

use tmpdir in tests when writing predictions to disk (#3561) * save to tmpdir * path 2020-09-23 11:44:15 +00:00			`prediction_files = [Path(tmpdir) / 'predictions_rank_0.pt', Path(tmpdir) / 'predictions_rank_1.pt']`
Save test predictions on multiple GPUs (#2926) * Save test predictions on multiple GPUs 2020-08-14 21:52:43 +00:00			`for prediction_file in prediction_files:`
			`if prediction_file.exists():`
			`prediction_file.unlink()`

			`trainer = Trainer(`
			`default_root_dir=tmpdir,`
			`max_epochs=3,`
			`weights_summary=None,`
			`deterministic=True,`
			`distributed_backend=distributed_backend,`
			`gpus=[0, 1]`
			`)`

			`# Prediction file shouldn't exist yet because we haven't done anything`
			`# assert not model.prediction_file.exists()`

			`result = trainer.fit(model, dm)`
			`assert result == 1`
			`result = trainer.test(datamodule=dm)`
			`result = result[0]`
			`assert result['test_loss'] < 0.6`
			`assert result['test_acc'] > 0.8`

			`dm.setup('test')`

			`# check prediction file now exists and is of expected length`
			`size = 0`
			`for prediction_file in prediction_files:`
			`assert prediction_file.exists()`
			`predictions = torch.load(prediction_file)`
			`size += len(predictions)`
			`assert size == len(dm.mnist_test)`
Fix result gathering with varying tensor shapes (#3020) * test for gethering results * fix gather * document tests * changelog * assert dtype * default to concat * additional test 2020-08-19 00:27:48 +00:00

			`def test_result_gather_stack():`
			`""" Test that tensors get concatenated when they all have the same shape. """`
			`outputs = [`
			`{"foo": torch.zeros(4, 5)},`
			`{"foo": torch.zeros(4, 5)},`
			`{"foo": torch.zeros(4, 5)},`
			`]`
			`result = Result.gather(outputs)`
			`assert isinstance(result["foo"], torch.Tensor)`
			`assert list(result["foo"].shape) == [12, 5]`


			`def test_result_gather_concatenate():`
			`""" Test that tensors get concatenated when they have varying size in first dimension. """`
			`outputs = [`
			`{"foo": torch.zeros(4, 5)},`
			`{"foo": torch.zeros(8, 5)},`
			`{"foo": torch.zeros(3, 5)},`
			`]`
			`result = Result.gather(outputs)`
			`assert isinstance(result["foo"], torch.Tensor)`
			`assert list(result["foo"].shape) == [15, 5]`


			`def test_result_gather_scalar():`
			`""" Test that 0-dim tensors get gathered and stacked correctly. """`
			`outputs = [`
			`{"foo": torch.tensor(1)},`
			`{"foo": torch.tensor(2)},`
			`{"foo": torch.tensor(3)},`
			`]`
			`result = Result.gather(outputs)`
			`assert isinstance(result["foo"], torch.Tensor)`
			`assert list(result["foo"].shape) == [3]`


			`def test_result_gather_different_shapes():`
			`""" Test that tensors of varying shape get gathered into a list. """`
			`outputs = [`
			`{"foo": torch.tensor(1)},`
			`{"foo": torch.zeros(2, 3)},`
			`{"foo": torch.zeros(1, 2, 3)},`
			`]`
			`result = Result.gather(outputs)`
			`expected = [torch.tensor(1), torch.zeros(2, 3), torch.zeros(1, 2, 3)]`
			`assert isinstance(result["foo"], list)`
			`assert all(torch.eq(r, e).all() for r, e in zip(result["foo"], expected))`


			`def test_result_gather_mixed_types():`
			`""" Test that a collection of mixed types gets gathered into a list. """`
			`outputs = [`
			`{"foo": 1.2},`
			`{"foo": ["bar", None]},`
			`{"foo": torch.tensor(1)},`
			`]`
			`result = Result.gather(outputs)`
			`expected = [1.2, ["bar", None], torch.tensor(1)]`
			`assert isinstance(result["foo"], list)`
			`assert result["foo"] == expected`
Retrieve last logged val from result by key (#3049) * return last logged value * Update test_results.py * Update step_result.py * Update step_result.py * pep8 * pep8 2020-08-19 22:59:14 +00:00

			`def test_result_retrieve_last_logged_item():`
			`result = Result()`
			`result.log('a', 5., on_step=True, on_epoch=True)`
			`assert result['epoch_a'] == 5.`
			`assert result['step_a'] == 5.`
			`assert result['a'] == 5.`