lightning/tests/callbacks/test_gpu_stats_monitor.py

import os
import pytest
import torch

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import GPUStatsMonitor
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.loggers.csv_logs import ExperimentWriter
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.base import EvalModelTemplate


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_gpu_stats_monitor(tmpdir):
    """
    Test GPU stats are logged using a logger.

    Fits a model for one epoch on a single GPU with a ``GPUStatsMonitor``
    callback and a ``CSVLogger``, then checks that the header line of the
    written metrics file mentions every expected statistic.
    """
    model = EvalModelTemplate()
    gpu_stats = GPUStatsMonitor()
    logger = CSVLogger(tmpdir)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        gpus=1,
        callbacks=[gpu_stats],
        logger=logger
    )

    results = trainer.fit(model)
    assert results

    path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE)
    with open(path_csv, 'r') as fp:
        # Only the first line (the column names) is needed. ``readline`` returns
        # '' for an empty file, so the assertion below fails with a readable
        # message instead of an IndexError.
        header = fp.readline()

    fields = [
        'utilization.gpu',
        'memory.used',
        'memory.free',
        'utilization.memory'
    ]

    # The field names contain no whitespace, so a substring check against the
    # raw header line is equivalent to checking each whitespace-separated token.
    missing = [f for f in fields if f not in header]
    assert not missing, 'GPU stats missing from the logged header: {}'.format(missing)


@pytest.mark.skipif(torch.cuda.is_available(), reason="test requires CPU machine")
def test_gpu_stats_monitor_cpu_machine(tmpdir):
    """
    Test GPUStatsMonitor on CPU machine.

    Constructing the callback is expected to raise a
    ``MisconfigurationException`` whose message mentions that the NVIDIA
    driver is not installed.
    """
    with pytest.raises(MisconfigurationException, match='NVIDIA driver is not installed'):
        # The instance itself is never used; only the constructor's failure matters.
        GPUStatsMonitor()


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_gpu_stats_monitor_no_logger(tmpdir):
    """
    Test GPUStatsMonitor with no logger in Trainer.

    With ``logger=False`` passed to the Trainer, calling ``fit`` is expected to
    raise a ``MisconfigurationException`` that mentions the missing logger.
    """
    model = EvalModelTemplate()
    gpu_stats = GPUStatsMonitor()

    trainer = Trainer(
        default_root_dir=tmpdir,
        callbacks=[gpu_stats],
        max_epochs=1,
        gpus=1,
        logger=False
    )

    # ``match`` is treated as a regular expression, so the trailing dot is
    # escaped to require a literal period rather than any character.
    with pytest.raises(MisconfigurationException, match=r'Trainer that has no logger\.'):
        trainer.fit(model)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_gpu_stats_monitor_no_gpu_warning(tmpdir):
    """
    Test GPUStatsMonitor raises a MisconfigurationException when the Trainer is not given a GPU.

    The Trainer is built with ``gpus=None`` on a machine where CUDA is available,
    and ``fit`` is expected to fail with a message containing 'not running on GPU'.
    """
    template_model = EvalModelTemplate()
    monitor = GPUStatsMonitor()

    # Same configuration as a GPU run, except that no GPU is requested.
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        callbacks=[monitor],
        max_steps=1,
        gpus=None,
    )
    trainer = Trainer(**trainer_kwargs)

    expected_error = pytest.raises(MisconfigurationException, match='not running on GPU')
    with expected_error:
        trainer.fit(template_model)


def test_gpu_stats_monitor_parse_gpu_stats():
    """
    Test ``GPUStatsMonitor._parse_gpu_stats`` maps per-GPU values onto named log entries.

    Each expected key has the form ``'gpu_id: <id>/<name> (<unit>)'``.
    """
    gpu_ids = '1,2'
    # The first GPU carries one more value than there are keys; the expected
    # result below contains no entry for that surplus value (5).
    stats = [[3, 4, 5], [6, 7]]
    keys = [('gpu', 'a'), ('memory', 'b')]

    parsed = GPUStatsMonitor._parse_gpu_stats(gpu_ids, stats, keys)

    assert parsed == {
        'gpu_id: 1/gpu (a)': 3,
        'gpu_id: 1/memory (b)': 4,
        'gpu_id: 2/gpu (a)': 6,
        'gpu_id: 2/memory (b)': 7,
    }
Fix GpuUsageLogger to work on different platforms (#3008) * Fix GpuUsageLogger * docstrings * misconfigexception * add basic tests * skip doctest * fix parameter and docstring * rm cl * skip doctest * cleanup * chlog * add suggestions from review * add test from suggestions * fix import * fix test * fix test * fix test * fix test * rename GpuUsageLogger to GPUStatsMonitor * doc fix * Apply suggestions from code review * update docs format * update docs * miss * merge * fix title formatting * unindent * punctuation * simplify if statements * fix test * suggestions * pep * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix on_train_batch_* * use AttributeDict * usage * rank zero Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * import * minor changes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Adrian Wälchli <adrian.waelchli@inf.unibe.ch> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-08-27 17:50:32 +00:00			`import os`
			`import pytest`
			`import torch`

			`from pytorch_lightning import Trainer`
			`from pytorch_lightning.callbacks import GPUStatsMonitor`
			`from pytorch_lightning.loggers import CSVLogger`
			`from pytorch_lightning.loggers.csv_logs import ExperimentWriter`
			`from pytorch_lightning.utilities.exceptions import MisconfigurationException`
			`from tests.base import EvalModelTemplate`


			`@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")`
			`def test_gpu_stats_monitor(tmpdir):`
			`"""`
			`Test GPU stats are logged using a logger.`
			`"""`
			`model = EvalModelTemplate()`
			`gpu_stats = GPUStatsMonitor()`
			`logger = CSVLogger(tmpdir)`

			`trainer = Trainer(`
			`default_root_dir=tmpdir,`
			`max_epochs=1,`
			`gpus=1,`
			`callbacks=[gpu_stats],`
			`logger=logger`
			`)`

			`results = trainer.fit(model)`
			`assert results`

			`path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE)`
			`with open(path_csv, 'r') as fp:`
			`lines = fp.readlines()`

			`header = lines[0].split()`

			`fields = [`
Refactor GPUStatsMonitor to improve training speed (#3257) * Refactor GPUMonitor to improve training speed * added gpu ids to monitor * update tests * added deprecation warning * pep * fix test * fix docs * fix log_gpu_memory * move deprecation check * chlog * Update CHANGELOG.md * suggestions and fix Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-09-04 10:02:16 +00:00			`'utilization.gpu',`
			`'memory.used',`
			`'memory.free',`
			`'utilization.memory'`
Fix GpuUsageLogger to work on different platforms (#3008) * Fix GpuUsageLogger * docstrings * misconfigexception * add basic tests * skip doctest * fix parameter and docstring * rm cl * skip doctest * cleanup * chlog * add suggestions from review * add test from suggestions * fix import * fix test * fix test * fix test * fix test * rename GpuUsageLogger to GPUStatsMonitor * doc fix * Apply suggestions from code review * update docs format * update docs * miss * merge * fix title formatting * unindent * punctuation * simplify if statements * fix test * suggestions * pep * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix on_train_batch_* * use AttributeDict * usage * rank zero Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * import * minor changes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Adrian Wälchli <adrian.waelchli@inf.unibe.ch> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-08-27 17:50:32 +00:00			`]`

			`for f in fields:`
			`assert any([f in h for h in header])`


			`@pytest.mark.skipif(torch.cuda.is_available(), reason="test requires CPU machine")`
			`def test_gpu_stats_monitor_cpu_machine(tmpdir):`
			`"""`
			`Test GPUStatsMonitor on CPU machine.`
			`"""`
			`with pytest.raises(MisconfigurationException, match='NVIDIA driver is not installed'):`
			`gpu_stats = GPUStatsMonitor()`


			`@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")`
			`def test_gpu_stats_monitor_no_logger(tmpdir):`
			`"""`
			`Test GPUStatsMonitor with no logger in Trainer.`
			`"""`
			`model = EvalModelTemplate()`
			`gpu_stats = GPUStatsMonitor()`

			`trainer = Trainer(`
			`default_root_dir=tmpdir,`
			`callbacks=[gpu_stats],`
			`max_epochs=1,`
			`gpus=1,`
Split GPUStatsMonitor function (#3644) * Split function * Add docstrings * Add typing annotations * Minor refactor * Make static to add a test 2020-09-25 05:30:30 +00:00			`logger=False`
Fix GpuUsageLogger to work on different platforms (#3008) * Fix GpuUsageLogger * docstrings * misconfigexception * add basic tests * skip doctest * fix parameter and docstring * rm cl * skip doctest * cleanup * chlog * add suggestions from review * add test from suggestions * fix import * fix test * fix test * fix test * fix test * rename GpuUsageLogger to GPUStatsMonitor * doc fix * Apply suggestions from code review * update docs format * update docs * miss * merge * fix title formatting * unindent * punctuation * simplify if statements * fix test * suggestions * pep * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix on_train_batch_* * use AttributeDict * usage * rank zero Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * import * minor changes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Adrian Wälchli <adrian.waelchli@inf.unibe.ch> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-08-27 17:50:32 +00:00			`)`

			`with pytest.raises(MisconfigurationException, match='Trainer that has no logger.'):`
			`trainer.fit(model)`


			`@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")`
			`def test_gpu_stats_monitor_no_gpu_warning(tmpdir):`
			`"""`
			`Test GPUStatsMonitor raises a warning when not training on GPU device.`
			`"""`
			`model = EvalModelTemplate()`
			`gpu_stats = GPUStatsMonitor()`

			`trainer = Trainer(`
			`default_root_dir=tmpdir,`
			`callbacks=[gpu_stats],`
			`max_steps=1,`
			`gpus=None`
			`)`

Refactor GPUStatsMonitor to improve training speed (#3257) * Refactor GPUMonitor to improve training speed * added gpu ids to monitor * update tests * added deprecation warning * pep * fix test * fix docs * fix log_gpu_memory * move deprecation check * chlog * Update CHANGELOG.md * suggestions and fix Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-09-04 10:02:16 +00:00			`with pytest.raises(MisconfigurationException, match='not running on GPU'):`
Fix GpuUsageLogger to work on different platforms (#3008) * Fix GpuUsageLogger * docstrings * misconfigexception * add basic tests * skip doctest * fix parameter and docstring * rm cl * skip doctest * cleanup * chlog * add suggestions from review * add test from suggestions * fix import * fix test * fix test * fix test * fix test * rename GpuUsageLogger to GPUStatsMonitor * doc fix * Apply suggestions from code review * update docs format * update docs * miss * merge * fix title formatting * unindent * punctuation * simplify if statements * fix test * suggestions * pep * Update CHANGELOG.md Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * fix on_train_batch_* * use AttributeDict * usage * rank zero Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * import * minor changes Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Adrian Wälchli <adrian.waelchli@inf.unibe.ch> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-08-27 17:50:32 +00:00			`trainer.fit(model)`
Split GPUStatsMonitor function (#3644) * Split function * Add docstrings * Add typing annotations * Minor refactor * Make static to add a test 2020-09-25 05:30:30 +00:00

			`def test_gpu_stats_monitor_parse_gpu_stats():`
			`logs = GPUStatsMonitor._parse_gpu_stats('1,2', [[3, 4, 5], [6, 7]], [('gpu', 'a'), ('memory', 'b')])`
			`expected = {'gpu_id: 1/gpu (a)': 3, 'gpu_id: 1/memory (b)': 4, 'gpu_id: 2/gpu (a)': 6, 'gpu_id: 2/memory (b)': 7}`
			`assert logs == expected`