lightning/tests/trainer/logging_/test_distributed_logging.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock
from unittest.mock import Mock

from pytorch_lightning import Callback, Trainer
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf


class TestModel(BoringModel):

    def on_pretrain_routine_end(self) -> None:
        # Patch the point where metrics are handed off to the logger so we can
        # count how many times the logger is actually invoked on this process.
        with mock.patch("pytorch_lightning.loggers.base.LightningLoggerBase.agg_and_log_metrics") as m:
            self.trainer.logger_connector.log_metrics({"a": 2})

        logged_times = m.call_count
        # only the global-zero process is expected to forward metrics to the logger
        expected = int(self.trainer.is_global_zero)
        msg = f"actual logger called from non-global zero, logged_times: {logged_times}, expected: {expected}"
        assert logged_times == expected, msg
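
# For reference, the behaviour asserted above mirrors the guard a user would
# otherwise write by hand (a minimal sketch; ``logger`` stands for any
# ``LightningLoggerBase`` instance):
#
#     if trainer.is_global_zero:
#         logger.agg_and_log_metrics({"a": 2})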


@RunIf(skip_windows=True)
def test_global_zero_only_logging_ddp_cpu(tmpdir):
    """Makes sure logging only happens from global rank zero."""
    model = TestModel()
    model.training_epoch_end = None
    trainer = Trainer(
        accelerator="ddp_cpu",
        num_processes=2,
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=1,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)


@RunIf(min_gpus=2)
def test_global_zero_only_logging_ddp_spawn(tmpdir):
    """Makes sure logging only happens from global rank zero."""
    model = TestModel()
    model.training_epoch_end = None
    trainer = Trainer(
        accelerator="ddp_spawn",
        gpus=2,
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=1,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)


def test_first_logger_call_in_subprocess(tmpdir):
    """
    Test that the Trainer does not call the logger too early. Only when the worker processes are
    initialized do we have access to the rank and know which one is the main process.
    """

    class LoggerCallsObserver(Callback):

        def on_fit_start(self, trainer, pl_module):
            # this hook is executed directly before Trainer.pre_dispatch
            # logger should not write any logs until this point
            assert not trainer.logger.method_calls
            assert not os.listdir(trainer.logger.save_dir)

        def on_train_start(self, trainer, pl_module):
            # by the time training starts, the logger must have been called
            # exactly once for the hyperparameters and once for the graph
            assert trainer.logger.method_calls
            trainer.logger.log_hyperparams.assert_called_once()
            trainer.logger.log_graph.assert_called_once()
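
    # A ``Mock`` logger records every attribute access and method call, which
    # is what lets ``LoggerCallsObserver`` above assert on exactly when the
    # Trainer first touched the logger.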
    logger = Mock()
    logger.version = "0"
    logger.name = "name"
    logger.save_dir = tmpdir

    model = BoringModel()
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=1,
        max_epochs=1,
        logger=logger,
        callbacks=[LoggerCallsObserver()],
    )
    trainer.fit(model)
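

# A quick way to run just this file (assuming a development checkout of the
# repository with the test requirements installed):
#
#     pytest tests/trainer/logging_/test_distributed_logging.py -v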