lightning/tests/trainer/logging_/test_distributed_logging.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock
from unittest.mock import Mock

from pytorch_lightning import Callback, Trainer
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf


class TestModel(BoringModel):

    def on_pretrain_routine_end(self) -> None:
        # Patch the point where metrics are handed off to the logger so we can
        # count how many times the logger is actually invoked on this process.
        with mock.patch("pytorch_lightning.loggers.base.LightningLoggerBase.agg_and_log_metrics") as m:
            self.trainer.logger_connector.log_metrics({"a": 2})

        logged_times = m.call_count
        # only the global-zero process is expected to forward metrics to the logger
        expected = int(self.trainer.is_global_zero)
        msg = f"actual logger called from non-global zero, logged_times: {logged_times}, expected: {expected}"
        assert logged_times == expected, msg
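
# For reference, the behaviour asserted above mirrors the guard a user would
# otherwise write by hand (a minimal sketch; ``logger`` stands for any
# ``LightningLoggerBase`` instance):
#
#     if trainer.is_global_zero:
#         logger.agg_and_log_metrics({"a": 2})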


@RunIf(skip_windows=True)
def test_global_zero_only_logging_ddp_cpu(tmpdir):
    """Makes sure logging only happens from global rank zero."""
    model = TestModel()
    model.training_epoch_end = None
    trainer = Trainer(
        accelerator="ddp_cpu",
        num_processes=2,
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=1,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)


@RunIf(min_gpus=2)
def test_global_zero_only_logging_ddp_spawn(tmpdir):
    """Makes sure logging only happens from global rank zero."""
    model = TestModel()
    model.training_epoch_end = None
    trainer = Trainer(
        accelerator="ddp_spawn",
        gpus=2,
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=1,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)


def test_first_logger_call_in_subprocess(tmpdir):
    """
    Test that the Trainer does not call the logger too early. Only when the worker processes are
    initialized do we have access to the rank and know which one is the main process.
    """

    class LoggerCallsObserver(Callback):

        def on_fit_start(self, trainer, pl_module):
            # this hook is executed directly before Trainer.pre_dispatch
            # logger should not write any logs until this point
            assert not trainer.logger.method_calls
            assert not os.listdir(trainer.logger.save_dir)

        def on_train_start(self, trainer, pl_module):
            # by the time training starts, the logger must have been called
            # exactly once for the hyperparameters and once for the graph
            assert trainer.logger.method_calls
            trainer.logger.log_hyperparams.assert_called_once()
            trainer.logger.log_graph.assert_called_once()
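
    # A ``Mock`` logger records every attribute access and method call, which
    # is what lets ``LoggerCallsObserver`` above assert on exactly when the
    # Trainer first touched the logger.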
    logger = Mock()
    logger.version = "0"
    logger.name = "name"
    logger.save_dir = tmpdir

    model = BoringModel()
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=1,
        limit_val_batches=1,
        max_epochs=1,
        logger=logger,
        callbacks=[LoggerCallsObserver()],
    )
    trainer.fit(model)
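

# A quick way to run just this file (assuming a development checkout of the
# repository with the test requirements installed):
#
#     pytest tests/trainer/logging_/test_distributed_logging.py -v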