Removed `flush_logs_every_n_steps` argument from Trainer (#13074)
Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
parent c5938f8fbc
commit dd47518322
@@ -120,6 +120,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Removed

+- Removed the deprecated `flush_logs_every_n_steps` argument from the `Trainer` constructor ([#13074](https://github.com/PyTorchLightning/pytorch-lightning/pull/13074))
+
 - Removed the deprecated `process_position` argument from the `Trainer` constructor ([#13071](https://github.com/PyTorchLightning/pytorch-lightning/pull/13071))

@@ -695,30 +695,6 @@ impact to subsequent runs. These are the changes enabled:

 - Disables the Tuner.
 - If using the CLI, the configuration file is not saved.

-flush_logs_every_n_steps
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. warning:: ``flush_logs_every_n_steps`` has been deprecated in v1.5 and will be removed in v1.7.
-    Please configure flushing directly in the logger instead.
-
-.. raw:: html
-
-    <video width="50%" max-width="400px" controls
-    poster="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/trainer_flags/thumb/flush_logs%E2%80%A8_every_n_steps.jpg"
-    src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/trainer_flags/flush_logs_every_n_steps.mp4"></video>
-
-|
-
-Writes logs to disk this often.
-
-.. testcode::
-
-    # default used by the Trainer
-    trainer = Trainer(flush_logs_every_n_steps=100)
-
-See Also:
-    - :doc:`logging <../extensions/logging>`
-
 .. _gpus:

 gpus

@@ -49,20 +49,19 @@ To change this behaviour, set the *log_every_n_steps* :class:`~pytorch_lightning

 Modify flushing frequency
 =========================

-Metrics are kept in memory for N steps to improve training efficiency. Every N steps, metrics flush to disk. To change the frequency of this flushing, use the *flush_logs_every_n_steps* Trainer argument.
+Some loggers keep logged metrics in memory for N steps and only periodically flush them to disk to improve training efficiency.
+Every logger handles this a bit differently. For example, here is how to fine-tune flushing for the TensorBoard logger:

 .. code-block:: python

-    # faster training, high memory
-    Trainer(flush_logs_every_n_steps=500)
+    # Default used by TensorBoard: Write to disk after 10 logging events or every two minutes
+    logger = TensorBoardLogger(..., max_queue=10, flush_secs=120)

-    # slower training, low memory
-    Trainer(flush_logs_every_n_steps=500)
+    # Faster training, more memory used
+    logger = TensorBoardLogger(..., max_queue=100)

-The higher *flush_logs_every_n_steps* is, the faster the model will train but the memory will build up until the next flush.
-The smaller *flush_logs_every_n_steps* is, the slower the model will train but memory will be kept to a minimum.
-
-TODO: chart
+    # Slower training, less memory used
+    logger = TensorBoardLogger(..., max_queue=1)

 ----

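Note: for anyone migrating off the removed flag, the equivalent knobs now live on the logger itself. A minimal sketch, assuming the TensorBoard logger is used (the ``logs/`` path and the chosen values are illustrative; ``TensorBoardLogger`` forwards extra keyword arguments such as ``max_queue`` and ``flush_secs`` to TensorBoard's ``SummaryWriter``):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TensorBoardLogger

    # before this commit: trainer = Trainer(flush_logs_every_n_steps=100)
    # after: buffer up to 50 logging events and flush at least every 60 seconds
    logger = TensorBoardLogger("logs/", max_queue=50, flush_secs=60)
    trainer = Trainer(logger=logger)
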
@@ -528,9 +528,7 @@ class TrainingEpochLoop(loops.Loop[_OUTPUTS_TYPE]):

     def _save_loggers_on_train_batch_end(self) -> None:
         """Flushes loggers to disk."""
-        # this assumes that `batches_that_stepped` was increased before
-        should_flush = self._batches_that_stepped % self.trainer.flush_logs_every_n_steps == 0
-        if should_flush or self.trainer.should_stop:
+        if self.trainer.should_stop:
             for logger in self.trainer.loggers:
                 logger.save()

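Note: with this change the training loop only calls ``logger.save()`` when training is stopping, so periodic flushing is now each logger's responsibility. Anyone who relied on the old loop-driven flushing could approximate it with a small callback. A hedged sketch only; ``FlushLoggersEveryN`` and its ``every_n`` argument are hypothetical, not part of the library:

.. code-block:: python

    from pytorch_lightning.callbacks import Callback


    class FlushLoggersEveryN(Callback):
        """Hypothetical helper mirroring the removed modulo check."""

        def __init__(self, every_n: int = 100):
            self.every_n = every_n

        def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
            # flush all attached loggers every `every_n` training batches
            if (batch_idx + 1) % self.every_n == 0:
                for logger in trainer.loggers:
                    logger.save()

Attached as ``Trainer(callbacks=[FlushLoggersEveryN(100)])``, this roughly restores the pre-removal default of flushing every 100 steps.
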
@@ -43,19 +43,10 @@ class LoggerConnector:

     def on_trainer_init(
         self,
         logger: Union[bool, Logger, Iterable[Logger]],
-        flush_logs_every_n_steps: Optional[int],
         log_every_n_steps: int,
         move_metrics_to_cpu: bool,
     ) -> None:
         self.configure_logger(logger)
-        if flush_logs_every_n_steps is not None:
-            rank_zero_deprecation(
-                f"Setting `Trainer(flush_logs_every_n_steps={flush_logs_every_n_steps})` is deprecated in v1.5 "
-                "and will be removed in v1.7. Please configure flushing in the logger instead."
-            )
-        else:
-            flush_logs_every_n_steps = 100  # original default parameter
-        self.trainer.flush_logs_every_n_steps = flush_logs_every_n_steps
         self.trainer.log_every_n_steps = log_every_n_steps
         self.trainer.move_metrics_to_cpu = move_metrics_to_cpu
         for logger in self.trainer.loggers:

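Note: the deleted block followed the project's usual soft-deprecation shape: the argument defaults to ``None`` so that only an explicit user value triggers ``rank_zero_deprecation``, and the old default of ``100`` is substituted otherwise. Reduced to a standalone sketch (the helper name is illustrative, not library API):

.. code-block:: python

    from typing import Callable, Optional


    def resolve_deprecated(value: Optional[int], warn: Callable[[str], None], old_default: int = 100) -> int:
        # `None` means "not set by the user", so only explicit values warn
        if value is not None:
            warn(f"Setting `flush_logs_every_n_steps={value}` is deprecated.")
            return value
        return old_default
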
@@ -159,7 +159,6 @@ class Trainer(
         limit_test_batches: Optional[Union[int, float]] = None,
         limit_predict_batches: Optional[Union[int, float]] = None,
         val_check_interval: Optional[Union[int, float]] = None,
-        flush_logs_every_n_steps: Optional[int] = None,
         log_every_n_steps: int = 50,
         accelerator: Optional[Union[str, Accelerator]] = None,
         strategy: Optional[Union[str, Strategy]] = None,

@@ -260,12 +259,6 @@ class Trainer(
             of train, val and test to find any bugs (ie: a sort of unit test).
             Default: ``False``.

-        flush_logs_every_n_steps: How often to flush logs to disk (defaults to every 100 steps).
-
-            .. deprecated:: v1.5
-                ``flush_logs_every_n_steps`` has been deprecated in v1.5 and will be removed in v1.7.
-                Please configure flushing directly in the logger instead.
-
         gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node
             Default: ``None``.

@@ -555,7 +548,7 @@ class Trainer(

         # init logger flags
         self._loggers: List[Logger]
-        self._logger_connector.on_trainer_init(logger, flush_logs_every_n_steps, log_every_n_steps, move_metrics_to_cpu)
+        self._logger_connector.on_trainer_init(logger, log_every_n_steps, move_metrics_to_cpu)

         # init debugging flags
         self.val_check_interval: Union[int, float]

@@ -61,11 +61,6 @@ def test_v1_7_0_on_interrupt(tmpdir):
     trainer.fit(model)


-def test_v1_7_0_flush_logs_every_n_steps_trainer_constructor(tmpdir):
-    with pytest.deprecated_call(match=r"Setting `Trainer\(flush_logs_every_n_steps=10\)` is deprecated in v1.5"):
-        _ = Trainer(flush_logs_every_n_steps=10)
-
-
 class BoringCallbackDDPSpawnModel(BoringModel):
     def add_to_queue(self, queue):
         ...
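
Note: the deprecation test is deleted outright because the argument no longer exists, so passing it now fails like any unknown keyword argument. A minimal sketch of what such a check could look like (hypothetical, not part of this commit):

.. code-block:: python

    import pytest
    from pytorch_lightning import Trainer


    def test_flush_logs_every_n_steps_removed():
        # the constructor no longer accepts the argument, so Python rejects it
        with pytest.raises(TypeError, match="unexpected keyword argument"):
            Trainer(flush_logs_every_n_steps=10)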