Remove deprecated automatic logging of gpu metrics (#12657)
Co-authored-by: carmocca <carlossmocholi@gmail.com>
Parent: 3bd48b8535
Commit: 50294a7eb4
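The deprecation message removed below already names the replacement: monitor GPU stats with the `DeviceStatsMonitor` callback. A minimal migration sketch (the `accelerator`/`devices` values are illustrative assumptions, not part of this commit):

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import DeviceStatsMonitor

# Previously: Trainer(log_gpu_memory="min_max")  # removed by this commit
# Now: attach the callback explicitly; it logs device stats to the
# trainer's logger around each batch.
trainer = Trainer(
    accelerator="gpu",  # assumes a CUDA machine
    devices=1,
    callbacks=[DeviceStatsMonitor()],
)
```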
CHANGELOG.md
@@ -75,10 +75,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed the deprecated `terminate_on_nan` argument from the `Trainer` constructor ([#12553](https://github.com/PyTorchLightning/pytorch-lightning/pull/12553))
 
--
+- Removed the deprecated `log_gpu_memory` argument from the `Trainer` constructor ([#12657](https://github.com/PyTorchLightning/pytorch-lightning/pull/12657))
 
--
+- Removed the deprecated automatic logging of GPU stats by the logger connector ([#12657](https://github.com/PyTorchLightning/pytorch-lightning/pull/12657))
 
 ### Fixed
pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
@@ -11,17 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Iterable, Optional, Union
+from typing import Any, Iterable, Optional, Union
 
 import torch
 
 import pytorch_lightning as pl
-from pytorch_lightning.accelerators import GPUAccelerator
 from pytorch_lightning.loggers import LightningLoggerBase, TensorBoardLogger
 from pytorch_lightning.plugins.environments.slurm_environment import SLURMEnvironment
 from pytorch_lightning.trainer.connectors.logger_connector.result import _METRICS, _OUT_DICT, _PBAR_DICT
 from pytorch_lightning.trainer.states import RunningStage
-from pytorch_lightning.utilities import memory
 from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device
 from pytorch_lightning.utilities.metrics import metrics_to_scalars
 from pytorch_lightning.utilities.model_helpers import is_overridden
@@ -29,20 +27,13 @@ from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation
 
 
 class LoggerConnector:
-    def __init__(self, trainer: "pl.Trainer", log_gpu_memory: Optional[str] = None) -> None:
+    def __init__(self, trainer: "pl.Trainer") -> None:
         self.trainer = trainer
-        if log_gpu_memory is not None:
-            rank_zero_deprecation(
-                "Setting `log_gpu_memory` with the trainer flag is deprecated in v1.5 and will be removed in v1.7. "
-                "Please monitor GPU stats with the `DeviceStatsMonitor` callback directly instead."
-            )
-        self.log_gpu_memory = log_gpu_memory
         self._val_log_step: int = 0
         self._test_log_step: int = 0
         self._progress_bar_metrics: _PBAR_DICT = {}
         self._logged_metrics: _OUT_DICT = {}
         self._callback_metrics: _OUT_DICT = {}
-        self._gpus_metrics: Dict[str, float] = {}
         self._epoch_end_reached = False
         self._current_fx: Optional[str] = None
         self._batch_idx: Optional[int] = None
@@ -193,9 +184,6 @@ class LoggerConnector:
         if self.trainer.fit_loop._should_accumulate() and self.trainer.lightning_module.automatic_optimization:
             return
 
-        # TODO: remove this call in v1.7
-        self._log_gpus_metrics()
-
         # when metrics should be logged
         assert not self._epoch_end_reached
         if self.should_update_logs or self.trainer.fast_dev_run:
@@ -210,22 +198,6 @@
         assert self.trainer._results is not None
         self.trainer._results.reset(metrics=True)
 
-    def _log_gpus_metrics(self) -> None:
-        """
-        .. deprecated:: v1.5
-            This function was deprecated in v1.5 in favor of
-            `pytorch_lightning.accelerators.gpu._get_nvidia_gpu_stats` and will be removed in v1.7.
-        """
-        for key, mem in self.gpus_metrics.items():
-            if self.log_gpu_memory == "min_max":
-                self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True)
-            else:
-                gpu_id = int(key.split("/")[0].split(":")[1])
-                if gpu_id in self.trainer.device_ids:
-                    self.trainer.lightning_module.log(
-                        key, mem, prog_bar=False, logger=True, on_step=True, on_epoch=False
-                    )
-
     """
     Utilities and properties
     """
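The removed helper assumed metric keys shaped like `gpu_id: 0/memory.used (MB)` and parsed the device index out of them. A small self-contained illustration of that parsing (the key format is inferred from the split logic above, not stated by this commit):

```python
# Hypothetical reconstruction of the index parsing from the removed helper.
def gpu_id_from_key(key: str) -> int:
    # "gpu_id: 0/memory.used (MB)" -> "gpu_id: 0" -> " 0" -> 0
    return int(key.split("/")[0].split(":")[1])

assert gpu_id_from_key("gpu_id: 0/memory.used (MB)") == 0
```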
@@ -298,17 +270,6 @@
         assert self.trainer._results is not None
         return self.trainer._results.metrics(on_step)
 
-    @property
-    def gpus_metrics(self) -> Dict[str, float]:
-        """
-        .. deprecated:: v1.5
-            Will be removed in v1.7.
-        """
-        if isinstance(self.trainer.accelerator, GPUAccelerator) and self.log_gpu_memory:
-            mem_map = memory.get_memory_profile(self.log_gpu_memory)
-            self._gpus_metrics.update(mem_map)
-        return self._gpus_metrics
-
     @property
     def callback_metrics(self) -> _OUT_DICT:
         if self.trainer._results:
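With the `gpus_metrics` property gone, similar per-device numbers can still be read straight from the CUDA allocator. A minimal sketch; the key naming merely mimics the old format and is not a Lightning API, and allocator figures differ from the nvidia-smi-based ones the removed utilities reported:

```python
import torch

def cuda_memory_mb(device: int = 0) -> dict:
    # Allocator statistics for one device, converted to MB.
    return {
        f"gpu_id: {device}/memory.allocated (MB)": torch.cuda.memory_allocated(device) / 2**20,
        f"gpu_id: {device}/memory.reserved (MB)": torch.cuda.memory_reserved(device) / 2**20,
    }
```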
pytorch_lightning/trainer/trainer.py
@@ -145,7 +145,6 @@ class Trainer(
         auto_select_gpus: bool = False,
         tpu_cores: Optional[Union[List[int], str, int]] = None,
         ipus: Optional[int] = None,
-        log_gpu_memory: Optional[str] = None,  # TODO: Remove in 1.7
         enable_progress_bar: bool = True,
         overfit_batches: Union[int, float] = 0.0,
         track_grad_norm: Union[int, float, str] = -1,
@@ -303,12 +302,6 @@
                 of the individual loggers.
                 Default: ``True``.
 
-            log_gpu_memory: None, 'min_max', 'all'. Might slow performance.
-
-                .. deprecated:: v1.5
-                    Deprecated in v1.5.0 and will be removed in v1.7.0
-                    Please use the ``DeviceStatsMonitor`` callback directly instead.
-
             log_every_n_steps: How often to log within steps.
                 Default: ``50``.
 
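The removed docstring documented the accepted values (`None`, `'min_max'`, `'all'`). A hedged before/after sketch of the constructor call, following the removed docstring's own advice:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import DeviceStatsMonitor

# Before (deprecated in v1.5, removed here):
#     trainer = Trainer(log_gpu_memory="all")
# After:
trainer = Trainer(callbacks=[DeviceStatsMonitor()])
```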
@@ -461,7 +454,7 @@
             amp_level=amp_level,
             plugins=plugins,
         )
-        self._logger_connector = LoggerConnector(self, log_gpu_memory)
+        self._logger_connector = LoggerConnector(self)
         self._callback_connector = CallbackConnector(self)
         self._checkpoint_connector = CheckpointConnector(self, resume_from_checkpoint)
         self._signal_connector = SignalConnector(self)
tests/deprecated_api/test_remove_1-7.py
@@ -337,13 +337,6 @@ def test_v1_7_0_weights_summary_trainer(tmpdir):
         t.weights_summary = "blah"
 
 
-def test_v1_7_0_trainer_log_gpu_memory(tmpdir):
-    with pytest.deprecated_call(
-        match="Setting `log_gpu_memory` with the trainer flag is deprecated in v1.5 and will be removed"
-    ):
-        _ = Trainer(log_gpu_memory="min_max")
-
-
 def test_v1_7_0_deprecated_slurm_job_id():
     trainer = Trainer()
     with pytest.deprecated_call(match="Method `slurm_job_id` is deprecated in v1.6.0 and will be removed in v1.7.0."):
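With the argument gone from the signature entirely, passing it now fails like any unknown keyword. A hypothetical regression test, not part of this commit:

```python
import pytest
from pytorch_lightning import Trainer

def test_log_gpu_memory_is_gone():
    # The keyword was removed from Trainer's signature, so Python rejects it.
    with pytest.raises(TypeError):
        Trainer(log_gpu_memory="min_max")
```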