Deprecate LightningModule.summarize() in favor of pl.utilities.model_summary.summarize() (#8513)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
Co-authored-by: ananthsub <ananth.subramaniam@gmail.com>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>

commit f90849cc95 (parent 08fba96b6c)
@ -49,6 +49,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Deprecated

- Deprecated `LightningModule.summarize()` in favor of `pytorch_lightning.utilities.model_summary.summarize()` ([#8513](https://github.com/PyTorchLightning/pytorch-lightning/pull/8513))
- Deprecated `LightningModule.model_size` ([#8343](https://github.com/PyTorchLightning/pytorch-lightning/pull/8343))
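For downstream code, the migration announced above is a one-line change. A minimal sketch, assuming a standard `LightningModule` subclass (the `LitExample` class here is hypothetical and only for illustration):

import torch
import pytorch_lightning as pl
from pytorch_lightning.utilities.model_summary import summarize

class LitExample(pl.LightningModule):  # hypothetical module, not part of the change
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Linear(8, 2)

model = LitExample()

# Deprecated since v1.5, scheduled for removal in v1.7:
summary = model.summarize(max_depth=1)

# Replacement:
summary = summarize(model, max_depth=1)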
@ -33,7 +33,6 @@ from torchmetrics import Metric
from pytorch_lightning.core.grads import GradInformation
from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks, ModelHooks
from pytorch_lightning.core.memory import ModelSummary
from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin, HyperparametersMixin
from pytorch_lightning.core.optimizer import LightningOptimizer
from pytorch_lightning.core.saving import ModelIO

@ -44,6 +43,7 @@ from pytorch_lightning.utilities.cloud_io import get_filesystem
from pytorch_lightning.utilities.distributed import distributed_available, sync_ddp
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.memory import get_model_size_mb
from pytorch_lightning.utilities.model_summary import ModelSummary, summarize
from pytorch_lightning.utilities.parsing import collect_init_args
from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature
from pytorch_lightning.utilities.types import _METRIC_COLLECTION, EPOCH_OUTPUT, STEP_OUTPUT
@ -1715,6 +1715,10 @@ class LightningModule(
        """
        Summarize this LightningModule.

        .. deprecated:: v1.5
            This method was deprecated in v1.5 in favor of `pytorch_lightning.utilities.model_summary.summarize`
            and will be removed in v1.7.

        Args:
            mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers).

@ -1727,24 +1731,13 @@ class LightningModule(
        Return:
            The model summary object
        """
        model_summary = None
        warning_cache.deprecation(
            "The `LightningModule.summarize` method is deprecated in v1.5 and will be removed in v1.7. "
            "Use `pytorch_lightning.utilities.model_summary.summarize` instead.",
            stacklevel=6,
        )

        # temporary mapping from mode to max_depth
        if max_depth is None:
            if mode in ModelSummary.MODES:
                max_depth = ModelSummary.MODES[mode]
                rank_zero_deprecation(
                    f"Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"
                    f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behavior."
                )
                model_summary = ModelSummary(self, max_depth=max_depth)
            elif mode is not None:
                raise MisconfigurationException(f"`mode` can be None, {', '.join(ModelSummary.MODES)}, got {mode}")
        else:
            model_summary = ModelSummary(self, max_depth=max_depth)

        log.info("\n" + str(model_summary))
        return model_summary
        return summarize(self, mode, max_depth)

    def freeze(self) -> None:
        r"""
@ -11,478 +11,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning.utilities import rank_zero_deprecation

import os
import shutil
import subprocess
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union

rank_zero_deprecation(
    "`pytorch_lightning.core.memory.get_memory_profile` and"
    " `pytorch_lightning.core.memory.get_gpu_memory_map` have been moved"
    " to `pytorch_lightning.utilities.memory` since v1.5 and will be removed in v1.7."
)

import numpy as np
import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.hooks import RemovableHandle

# To support backward compatibility as get_memory_profile and get_gpu_memory_map have been moved
from pytorch_lightning.utilities.memory import get_gpu_memory_map, get_memory_profile  # noqa: E402, F401 # isort: skip

from pytorch_lightning.utilities import AMPType, DeviceType
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
from pytorch_lightning.utilities.warnings import WarningCache

rank_zero_deprecation(
    "`pytorch_lightning.core.memory.LayerSummary` and"
    " `pytorch_lightning.core.memory.ModelSummary` have been moved"
    " to `pytorch_lightning.utilities.model_summary` since v1.5 and will be removed in v1.7."
)

warning_cache = WarningCache()

PARAMETER_NUM_UNITS = [" ", "K", "M", "B", "T"]
UNKNOWN_SIZE = "?"
# To support backward compatibility as LayerSummary and ModelSummary have been moved
from pytorch_lightning.utilities.model_summary import LayerSummary, ModelSummary  # noqa: E402, F401 # isort: skip
@ -17,11 +17,10 @@ from typing import Any, Dict, Iterable, Mapping, Optional, Union
import torch

import pytorch_lightning as pl
from pytorch_lightning.core import memory
from pytorch_lightning.loggers import LightningLoggerBase, LoggerCollection, TensorBoardLogger
from pytorch_lightning.trainer.connectors.logger_connector.result import _METRIC, MetricSource
from pytorch_lightning.trainer.states import RunningStage, TrainerFn
from pytorch_lightning.utilities import DeviceType
from pytorch_lightning.utilities import DeviceType, memory
from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device
from pytorch_lightning.utilities.metrics import metrics_to_scalars
from pytorch_lightning.utilities.types import _EVALUATE_OUTPUT
@ -27,7 +27,6 @@ import pytorch_lightning as pl
from pytorch_lightning.accelerators import Accelerator, IPUAccelerator
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.core.datamodule import LightningDataModule
from pytorch_lightning.core.memory import ModelSummary
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.loops import TrainingBatchLoop, TrainingEpochLoop
from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop

@ -80,6 +79,7 @@ from pytorch_lightning.utilities.distributed import distributed_available
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _fault_tolerant_enabled
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.model_summary import ModelSummary, summarize
from pytorch_lightning.utilities.seed import reset_seed
from pytorch_lightning.utilities.types import _EVALUATE_OUTPUT, _PREDICT_OUTPUT, EVAL_DATALOADERS, TRAIN_DATALOADERS

@ -1031,7 +1031,7 @@ class Trainer(
        # print model summary
        if self.is_global_zero and self.weights_summary is not None and not self.testing:
            max_depth = ModelSummary.MODES[self.weights_summary]
            ref_model.summarize(max_depth=max_depth)
            summarize(ref_model, max_depth=max_depth)

        # on pretrain routine end
        self.on_pretrain_routine_end()
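For reference, a small sketch of the mapping this call site relies on: the legacy `weights_summary` strings are translated through `ModelSummary.MODES` (top -> 1, full -> -1) before the standalone `summarize` utility is called. The `DemoModel` class below is a hypothetical stand-in for the module being fit:

import torch
import pytorch_lightning as pl
from pytorch_lightning.utilities.model_summary import ModelSummary, summarize

class DemoModel(pl.LightningModule):  # hypothetical module, for illustration only
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

# ModelSummary.MODES == {"top": 1, "full": -1}
for weights_summary in ("top", "full"):
    max_depth = ModelSummary.MODES[weights_summary]
    summarize(DemoModel(), max_depth=max_depth)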
@ -14,7 +14,10 @@

import gc
import os
import shutil
import subprocess
import uuid
from typing import Dict, Union

import torch
from torch.nn import Module

@ -92,6 +95,57 @@ def garbage_collection_cuda():
        raise


def get_memory_profile(mode: str) -> Union[Dict[str, int], Dict[int, int]]:
    """Get a profile of the current memory usage.

    Args:
        mode: There are two modes:

            - 'all' means return memory for all gpus
            - 'min_max' means return memory for max and min

    Return:
        A dictionary in which the keys are device ids as integers and
        values are memory usage as integers in MB.
        If mode is 'min_max', the dictionary will also contain two additional keys:

        - 'min_gpu_mem': the minimum memory usage in MB
        - 'max_gpu_mem': the maximum memory usage in MB
    """
    memory_map = get_gpu_memory_map()

    if mode == "min_max":
        min_index, min_memory = min(memory_map.items(), key=lambda item: item[1])
        max_index, max_memory = max(memory_map.items(), key=lambda item: item[1])

        memory_map = {"min_gpu_mem": min_memory, "max_gpu_mem": max_memory}

    return memory_map


def get_gpu_memory_map() -> Dict[str, int]:
    """
    Get the current gpu usage.

    Return:
        A dictionary in which the keys are device ids as integers and
        values are memory usage as integers in MB.
    """
    result = subprocess.run(
        [shutil.which("nvidia-smi"), "--query-gpu=memory.used", "--format=csv,nounits,noheader"],
        encoding="utf-8",
        # capture_output=True, # valid for python version >=3.7
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,  # for backward compatibility with python version 3.6
        check=True,
    )

    # Convert lines into a dictionary
    gpu_memory = [float(x) for x in result.stdout.strip().split(os.linesep)]
    gpu_memory_map = {f"gpu_id: {gpu_id}/memory.used (MB)": memory for gpu_id, memory in enumerate(gpu_memory)}
    return gpu_memory_map


def get_model_size_mb(model: Module) -> float:
    """
    Calculates the size of a Module in megabytes by saving the model to a temporary file and reading its size.
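The body of `get_model_size_mb` is not part of the captured hunk. A rough sketch of the approach the docstring describes (serialize to a uniquely named temporary file, read its size, clean up) could look like the following; the helper name and details are assumptions, not the repository's actual implementation:

import os
import uuid

import torch
from torch.nn import Module

def get_model_size_mb_sketch(model: Module) -> float:
    # Save the model's parameters to a temporary file and measure the file size in MB.
    tmp_name = f"{uuid.uuid4().hex}.pt"
    torch.save(model.state_dict(), tmp_name)
    try:
        size_mb = os.path.getsize(tmp_name) / 1e6
    finally:
        os.remove(tmp_name)
    return size_mb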
@ -0,0 +1,471 @@
|
|||
# Copyright The PyTorch Lightning team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
from collections import OrderedDict
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch import Tensor
|
||||
from torch.utils.hooks import RemovableHandle
|
||||
|
||||
import pytorch_lightning as pl
|
||||
from pytorch_lightning.utilities import AMPType, DeviceType, rank_zero_deprecation
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
|
||||
from pytorch_lightning.utilities.warnings import WarningCache
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
warning_cache = WarningCache()
|
||||
|
||||
PARAMETER_NUM_UNITS = [" ", "K", "M", "B", "T"]
|
||||
UNKNOWN_SIZE = "?"
|
||||
|
||||
|
||||
class LayerSummary:
|
||||
"""
|
||||
Summary class for a single layer in a :class:`~pytorch_lightning.core.lightning.LightningModule`.
|
||||
It collects the following information:
|
||||
|
||||
- Type of the layer (e.g. Linear, BatchNorm1d, ...)
|
||||
- Input shape
|
||||
- Output shape
|
||||
- Number of parameters
|
||||
|
||||
The input and output shapes are only known after the example input array was
|
||||
passed through the model.
|
||||
|
||||
Example::
|
||||
|
||||
>>> model = torch.nn.Conv2d(3, 8, 3)
|
||||
>>> summary = LayerSummary(model)
|
||||
>>> summary.num_parameters
|
||||
224
|
||||
>>> summary.layer_type
|
||||
'Conv2d'
|
||||
>>> output = model(torch.rand(1, 3, 5, 5))
|
||||
>>> summary.in_size
|
||||
[1, 3, 5, 5]
|
||||
>>> summary.out_size
|
||||
[1, 8, 3, 3]
|
||||
|
||||
Args:
|
||||
module: A module to summarize
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, module: nn.Module):
|
||||
super().__init__()
|
||||
self._module = module
|
||||
self._hook_handle = self._register_hook()
|
||||
self._in_size = None
|
||||
self._out_size = None
|
||||
|
||||
def __del__(self):
|
||||
self.detach_hook()
|
||||
|
||||
def _register_hook(self) -> Optional[RemovableHandle]:
|
||||
"""
|
||||
Registers a hook on the module that computes the input- and output size(s) on the first forward pass.
|
||||
If the hook is called, it will remove itself from the module, meaning that
|
||||
recursive models will only record their input- and output shapes once.
|
||||
Registering hooks on :class:`~torch.jit.ScriptModule` is not supported.
|
||||
|
||||
Return:
|
||||
A handle for the installed hook, or ``None`` if registering the hook is not possible.
|
||||
"""
|
||||
|
||||
def hook(module, inp, out):
|
||||
if len(inp) == 1:
|
||||
inp = inp[0]
|
||||
self._in_size = parse_batch_shape(inp)
|
||||
self._out_size = parse_batch_shape(out)
|
||||
self._hook_handle.remove()
|
||||
|
||||
handle = None
|
||||
if not isinstance(self._module, torch.jit.ScriptModule):
|
||||
handle = self._module.register_forward_hook(hook)
|
||||
return handle
|
||||
|
||||
def detach_hook(self):
|
||||
"""
|
||||
Removes the forward hook if it was not already removed in the forward pass.
|
||||
Will be called after the summary is created.
|
||||
"""
|
||||
if self._hook_handle is not None:
|
||||
self._hook_handle.remove()
|
||||
|
||||
@property
|
||||
def in_size(self) -> Union[str, List]:
|
||||
return self._in_size or UNKNOWN_SIZE
|
||||
|
||||
@property
|
||||
def out_size(self) -> Union[str, List]:
|
||||
return self._out_size or UNKNOWN_SIZE
|
||||
|
||||
@property
|
||||
def layer_type(self) -> str:
|
||||
"""Returns the class name of the module."""
|
||||
return str(self._module.__class__.__name__)
|
||||
|
||||
@property
|
||||
def num_parameters(self) -> int:
|
||||
"""Returns the number of parameters in this module."""
|
||||
return sum(np.prod(p.shape) if not _is_lazy_weight_tensor(p) else 0 for p in self._module.parameters())
|
||||
|
||||
|
||||
class ModelSummary:
|
||||
"""
|
||||
Generates a summary of all layers in a :class:`~pytorch_lightning.core.lightning.LightningModule`.
|
||||
|
||||
Args:
|
||||
model: The model to summarize (also referred to as the root module).
|
||||
mode: Can be one of
|
||||
|
||||
- `top` (default): only the top-level modules will be recorded (the children of the root module)
|
||||
- `full`: summarizes all layers and their submodules in the root module
|
||||
|
||||
.. deprecated:: v1.4
|
||||
This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6.
|
||||
|
||||
max_depth: Maximum depth of modules to show. Use -1 to show all modules or 0 to show no
|
||||
summary. Defaults to 1.
|
||||
|
||||
The string representation of this summary prints a table with columns containing
|
||||
the name, type and number of parameters for each layer.
|
||||
|
||||
The root module may also have an attribute ``example_input_array`` as shown in the example below.
|
||||
If present, the root module will be called with it as input to determine the
|
||||
intermediate input- and output shapes of all layers. Supported are tensors and
|
||||
nested lists and tuples of tensors. All other types of inputs will be skipped and show as `?`
|
||||
in the summary table. The summary will also display `?` for layers not used in the forward pass.
|
||||
|
||||
Example::
|
||||
|
||||
>>> import pytorch_lightning as pl
|
||||
>>> class LitModel(pl.LightningModule):
|
||||
...
|
||||
... def __init__(self):
|
||||
... super().__init__()
|
||||
... self.net = nn.Sequential(nn.Linear(256, 512), nn.BatchNorm1d(512))
|
||||
... self.example_input_array = torch.zeros(10, 256) # optional
|
||||
...
|
||||
... def forward(self, x):
|
||||
... return self.net(x)
|
||||
...
|
||||
>>> model = LitModel()
|
||||
>>> ModelSummary(model, max_depth=1) # doctest: +NORMALIZE_WHITESPACE
|
||||
| Name | Type | Params | In sizes | Out sizes
|
||||
------------------------------------------------------------
|
||||
0 | net | Sequential | 132 K | [10, 256] | [10, 512]
|
||||
------------------------------------------------------------
|
||||
132 K Trainable params
|
||||
0 Non-trainable params
|
||||
132 K Total params
|
||||
0.530 Total estimated model params size (MB)
|
||||
>>> ModelSummary(model, max_depth=-1) # doctest: +NORMALIZE_WHITESPACE
|
||||
| Name | Type | Params | In sizes | Out sizes
|
||||
--------------------------------------------------------------
|
||||
0 | net | Sequential | 132 K | [10, 256] | [10, 512]
|
||||
1 | net.0 | Linear | 131 K | [10, 256] | [10, 512]
|
||||
2 | net.1 | BatchNorm1d | 1.0 K | [10, 512] | [10, 512]
|
||||
--------------------------------------------------------------
|
||||
132 K Trainable params
|
||||
0 Non-trainable params
|
||||
132 K Total params
|
||||
0.530 Total estimated model params size (MB)
|
||||
"""
|
||||
|
||||
MODES = dict(top=1, full=-1) # TODO: remove in v1.6
|
||||
|
||||
def __init__(self, model, mode: Optional[str] = None, max_depth: Optional[int] = 1):
|
||||
self._model = model
|
||||
|
||||
# temporary mapping from mode to max_depth
|
||||
if max_depth is None or mode is not None:
|
||||
if mode in ModelSummary.MODES:
|
||||
max_depth = ModelSummary.MODES[mode]
|
||||
rank_zero_deprecation(
|
||||
f"Argument `mode` in `ModelSummary` is deprecated in v1.4"
|
||||
f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behaviour."
|
||||
)
|
||||
else:
|
||||
raise MisconfigurationException(f"`mode` can be {', '.join(ModelSummary.MODES)}, got {mode}.")
|
||||
|
||||
if not isinstance(max_depth, int) or max_depth < -1:
|
||||
raise ValueError(f"`max_depth` can be -1, 0 or > 0, got {max_depth}.")
|
||||
|
||||
self._max_depth = max_depth
|
||||
self._layer_summary = self.summarize()
|
||||
# 1 byte -> 8 bits
|
||||
# TODO: how do we compute precision_megabytes in case of mixed precision?
|
||||
precision = self._model.precision if isinstance(self._model.precision, int) else 32
|
||||
self._precision_megabytes = (precision / 8.0) * 1e-6
|
||||
|
||||
@property
|
||||
def named_modules(self) -> List[Tuple[str, nn.Module]]:
|
||||
if self._max_depth == 0:
|
||||
mods = []
|
||||
elif self._max_depth == 1:
|
||||
# the children are the top-level modules
|
||||
mods = self._model.named_children()
|
||||
else:
|
||||
mods = self._model.named_modules()
|
||||
mods = list(mods)[1:] # do not include root module (LightningModule)
|
||||
return list(mods)
|
||||
|
||||
@property
|
||||
def layer_names(self) -> List[str]:
|
||||
return list(self._layer_summary.keys())
|
||||
|
||||
@property
|
||||
def layer_types(self) -> List[str]:
|
||||
return [layer.layer_type for layer in self._layer_summary.values()]
|
||||
|
||||
@property
|
||||
def in_sizes(self) -> List:
|
||||
return [layer.in_size for layer in self._layer_summary.values()]
|
||||
|
||||
@property
|
||||
def out_sizes(self) -> List:
|
||||
return [layer.out_size for layer in self._layer_summary.values()]
|
||||
|
||||
@property
|
||||
def param_nums(self) -> List[int]:
|
||||
return [layer.num_parameters for layer in self._layer_summary.values()]
|
||||
|
||||
@property
|
||||
def total_parameters(self) -> int:
|
||||
return sum(p.numel() if not _is_lazy_weight_tensor(p) else 0 for p in self._model.parameters())
|
||||
|
||||
@property
|
||||
def trainable_parameters(self) -> int:
|
||||
return sum(
|
||||
p.numel() if not _is_lazy_weight_tensor(p) else 0 for p in self._model.parameters() if p.requires_grad
|
||||
)
|
||||
|
||||
@property
|
||||
def model_size(self) -> float:
|
||||
# todo: seems it does not work with quantized models - it returns 0.0
|
||||
return self.total_parameters * self._precision_megabytes
|
||||
|
||||
def summarize(self) -> Dict[str, LayerSummary]:
|
||||
summary = OrderedDict((name, LayerSummary(module)) for name, module in self.named_modules)
|
||||
if self._model.example_input_array is not None:
|
||||
self._forward_example_input()
|
||||
for layer in summary.values():
|
||||
layer.detach_hook()
|
||||
|
||||
if self._max_depth >= 1:
|
||||
# remove summary entries with depth > max_depth
|
||||
for k in [k for k in summary if k.count(".") >= self._max_depth]:
|
||||
del summary[k]
|
||||
|
||||
return summary
|
||||
|
||||
def _forward_example_input(self) -> None:
|
||||
"""Run the example input through each layer to get input- and output sizes."""
|
||||
model = self._model
|
||||
trainer = self._model.trainer
|
||||
|
||||
input_ = model.example_input_array
|
||||
input_ = model._apply_batch_transfer_handler(input_)
|
||||
|
||||
if trainer is not None and trainer.amp_backend == AMPType.NATIVE and trainer._device_type != DeviceType.TPU:
|
||||
model.forward = torch.cuda.amp.autocast()(model.forward)
|
||||
|
||||
mode = model.training
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
# let the model hooks collect the input- and output shapes
|
||||
if isinstance(input_, (list, tuple)):
|
||||
model(*input_)
|
||||
elif isinstance(input_, dict):
|
||||
model(**input_)
|
||||
else:
|
||||
model(input_)
|
||||
model.train(mode) # restore mode of module
|
||||
|
||||
def __str__(self):
|
||||
"""
|
||||
Makes a summary listing with:
|
||||
|
||||
Layer Name, Layer Type, Number of Parameters, Input Sizes, Output Sizes, Model Size
|
||||
"""
|
||||
arrays = [
|
||||
[" ", list(map(str, range(len(self._layer_summary))))],
|
||||
["Name", self.layer_names],
|
||||
["Type", self.layer_types],
|
||||
["Params", list(map(get_human_readable_count, self.param_nums))],
|
||||
]
|
||||
if self._model.example_input_array is not None:
|
||||
arrays.append(["In sizes", self.in_sizes])
|
||||
arrays.append(["Out sizes", self.out_sizes])
|
||||
total_parameters = self.total_parameters
|
||||
trainable_parameters = self.trainable_parameters
|
||||
model_size = self.model_size
|
||||
|
||||
return _format_summary_table(total_parameters, trainable_parameters, model_size, *arrays)
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
|
||||
def parse_batch_shape(batch: Any) -> Union[str, List]:
|
||||
if hasattr(batch, "shape"):
|
||||
return list(batch.shape)
|
||||
|
||||
if isinstance(batch, (list, tuple)):
|
||||
shape = [parse_batch_shape(el) for el in batch]
|
||||
return shape
|
||||
|
||||
return UNKNOWN_SIZE
|
||||
|
||||
|
||||
def _format_summary_table(total_parameters: int, trainable_parameters: int, model_size: float, *cols) -> str:
|
||||
"""
|
||||
Takes in a number of arrays, each specifying a column in
|
||||
the summary table, and combines them all into one big
|
||||
string defining a nicely formatted summary table.
|
||||
"""
|
||||
n_rows = len(cols[0][1])
|
||||
n_cols = 1 + len(cols)
|
||||
|
||||
# Get formatting width of each column
|
||||
col_widths = []
|
||||
for c in cols:
|
||||
col_width = max(len(str(a)) for a in c[1]) if n_rows else 0
|
||||
col_width = max(col_width, len(c[0])) # minimum length is header length
|
||||
col_widths.append(col_width)
|
||||
|
||||
# Formatting
|
||||
s = "{:<{}}"
|
||||
total_width = sum(col_widths) + 3 * n_cols
|
||||
header = [s.format(c[0], l) for c, l in zip(cols, col_widths)]
|
||||
|
||||
# Summary = header + divider + Rest of table
|
||||
summary = " | ".join(header) + "\n" + "-" * total_width
|
||||
for i in range(n_rows):
|
||||
line = []
|
||||
for c, l in zip(cols, col_widths):
|
||||
line.append(s.format(str(c[1][i]), l))
|
||||
summary += "\n" + " | ".join(line)
|
||||
summary += "\n" + "-" * total_width
|
||||
|
||||
summary += "\n" + s.format(get_human_readable_count(trainable_parameters), 10)
|
||||
summary += "Trainable params"
|
||||
summary += "\n" + s.format(get_human_readable_count(total_parameters - trainable_parameters), 10)
|
||||
summary += "Non-trainable params"
|
||||
summary += "\n" + s.format(get_human_readable_count(total_parameters), 10)
|
||||
summary += "Total params"
|
||||
summary += "\n" + s.format(get_formatted_model_size(model_size), 10)
|
||||
summary += "Total estimated model params size (MB)"
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
def get_formatted_model_size(total_model_size: float) -> float:
|
||||
return f"{total_model_size:,.3f}"
|
||||
|
||||
|
||||
def get_human_readable_count(number: int) -> str:
|
||||
"""
|
||||
Abbreviates an integer number with K, M, B, T for thousands, millions,
|
||||
billions and trillions, respectively.
|
||||
|
||||
Examples:
|
||||
>>> get_human_readable_count(123)
|
||||
'123 '
|
||||
>>> get_human_readable_count(1234) # (one thousand)
|
||||
'1.2 K'
|
||||
>>> get_human_readable_count(2e6) # (two million)
|
||||
'2.0 M'
|
||||
>>> get_human_readable_count(3e9) # (three billion)
|
||||
'3.0 B'
|
||||
>>> get_human_readable_count(4e14) # (four hundred trillion)
|
||||
'400 T'
|
||||
>>> get_human_readable_count(5e15) # (more than trillion)
|
||||
'5,000 T'
|
||||
|
||||
Args:
|
||||
number: a positive integer number
|
||||
|
||||
Return:
|
||||
A string formatted according to the pattern described above.
|
||||
|
||||
"""
|
||||
assert number >= 0
|
||||
labels = PARAMETER_NUM_UNITS
|
||||
num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1)
|
||||
num_groups = int(np.ceil(num_digits / 3))
|
||||
num_groups = min(num_groups, len(labels)) # don't abbreviate beyond trillions
|
||||
shift = -3 * (num_groups - 1)
|
||||
number = number * (10 ** shift)
|
||||
index = num_groups - 1
|
||||
if index < 1 or number >= 100:
|
||||
return f"{int(number):,d} {labels[index]}"
|
||||
|
||||
return f"{number:,.1f} {labels[index]}"
|
||||
|
||||
|
||||
def _is_lazy_weight_tensor(p: Tensor) -> bool:
|
||||
if _TORCH_GREATER_EQUAL_1_8:
|
||||
from torch.nn.parameter import UninitializedParameter
|
||||
|
||||
if isinstance(p, UninitializedParameter):
|
||||
warning_cache.warn(
|
||||
"A layer with UninitializedParameter was found. "
|
||||
"Thus, the total number of parameters detected may be inaccurate."
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def summarize(
    lightning_module: "pl.LightningModule", mode: Optional[str] = "top", max_depth: Optional[int] = None
) -> Optional[ModelSummary]:
    """
    Summarize the LightningModule specified by `lightning_module`.

    Args:
        lightning_module: `LightningModule` to summarize.
        mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers).

            .. deprecated:: v1.4
                This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6.

        max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the
            layer summary off. Default: 1.

    Return:
        The model summary object
    """

    # temporary mapping from mode to max_depth
    if max_depth is None:
        if mode in ModelSummary.MODES:
            max_depth = ModelSummary.MODES[mode]
            rank_zero_deprecation(
                f"Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"
                f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behavior."
            )
            model_summary = ModelSummary(lightning_module, max_depth=max_depth)
        elif mode is not None:
            raise MisconfigurationException(f"`mode` can be None, {', '.join(ModelSummary.MODES)}, got {mode}")
    else:
        model_summary = ModelSummary(lightning_module, max_depth=max_depth)
    log.info("\n" + str(model_summary))
    return model_summary
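A short usage sketch of the new utility; the `TinyModel` class is hypothetical, and the properties read at the end are the ones defined on `ModelSummary` earlier in this file:

import torch
import pytorch_lightning as pl
from pytorch_lightning.utilities.model_summary import summarize

class TinyModel(pl.LightningModule):  # hypothetical module, for illustration only
    def __init__(self):
        super().__init__()
        self.encoder = torch.nn.Linear(16, 4)
        self.example_input_array = torch.zeros(2, 16)  # optional: enables the In/Out sizes columns

    def forward(self, x):
        return self.encoder(x)

model = TinyModel()
summary = summarize(model, max_depth=1)  # mode="full" is still accepted but emits a deprecation warning

print(summary)              # table: Name | Type | Params | In sizes | Out sizes
print(summary.layer_names)  # ['encoder']
print(summary.param_nums)   # [68] -> 16 * 4 weights + 4 biases
print(summary.model_size)   # estimated parameter size in MB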
@ -14,8 +14,8 @@
import tests.helpers.pipelines as tpipes
import tests.helpers.utils as tutils
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.core import memory
from pytorch_lightning.trainer import Trainer
from pytorch_lightning.utilities import memory
from tests.helpers import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.runif import RunIf
@ -21,7 +21,7 @@ import tests.helpers.pipelines as tpipes
import tests.helpers.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.core import memory
from pytorch_lightning.utilities import memory
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers import BoringModel, RandomDataset
from tests.helpers.datamodules import ClassifDataModule
@ -17,10 +17,10 @@ import pytest
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.core.memory import ModelSummary
from pytorch_lightning.plugins.training_type import DDPPlugin, DDPSpawnPlugin
from pytorch_lightning.utilities.distributed import rank_zero_deprecation, rank_zero_warn
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.model_summary import ModelSummary
from tests.deprecated_api import _soft_unimport_module
from tests.helpers import BoringDataModule, BoringModel
@ -12,11 +12,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Test deprecated functionality which will be removed in v1.7.0 """

import pytest

from tests.deprecated_api import _soft_unimport_module
from tests.helpers import BoringModel


def test_v1_7_0_deprecated_lightning_module_summarize(tmpdir):
    from pytorch_lightning.core.lightning import warning_cache

    model = BoringModel()
    model.summarize(max_depth=1)
    assert any("The `LightningModule.summarize` method is deprecated in v1.5" in w for w in warning_cache)
    warning_cache.clear()


def test_v1_7_0_moved_model_summary_and_layer_summary(tmpdir):
    _soft_unimport_module("pytorch_lightning.core.memory")
    with pytest.deprecated_call(match="to `pytorch_lightning.utilities.model_summary` since v1.5"):
        from pytorch_lightning.core.memory import LayerSummary, ModelSummary  # noqa: F401


def test_v1_7_0_moved_get_memory_profile_and_get_gpu_memory_map(tmpdir):
    _soft_unimport_module("pytorch_lightning.core.memory")
    with pytest.deprecated_call(match="to `pytorch_lightning.utilities.memory` since v1.5"):
        from pytorch_lightning.core.memory import get_gpu_memory_map, get_memory_profile  # noqa: F401


def test_v1_7_0_deprecated_model_size():
    model = BoringModel()
    with pytest.deprecated_call(
@ -16,9 +16,9 @@ import torch
import torch.nn as nn

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.core.memory import ModelSummary, UNKNOWN_SIZE
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_9
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.model_summary import ModelSummary, summarize, UNKNOWN_SIZE
from tests.helpers import BoringModel
from tests.helpers.advanced_models import ParityModuleRNN
from tests.helpers.runif import RunIf
@ -140,7 +140,7 @@ class DeepNestedModel(LightningModule):
def test_invalid_weights_summmary():
    """Test that invalid value for weights_summary raises an error."""
    with pytest.raises(MisconfigurationException, match="`mode` can be None, .* got temp"):
        UnorderedModel().summarize(mode="temp")
        summarize(UnorderedModel, mode="temp")

    with pytest.raises(MisconfigurationException, match="`weights_summary` can be None, .* got temp"):
        Trainer(weights_summary="temp")

@ -150,7 +150,7 @@ def test_invalid_weights_summmary():
def test_empty_model_summary_shapes(mode: str):
    """Test that the summary works for models that have no submodules."""
    model = EmptyModule()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.in_sizes == []
    assert summary.out_sizes == []
    assert summary.param_nums == []

@ -166,7 +166,7 @@ def test_linear_model_summary_shapes(device, mode):
    """Test that the model summary correctly computes the input- and output shapes."""
    model = UnorderedModel().to(device)
    model.train()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.in_sizes == [[2, 10], [2, 7], [2, 3], [2, 7], UNKNOWN_SIZE]  # layer 2 # combine # layer 1 # relu
    assert summary.out_sizes == [[2, 2], [2, 9], [2, 5], [2, 7], UNKNOWN_SIZE]  # layer 2 # combine # layer 1 # relu
    assert model.training

@ -176,7 +176,7 @@ def test_linear_model_summary_shapes(device, mode):
def test_mixed_dtype_model_summary():
    """Test that the model summary works with models that have mixed input- and parameter dtypes."""
    model = MixedDtypeModel()
    summary = model.summarize()
    summary = summarize(model)
    assert summary.in_sizes == [[2, 3], [2, 3, 20]]  # embed # reduce
    assert summary.out_sizes == [[2, 3, 20], [2, 3, 1]]  # embed # reduce
@ -205,7 +205,7 @@ def test_rnn_summary_shapes(mode):

    model.example_input_array = torch.zeros(b, t, 10)

    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.in_sizes == [[b, t, i], [b, t, h]]  # rnn # linear
    assert summary.out_sizes == [[[b, t, h], [[1, b, h], [1, b, h]]], [b, t, o]]  # rnn # linear

@ -214,7 +214,7 @@ def test_rnn_summary_shapes(mode):
def test_summary_parameter_count(mode):
    """Test that the summary counts the number of parameters in every submodule."""
    model = UnorderedModel()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.param_nums == [
        model.layer2.weight.numel() + model.layer2.bias.numel(),
        model.combine.weight.numel() + model.combine.bias.numel(),

@ -228,14 +228,14 @@ def test_summary_parameter_count(mode):
def test_summary_layer_types(mode):
    """Test that the summary displays the layer names correctly."""
    model = UnorderedModel()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.layer_types == ["Linear", "Linear", "Linear", "ReLU", "Conv2d"]


@pytest.mark.parametrize("mode", ["full", "top"])
def test_summary_with_scripted_modules(mode):
    model = PartialScriptModel()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.layer_types == ["RecursiveScriptModule", "Linear"]
    assert summary.in_sizes == [UNKNOWN_SIZE, [2, 3]]
    assert summary.out_sizes == [UNKNOWN_SIZE, [2, 2]]
@ -272,7 +272,7 @@ def test_example_input_array_types(example_input, expected_size, mode):

    model = DummyLightningModule()
    model.example_input_array = example_input
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert summary.in_sizes == [expected_size]

@ -280,7 +280,7 @@ def test_example_input_array_types(example_input, expected_size, mode):
def test_model_size(mode):
    """Test model size is calculated correctly."""
    model = PreCalculatedModel()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert model.pre_calculated_model_size == summary.model_size

@ -288,7 +288,7 @@ def test_model_size(mode):
def test_empty_model_size(mode):
    """Test empty model size is zero."""
    model = EmptyModule()
    summary = model.summarize(mode=mode)
    summary = summarize(model, mode=mode)
    assert 0.0 == summary.model_size

@ -300,7 +300,7 @@ def test_model_size_precision(tmpdir):
    # fit model
    trainer = Trainer(default_root_dir=tmpdir, gpus=1, max_steps=1, max_epochs=1, precision=32)
    trainer.fit(model)
    summary = model.summarize()
    summary = summarize(model)
    assert model.pre_calculated_model_size == summary.model_size
@ -326,15 +326,15 @@ def test_lazy_model_summary():


def test_max_depth_equals_mode_interface():
    """Test model.summarize(full/top) interface mapping matches max_depth"""
    """Test summarize(model, full/top) interface mapping matches max_depth"""
    model = DeepNestedModel()

    summary_top = model.summarize(mode="top")
    summary_0 = model.summarize(max_depth=1)
    summary_top = summarize(model, mode="top")
    summary_0 = summarize(model, max_depth=1)
    assert str(summary_top) == str(summary_0)

    summary_full = model.summarize(mode="full")
    summary_minus1 = model.summarize(max_depth=-1)
    summary_full = summarize(model, mode="full")
    summary_minus1 = summarize(model, max_depth=-1)
    assert str(summary_full) == str(summary_minus1)

@ -351,4 +351,4 @@ def test_max_depth_param(max_depth):
@pytest.mark.parametrize("max_depth", [-99, -2, "invalid"])
def test_raise_invalid_max_depth_value(max_depth):
    with pytest.raises(ValueError, match=f"`max_depth` can be -1, 0 or > 0, got {max_depth}"):
        DeepNestedModel().summarize(max_depth=max_depth)
        summarize(DeepNestedModel(), max_depth=max_depth)