Param printing (#336)
* print thousands as K, M, B, T, ... * add option to print top-level modules only * added doc string and added spacing * do not print summary if neither "full" nor "top" * updated docs showing summary print options * fix line length for travis
This commit is contained in:
parent
ff2a21a08a
commit
6e3e740a7f
|
@ -38,6 +38,14 @@ trainer = Trainer(overfit_pct=0.01)
|
|||
#### Print the parameter count by layer
|
||||
By default, Lightning prints a list of parameters *and submodules* when it starts training.
|
||||
|
||||
``` {.python}
|
||||
# DEFAULT print a full list of all submodules and their parameters.
|
||||
trainer = Trainer(weights_summary='full')
|
||||
|
||||
# only print the top-level modules (i.e. the children of LightningModule).
|
||||
trainer = Trainer(weights_summary='top')
|
||||
```
|
||||
|
||||
---
|
||||
#### Print which gradients are nan
|
||||
This option prints a list of tensors with nan gradients.
|
||||
|
|
|
@ -12,11 +12,12 @@ import pandas as pd
|
|||
|
||||
class ModelSummary(object):
|
||||
|
||||
def __init__(self, model):
|
||||
def __init__(self, model, mode='full'):
|
||||
'''
|
||||
Generates summaries of model layers and dimensions.
|
||||
'''
|
||||
self.model = model
|
||||
self.mode = mode
|
||||
self.in_sizes = []
|
||||
self.out_sizes = []
|
||||
|
||||
|
@ -28,9 +29,20 @@ class ModelSummary(object):
|
|||
def __repr__(self):
|
||||
return self.summary.__str__()
|
||||
|
||||
def named_modules(self):
|
||||
if self.mode == 'full':
|
||||
mods = self.model.named_modules()
|
||||
mods = list(mods)[1:] # do not include root module (LightningModule)
|
||||
elif self.mode == 'top':
|
||||
# the children are the top-level modules
|
||||
mods = self.model.named_children()
|
||||
else:
|
||||
mods = []
|
||||
return list(mods)
|
||||
|
||||
def get_variable_sizes(self):
|
||||
'''Run sample input through each layer to get output sizes'''
|
||||
mods = list(self.model.modules())
|
||||
mods = self.named_modules()
|
||||
in_sizes = []
|
||||
out_sizes = []
|
||||
input_ = self.model.example_input_array
|
||||
|
@ -43,8 +55,7 @@ class ModelSummary(object):
|
|||
|
||||
with torch.no_grad():
|
||||
|
||||
for i in range(1, len(mods)):
|
||||
m = mods[i]
|
||||
for _, m in mods:
|
||||
if type(input_) is list or type(input_) is tuple: # pragma: no cover
|
||||
out = m(*input_)
|
||||
else:
|
||||
|
@ -72,16 +83,17 @@ class ModelSummary(object):
|
|||
|
||||
self.in_sizes = in_sizes
|
||||
self.out_sizes = out_sizes
|
||||
assert len(in_sizes) == len(out_sizes)
|
||||
return
|
||||
|
||||
def get_layer_names(self):
|
||||
'''Collect Layer Names'''
|
||||
mods = list(self.model.named_modules())
|
||||
mods = self.named_modules()
|
||||
names = []
|
||||
layers = []
|
||||
for m in mods[1:]:
|
||||
names += [m[0]]
|
||||
layers += [str(m[1].__class__)]
|
||||
for name, m in mods:
|
||||
names += [name]
|
||||
layers += [str(m.__class__)]
|
||||
|
||||
layer_types = [x.split('.')[-1][:-2] for x in layers]
|
||||
|
||||
|
@ -91,11 +103,9 @@ class ModelSummary(object):
|
|||
|
||||
def get_parameter_sizes(self):
|
||||
'''Get sizes of all parameters in `model`'''
|
||||
mods = list(self.model.modules())
|
||||
mods = self.named_modules()
|
||||
sizes = []
|
||||
|
||||
for i in range(1, len(mods)):
|
||||
m = mods[i]
|
||||
for _, m in mods:
|
||||
p = list(m.parameters())
|
||||
modsz = []
|
||||
for j in range(len(p)):
|
||||
|
@ -133,6 +143,7 @@ class ModelSummary(object):
|
|||
df['Name'] = self.layer_names
|
||||
df['Type'] = self.layer_types
|
||||
df['Params'] = self.param_nums
|
||||
df['Params'] = df['Params'].map(get_human_readable_count)
|
||||
|
||||
if self.model.example_input_array is not None:
|
||||
|
||||
|
@ -226,3 +237,28 @@ def get_gpu_memory_map():
|
|||
k = f'gpu_{k}'
|
||||
gpu_memory_map[k] = v
|
||||
return gpu_memory_map
|
||||
|
||||
|
||||
def get_human_readable_count(number):
    """
    Abbreviates an integer number with K, M, B, T for thousands, millions,
    billions and trillions, respectively.

    Examples:
        123     -> 123
        1234    -> 1 K       (one thousand)
        2e6     -> 2 M       (two million)
        3e9     -> 3 B       (three billion)
        4e12    -> 4 T       (four trillion)
        5e15    -> 5,000 T

    :param number: a positive integer number
    :returns: a string formatted according to the pattern described above.
    :raises ValueError: if ``number`` is negative.
    """
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would let a negative count produce garbage output silently.
    if number < 0:
        raise ValueError(f'number must be non-negative, got {number}')
    labels = [' ', 'K', 'M', 'B', 'T']
    # number of decimal digits (log10 is undefined at 0, so special-case it)
    num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1)
    num_groups = int(np.ceil(num_digits / 3))
    num_groups = min(num_groups, len(labels))  # don't abbreviate beyond trillions
    # scale into the chosen group; anything past 'T' shows as e.g. '5,000 T'
    shift = -3 * (num_groups - 1)
    number = number * (10 ** shift)
    index = num_groups - 1
    return f'{int(number):,d} {labels[index]}'
|
||||
|
|
|
@ -159,8 +159,8 @@ class LightningModule(GradInformation, ModelIO, ModelHooks):
|
|||
|
||||
return model
|
||||
|
||||
def summarize(self, mode='full'):
    """Print a summary of the model's layers and parameter counts.

    :param mode: 'full' lists every submodule; 'top' lists only the
        direct children of the LightningModule. Defaults to 'full' so
        existing callers of ``summarize()`` (no argument) keep working.
    """
    model_summary = ModelSummary(self, mode=mode)
    print(model_summary)
|
||||
|
||||
def freeze(self):
|
||||
|
|
|
@ -84,7 +84,7 @@ class Trainer(TrainerIO):
|
|||
distributed_backend=None,
|
||||
use_amp=False,
|
||||
print_nan_grads=False,
|
||||
print_weights_summary=True,
|
||||
weights_summary='full',
|
||||
weights_save_path=None,
|
||||
amp_level='O1',
|
||||
nb_sanity_val_steps=5):
|
||||
|
@ -116,7 +116,7 @@ class Trainer(TrainerIO):
|
|||
:param distributed_backend: str. Options: 'dp', 'ddp', 'ddp2'.
|
||||
:param use_amp: Bool. If true uses apex for 16bit precision
|
||||
:param print_nan_grads: Bool. Prints nan gradients
|
||||
:param print_weights_summary: Bool. Prints summary of weights
|
||||
:param weights_summary: str. Options: 'full', 'top'.
|
||||
:param weights_save_path: Bool. Where to save weights if on cluster
|
||||
:param amp_level: str. Check nvidia docs for level
|
||||
:param nb_sanity_val_steps: int. How many val steps before a full train loop.
|
||||
|
@ -131,7 +131,7 @@ class Trainer(TrainerIO):
|
|||
self.fast_dev_run = fast_dev_run
|
||||
self.on_gpu = gpus is not None and torch.cuda.is_available()
|
||||
self.process_position = process_position
|
||||
self.print_weights_summary = print_weights_summary
|
||||
self.weights_summary = weights_summary
|
||||
self.max_nb_epochs = max_nb_epochs
|
||||
self.min_nb_epochs = min_nb_epochs
|
||||
self.nb_sanity_val_steps = nb_sanity_val_steps
|
||||
|
@ -981,8 +981,8 @@ class Trainer(TrainerIO):
|
|||
self.__layout_bookeeping()
|
||||
|
||||
# print model summary
|
||||
if self.proc_rank == 0 and self.print_weights_summary:
|
||||
ref_model.summarize()
|
||||
if self.proc_rank == 0 and self.weights_summary in ['full', 'top']:
|
||||
ref_model.summarize(mode=self.weights_summary)
|
||||
|
||||
# link up experiment object
|
||||
if self.logger is not None:
|
||||
|
|
Loading…
Reference in New Issue