Param printing (#336)

* print thousands as K, M, B, T, ...

* add option to print top-level modules only

* added docstring and spacing

* do not print summary if neither "full" nor "top"

* updated docs showing summary print options

* fix line length for travis
Author: Adrian Wälchli, 2019-10-08 21:30:06 +02:00 (committed by William Falcon)
Parent: ff2a21a08a
Commit: 6e3e740a7f
4 changed files with 63 additions and 19 deletions


@@ -38,6 +38,14 @@ trainer = Trainer(overfit_pct=0.01)
#### Print the parameter count by layer
By default, Lightning prints a list of parameters *and submodules* when it starts training.
``` {.python}
# DEFAULT print a full list of all submodules and their parameters.
trainer = Trainer(weights_summary='full')
# only print the top-level modules (i.e. the children of LightningModule).
trainer = Trainer(weights_summary='top')
```
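Passing any other value skips the summary printout entirely (this follows from the `weights_summary in ['full', 'top']` check added to the Trainer below; `None` here is just an illustrative choice).
``` {.python}
# assumption: any value other than 'full' or 'top' (e.g. None) disables the summary
trainer = Trainer(weights_summary=None)
```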
---
#### Print which gradients are nan
This option prints a list of tensors with nan gradients.


@@ -12,11 +12,12 @@ import pandas as pd
class ModelSummary(object):
def __init__(self, model):
def __init__(self, model, mode='full'):
'''
Generates summaries of model layers and dimensions.
'''
self.model = model
self.mode = mode
self.in_sizes = []
self.out_sizes = []
@@ -28,9 +29,20 @@ class ModelSummary(object):
def __repr__(self):
return self.summary.__str__()
def named_modules(self):
if self.mode == 'full':
mods = self.model.named_modules()
mods = list(mods)[1:] # do not include root module (LightningModule)
elif self.mode == 'top':
# the children are the top-level modules
mods = self.model.named_children()
else:
mods = []
return list(mods)
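As an aside (not part of the diff), the difference between `named_modules()` and `named_children()` — and hence between the `'full'` and `'top'` modes — can be seen on a small, hypothetical nested module:

```python
import torch.nn as nn

# Hypothetical toy model, used only to illustrate the two summary modes.
model = nn.Sequential(
    nn.Sequential(nn.Linear(10, 20), nn.ReLU()),  # a nested block
    nn.Linear(20, 1),
)

# 'full' mode: every submodule except the root, as yielded by named_modules()
print([name for name, _ in list(model.named_modules())[1:]])  # ['0', '0.0', '0.1', '1']

# 'top' mode: only the direct children of the root module
print([name for name, _ in model.named_children()])           # ['0', '1']
```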
def get_variable_sizes(self):
'''Run sample input through each layer to get output sizes'''
mods = list(self.model.modules())
mods = self.named_modules()
in_sizes = []
out_sizes = []
input_ = self.model.example_input_array
@@ -43,8 +55,7 @@ class ModelSummary(object):
with torch.no_grad():
for i in range(1, len(mods)):
m = mods[i]
for _, m in mods:
if type(input_) is list or type(input_) is tuple: # pragma: no cover
out = m(*input_)
else:
@@ -72,16 +83,17 @@ class ModelSummary(object):
self.in_sizes = in_sizes
self.out_sizes = out_sizes
assert len(in_sizes) == len(out_sizes)
return
def get_layer_names(self):
'''Collect Layer Names'''
mods = list(self.model.named_modules())
mods = self.named_modules()
names = []
layers = []
for m in mods[1:]:
names += [m[0]]
layers += [str(m[1].__class__)]
for name, m in mods:
names += [name]
layers += [str(m.__class__)]
layer_types = [x.split('.')[-1][:-2] for x in layers]
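For illustration, the `split('.')[-1][:-2]` trimming above turns a layer's class string into its bare type name (a sketch, assuming `torch.nn` is available):

```python
import torch.nn as nn

s = str(nn.Linear(3, 4).__class__)  # "<class 'torch.nn.modules.linear.Linear'>"
print(s.split('.')[-1][:-2])        # 'Linear'
```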
@@ -91,11 +103,9 @@ class ModelSummary(object):
def get_parameter_sizes(self):
'''Get sizes of all parameters in `model`'''
mods = list(self.model.modules())
mods = self.named_modules()
sizes = []
for i in range(1, len(mods)):
m = mods[i]
for _, m in mods:
p = list(m.parameters())
modsz = []
for j in range(len(p)):
@@ -133,6 +143,7 @@ class ModelSummary(object):
df['Name'] = self.layer_names
df['Type'] = self.layer_types
df['Params'] = self.param_nums
df['Params'] = df['Params'].map(get_human_readable_count)
if self.model.example_input_array is not None:
@@ -226,3 +237,28 @@ def get_gpu_memory_map():
k = f'gpu_{k}'
gpu_memory_map[k] = v
return gpu_memory_map
def get_human_readable_count(number):
"""
Abbreviates an integer number with K, M, B, T for thousands, millions,
billions and trillions, respectively.
Examples:
123 -> 123
1234 -> 1 K (one thousand)
2e6 -> 2 M (two million)
3e9 -> 3 B (three billion)
4e12 -> 4 T (four trillion)
5e15 -> 5,000 T
:param number: a positive integer number
:return: a string formatted according to the pattern described above.
"""
assert number >= 0
labels = [' ', 'K', 'M', 'B', 'T']
num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1)
num_groups = int(np.ceil(num_digits / 3))
num_groups = min(num_groups, len(labels)) # don't abbreviate beyond trillions
shift = -3 * (num_groups - 1)
number = number * (10 ** shift)
index = num_groups - 1
return f'{int(number):,d} {labels[index]}'
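For reference, a few calls to the function above and the strings they should produce (the outputs follow the docstring examples; the blank label leaves trailing spaces for numbers below one thousand):

```python
# Assumes get_human_readable_count from above is in scope.
print(get_human_readable_count(123))    # '123  '  (no abbreviation)
print(get_human_readable_count(1234))   # '1 K'
print(get_human_readable_count(2e6))    # '2 M'
print(get_human_readable_count(5e15))   # '5,000 T'  (capped at trillions)
```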


@@ -159,8 +159,8 @@ class LightningModule(GradInformation, ModelIO, ModelHooks):
return model
def summarize(self):
model_summary = ModelSummary(self)
def summarize(self, mode):
model_summary = ModelSummary(self, mode=mode)
print(model_summary)
def freeze(self):
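After this change, the summary mode is chosen at the call site; a minimal usage sketch (where `model` stands for any LightningModule instance):

```python
model.summarize(mode='full')  # list every submodule and its parameter count
model.summarize(mode='top')   # list only the direct children of the LightningModule
```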


@@ -84,7 +84,7 @@ class Trainer(TrainerIO):
distributed_backend=None,
use_amp=False,
print_nan_grads=False,
print_weights_summary=True,
weights_summary='full',
weights_save_path=None,
amp_level='O1',
nb_sanity_val_steps=5):
@@ -116,7 +116,7 @@ class Trainer(TrainerIO):
:param distributed_backend: str. Options: 'dp', 'ddp', 'ddp2'.
:param use_amp: Bool. If true uses apex for 16bit precision
:param print_nan_grads: Bool. Prints nan gradients
:param print_weights_summary: Bool. Prints summary of weights
:param weights_summary: str. Options: 'full', 'top'.
:param weights_save_path: Bool. Where to save weights if on cluster
:param amp_level: str. Check nvidia docs for level
:param nb_sanity_val_steps: int. How many val steps before a full train loop.
@@ -131,7 +131,7 @@ class Trainer(TrainerIO):
self.fast_dev_run = fast_dev_run
self.on_gpu = gpus is not None and torch.cuda.is_available()
self.process_position = process_position
self.print_weights_summary = print_weights_summary
self.weights_summary = weights_summary
self.max_nb_epochs = max_nb_epochs
self.min_nb_epochs = min_nb_epochs
self.nb_sanity_val_steps = nb_sanity_val_steps
@@ -981,8 +981,8 @@ class Trainer(TrainerIO):
self.__layout_bookeeping()
# print model summary
if self.proc_rank == 0 and self.print_weights_summary:
ref_model.summarize()
if self.proc_rank == 0 and self.weights_summary in ['full', 'top']:
ref_model.summarize(mode=self.weights_summary)
# link up experiment object
if self.logger is not None:
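The net effect of the guard above, sketched in isolation (a hypothetical helper, not the real Trainer code path): the summary is printed only on rank 0 and only for the two supported modes.

```python
def maybe_print_summary(proc_rank, weights_summary, ref_model):
    # hypothetical standalone version of the check added above
    if proc_rank == 0 and weights_summary in ['full', 'top']:
        ref_model.summarize(mode=weights_summary)
```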