import numpy as np
from torch import nn

"""
Module to describe gradients
"""


class GradInformation(nn.Module):
    """Mixin that adds gradient-inspection helpers to an ``nn.Module``."""

    def grad_norm(self, norm_type):
        """Compute per-parameter and total gradient norms.

        :param norm_type: order of the norm (e.g. 2 for the L2 norm).
        :return: dict mapping ``'grad_{norm_type}_norm_{i}'`` to the rounded
            norm of parameter ``i``'s gradient, plus
            ``'grad_{norm_type}_norm_total'`` for the combined norm over all
            parameters. Parameters without gradients are skipped.
        """
        results = {}
        total_norm = 0.0  # float accumulator: stays valid even if no param has a grad
        for i, p in enumerate(self.parameters()):
            # Explicit check instead of try/except-pass: a missing grad is the
            # only expected failure mode, and swallowing Exception hides bugs.
            if p.requires_grad and p.grad is not None:
                # .norm(norm_type) already returns the norm; the original code
                # applied ** (1 / norm_type) a second time, which is wrong.
                param_norm = float(p.grad.data.norm(norm_type))
                total_norm += param_norm ** norm_type
                results['grad_{}_norm_{}'.format(norm_type, i)] = round(param_norm, 3)

        # Combine the accumulated p-th powers into the overall p-norm.
        total_norm = total_norm ** (1. / norm_type)
        results['grad_{}_norm_total'.format(norm_type)] = round(total_norm, 3)
        return results

    def describe_grads(self):
        """Print max/min/mean of each parameter's gradient (skips grad-less params)."""
        for p in self.parameters():
            if p.grad is None:
                # this param had no grad
                continue
            # .cpu() so this also works for parameters living on an accelerator
            g = p.grad.data.cpu().numpy().flatten()
            print(np.max(g), np.min(g), np.mean(g))

    def describe_params(self):
        """Print max/min/mean of each parameter's values."""
        for p in self.parameters():
            # .cpu() so this also works for parameters living on an accelerator
            g = p.data.cpu().numpy().flatten()
            print(np.max(g), np.min(g), np.mean(g))