Don't normalize gradient by batch size

This commit is contained in:
Matthew Honnibal 2018-05-02 08:42:10 +02:00
parent 9d147e12c4
commit 569440a6db
2 changed files with 1 addition and 5 deletions

View File

@@ -389,7 +389,7 @@ class Tensorizer(Pipe):
vectors = self.model.ops.xp.vstack([w.vector for w in doc])
target.append(vectors)
target = self.model.ops.xp.vstack(target)
d_scores = (prediction - target) / prediction.shape[0]
d_scores = (prediction - target)
loss = (d_scores**2).sum()
return loss, d_scores
@@ -510,7 +510,6 @@ class Tagger(Pipe):
idx += 1
correct = self.model.ops.xp.array(correct, dtype='i')
d_scores = scores - to_categorical(correct, nb_classes=scores.shape[1])
d_scores /= d_scores.shape[0]
loss = (d_scores**2).sum()
d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
return float(loss), d_scores
@@ -749,7 +748,6 @@ class MultitaskObjective(Tagger):
idx += 1
correct = self.model.ops.xp.array(correct, dtype='i')
d_scores = scores - to_categorical(correct, nb_classes=scores.shape[1])
d_scores /= d_scores.shape[0]
loss = (d_scores**2).sum()
return float(loss), d_scores

View File

@@ -602,7 +602,6 @@ cdef class Parser:
scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
d_scores = self.get_batch_loss(states, golds, scores)
d_scores /= len(docs)
d_vector = bp_scores(d_scores, sgd=sgd)
if drop != 0:
d_vector *= mask
@@ -654,7 +653,6 @@ cdef class Parser:
backprop_lower = []
cdef float batch_size = len(docs)
for i, d_scores in enumerate(states_d_scores):
d_scores /= batch_size
if losses is not None:
losses[self.name] += (d_scores**2).sum()
ids, bp_vectors, bp_scores = backprops[i]