From 569440a6db542a714c55e6c45af06c25e16d8693 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 2 May 2018 08:42:10 +0200 Subject: [PATCH] Don't normalize gradient by batch size --- spacy/pipeline.pyx | 4 +--- spacy/syntax/nn_parser.pyx | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index cb838b449..477c9d6e2 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -389,7 +389,7 @@ class Tensorizer(Pipe): vectors = self.model.ops.xp.vstack([w.vector for w in doc]) target.append(vectors) target = self.model.ops.xp.vstack(target) - d_scores = (prediction - target) / prediction.shape[0] + d_scores = (prediction - target) loss = (d_scores**2).sum() return loss, d_scores @@ -510,7 +510,6 @@ class Tagger(Pipe): idx += 1 correct = self.model.ops.xp.array(correct, dtype='i') d_scores = scores - to_categorical(correct, nb_classes=scores.shape[1]) - d_scores /= d_scores.shape[0] loss = (d_scores**2).sum() d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs]) return float(loss), d_scores @@ -749,7 +748,6 @@ class MultitaskObjective(Tagger): idx += 1 correct = self.model.ops.xp.array(correct, dtype='i') d_scores = scores - to_categorical(correct, nb_classes=scores.shape[1]) - d_scores /= d_scores.shape[0] loss = (d_scores**2).sum() return float(loss), d_scores diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 8479a99fd..e419765ac 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -602,7 +602,6 @@ cdef class Parser: scores, bp_scores = vec2scores.begin_update(vector, drop=drop) d_scores = self.get_batch_loss(states, golds, scores) - d_scores /= len(docs) d_vector = bp_scores(d_scores, sgd=sgd) if drop != 0: d_vector *= mask @@ -654,7 +653,6 @@ cdef class Parser: backprop_lower = [] cdef float batch_size = len(docs) for i, d_scores in enumerate(states_d_scores): - d_scores /= batch_size if losses is not None: losses[self.name] += 
(d_scores**2).sum() ids, bp_vectors, bp_scores = backprops[i]