From 4ceebde523c003440cd31da447e12acf3b1147f0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Wed, 30 Aug 2017 17:31:53 -0500
Subject: [PATCH] Fix gradient bug in parser

---
 spacy/syntax/nn_parser.pyx | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 06a248767..552ea4f8f 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -36,7 +36,7 @@ from murmurhash.mrmr cimport hash64
 from preshed.maps cimport MapStruct
 from preshed.maps cimport map_get
 
-from thinc.api import layerize, chain, noop, clone
+from thinc.api import layerize, chain, noop, clone, with_flatten
 from thinc.neural import Model, Affine, ReLu, Maxout
 from thinc.neural._classes.batchnorm import BatchNorm as BN
 from thinc.neural._classes.selu import SELU
@@ -245,7 +245,7 @@ cdef class Parser:
         parser_maxout_pieces = util.env_opt('parser_maxout_pieces', 2)
         embed_size = util.env_opt('embed_size', 4000)
         tensors = fine_tune(Tok2Vec(token_vector_width, embed_size,
-                    preprocess=doc2feats()))
+                                    preprocess=doc2feats()))
         if parser_maxout_pieces == 1:
             lower = PrecomputableAffine(hidden_width if depth >= 1 else nr_class,
                         nF=cls.nr_feature,
@@ -393,7 +393,7 @@ cdef class Parser:
 
         tokvecs = self.model[0].ops.flatten(tokvecses)
         if USE_FINE_TUNE:
-            tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
+            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
 
         nr_state = len(docs)
         nr_class = self.moves.n_moves
@@ -453,7 +453,7 @@ cdef class Parser:
         cdef StateClass stcls, output
         tokvecs = self.model[0].ops.flatten(tokvecses)
         if USE_FINE_TUNE:
-            tokvecs += self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
+            tokvecs = self.model[0].ops.flatten(self.model[0]((docs, tokvecses)))
         cuda_stream = get_cuda_stream()
         state2vec, vec2scores = self.get_batch_model(len(docs), tokvecs,
                                                      cuda_stream, 0.0)
@@ -531,9 +531,8 @@ cdef class Parser:
             docs = [docs]
             golds = [golds]
         if USE_FINE_TUNE:
-            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
-            my_tokvecs = self.model[0].ops.flatten(my_tokvecs)
-            tokvecs += my_tokvecs
+            tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
+            tokvecs = self.model[0].ops.flatten(tokvecs)
 
         cuda_stream = get_cuda_stream()
 
@@ -586,7 +585,7 @@ cdef class Parser:
             backprops, sgd, cuda_stream)
         d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
         if USE_FINE_TUNE:
-            bp_my_tokvecs(d_tokvecs, sgd=sgd)
+            d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
         return d_tokvecs
 
     def update_beam(self, docs_tokvecs, golds, width=None, density=None,
@@ -606,9 +605,8 @@ cdef class Parser:
         assert min(lengths) >= 1
         tokvecs = self.model[0].ops.flatten(tokvecs)
         if USE_FINE_TUNE:
-            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
-            my_tokvecs = self.model[0].ops.flatten(my_tokvecs)
-            tokvecs += my_tokvecs
+            tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
+            tokvecs = self.model[0].ops.flatten(tokvecs)
 
         states = self.moves.init_batch(docs)
         for gold in golds:
@@ -642,7 +640,7 @@ cdef class Parser:
         self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream)
         d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths)
         if USE_FINE_TUNE:
-            bp_my_tokvecs(d_tokvecs, sgd=sgd)
+            d_tokvecs = bp_my_tokvecs(d_tokvecs, sgd=sgd)
         return d_tokvecs
 
     def _init_gold_batch(self, whole_docs, whole_golds):