From 6aa6a5bc25eeebf1ffea4ee97f7e26d3f09c357a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 3 Oct 2017 12:43:09 +0200
Subject: [PATCH] Add a layer type for history features

---
 spacy/_ml.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index 62fc7543f..38f220cc1 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -21,6 +21,7 @@ from thinc.neural._classes.affine import _set_dimensions_if_needed
 from thinc.api import FeatureExtracter, with_getitem
 from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
 from thinc.neural._classes.attention import ParametricAttention
+from thinc.neural._classes.embed import Embed
 from thinc.linear.linear import LinearModel
 from thinc.api import uniqued, wrap, flatten_add_lengths, noop
 
@@ -212,6 +213,27 @@ class PrecomputableMaxouts(Model):
         return Yfp, backward
 
 
+def HistoryFeatures(nr_class, hist_size=8, nr_dim=8):
+    '''Wrap a model, adding features representing action history.'''
+    embed = Embed(nr_dim, nr_dim, nr_class)
+    ops = embed.ops
+    def add_history_fwd(vectors_hists, drop=0.):
+        vectors, hist_ids = vectors_hists
+        flat_hists, bp_hists = embed.begin_update(hist_ids.flatten(), drop=drop)
+        hists = flat_hists.reshape((hist_ids.shape[0],
+                                    hist_ids.shape[1] * flat_hists.shape[1]))
+        outputs = ops.xp.hstack((vectors, hists))
+
+        def add_history_bwd(d_outputs, sgd=None):
+            d_vectors = d_outputs[:, :vectors.shape[1]]
+            d_hists = d_outputs[:, vectors.shape[1]:]
+            bp_hists(d_hists.reshape((d_hists.shape[0]*hist_size,
+                                      int(d_hists.shape[1]/hist_size))), sgd=sgd)
+            return embed.ops.xp.ascontiguousarray(d_vectors)
+        return outputs, add_history_bwd
+    return wrap(add_history_fwd, embed)
+
+
 def drop_layer(layer, factor=2.):
     def drop_layer_fwd(X, drop=0.):
         if drop <= 0.:
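
What the added layer computes: HistoryFeatures embeds the IDs of the last
hist_size parser actions and concatenates those embeddings onto each state
vector; the backward pass then splits the gradient at the original vector
width and routes the right-hand block back to the embedding table. Below is
a rough standalone sketch of that arithmetic in plain NumPy. It is not
spaCy or thinc code, and every name in it (table, state_width, batch, ...)
is an illustrative assumption, not an API from the patch:

    import numpy as np

    nr_class, hist_size, nr_dim = 10, 8, 8   # hist_size/nr_dim match the layer's defaults
    batch, state_width = 4, 64               # assumed sizes for the sketch

    # Toy embedding table standing in for thinc's Embed(nr_dim, nr_dim, nr_class).
    table = np.random.randn(nr_class, nr_dim).astype('f')

    vectors = np.random.randn(batch, state_width).astype('f')      # state vectors
    hist_ids = np.random.randint(0, nr_class, (batch, hist_size))  # action history IDs

    # Forward: embed each history ID, flatten per example, concatenate.
    flat_hists = table[hist_ids.flatten()]                  # (batch*hist_size, nr_dim)
    hists = flat_hists.reshape((batch, hist_size * nr_dim))
    outputs = np.hstack((vectors, hists))                   # (batch, state_width + hist_size*nr_dim)

    # Backward: split the incoming gradient at the original vector width.
    d_outputs = np.ones_like(outputs)
    d_vectors = d_outputs[:, :state_width]                  # flows back to the state vectors
    d_hists = d_outputs[:, state_width:].reshape((batch * hist_size, nr_dim))
    # d_hists is what bp_hists would scatter-add back into the embedding table.

In the patch itself the same split is done with vectors.shape[1] rather than
a stored width, and the Embed layer's begin_update callback handles the
table update when sgd is passed.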