From 4efb3919940da57965152a538a58cfc05217d513 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Tue, 9 May 2017 18:45:18 +0200
Subject: [PATCH] Fix serializer

---
 spacy/tokens/doc.pyx | 51 ++++++++++++++++++--------------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 6f397f963..515228d8d 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -1,4 +1,6 @@
 # coding: utf8
+# cython: infer_types=True
+# cython: bounds_check=False
 from __future__ import unicode_literals
 
 cimport cython
@@ -565,7 +567,7 @@ cdef class Doc:
         for i in range(self.length):
             self.c[i] = parsed[i]
 
-    def from_array(self, attrs, array):
+    def from_array(self, attrs, int[:, :] array):
         """
         Write to a `Doc` object, from an `(M, N)` array of attributes.
         """
@@ -573,34 +575,23 @@ cdef class Doc:
         cdef attr_id_t attr_id
         cdef TokenC* tokens = self.c
         cdef int length = len(array)
-        cdef attr_t[:] values
+        # Get set up for fast loading
+        cdef Pool mem = Pool()
+        cdef int n_attrs = len(attrs)
+        attr_ids = <attr_id_t*>mem.alloc(n_attrs, sizeof(attr_id_t))
+        for i, attr_id in enumerate(attrs):
+            attr_ids[i] = attr_id
+        # Now load the data
+        for i in range(self.length):
+            token = &self.c[i]
+            for j in range(n_attrs):
+                Token.set_struct_attr(token, attr_ids[j], array[i, j])
+        # Auxiliary loading logic
         for col, attr_id in enumerate(attrs):
-            values = array[:, col]
-            if attr_id == HEAD:
+            if attr_id == TAG:
                 for i in range(length):
-                    tokens[i].head = values[i]
-                    if values[i] >= 1:
-                        tokens[i + values[i]].l_kids += 1
-                    elif values[i] < 0:
-                        tokens[i + values[i]].r_kids += 1
-            elif attr_id == TAG:
-                for i in range(length):
-                    if values[i] != 0:
-                        self.vocab.morphology.assign_tag(&tokens[i], values[i])
-            elif attr_id == POS:
-                for i in range(length):
-                    tokens[i].pos = <univ_pos_t>values[i]
-            elif attr_id == DEP:
-                for i in range(length):
-                    tokens[i].dep = values[i]
-            elif attr_id == ENT_IOB:
-                for i in range(length):
-                    tokens[i].ent_iob = values[i]
-            elif attr_id == ENT_TYPE:
-                for i in range(length):
-                    tokens[i].ent_type = values[i]
-            else:
-                raise ValueError("Unknown attribute ID: %d" % attr_id)
+                    if array[i, col] != 0:
+                        self.vocab.morphology.assign_tag(&tokens[i], array[i, col])
         set_children_from_heads(self.c, self.length)
         self.is_parsed = bool(HEAD in attrs or DEP in attrs)
         self.is_tagged = bool(TAG in attrs or POS in attrs)
@@ -645,9 +636,9 @@ cdef class Doc:
             self.push_back(lex, has_space)
  
             start = end + has_space
-        self.from_array(attrs[:, 2:],
-            [TAG,LEMMA,HEAD,DEP,ENT_IOB,ENT_TYPE])
-
+        self.from_array([TAG,LEMMA,HEAD,DEP,ENT_IOB,ENT_TYPE],
+            attrs[:, 2:])
+        return self
 
     def merge(self, int start_idx, int end_idx, *args, **attributes):
         """