From 52b48b415eae63408e68fb246a0759d195d0a86b Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 16 Oct 2016 11:41:36 +0200
Subject: [PATCH] Fix GoldParse class

---
 spacy/gold.pyx | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 8ad7c5298..aea055ead 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -228,7 +228,7 @@ cdef class GoldParse:
         if tags is None:
             tags = [None for _ in doc]
         if heads is None:
-            heads = [None for _ in doc]
+            heads = [token.i for token in doc]
         if deps is None:
             deps = [None for _ in doc]
         if entities is None:
@@ -261,12 +261,12 @@ cdef class GoldParse:
         self.orig_annot = list(zip(*annot_tuples))
 
         for i, gold_i in enumerate(self.cand_to_gold):
-            if doc[i].isspace():
+            if doc[i].text.isspace():
                 self.tags[i] = 'SP'
                 self.heads[i] = None
                 self.labels[i] = None
                 self.ner[i] = 'O'
-            elif gold_i is None:
+            if gold_i is None:
                 pass
             else:
                 self.tags[i] = tags[gold_i]
@@ -307,7 +307,7 @@ def biluo_tags_from_offsets(doc, entities):
         tags (list):
             A list of unicode strings, describing the tags. Each tag string will
             be of the form either "", "O" or "{action}-{label}", where action is one
-            of "B", "I", "L", "U". The empty string "" is used where the entity
+            of "B", "I", "L", "U". The string "-" is used where the entity
             offsets don't align with the tokenization in the Doc object. The
             training algorithm will view these as missing values. "O" denotes
             a non-entity token. "B" denotes the beginning of a multi-token entity,
@@ -325,7 +325,7 @@ def biluo_tags_from_offsets(doc, entities):
     '''
     starts = {token.idx: token.i for token in doc}
     ends = {token.idx+len(token): token.i for token in doc}
-    biluo = ['' for _ in doc]
+    biluo = ['-' for _ in doc]
     # Handle entity cases
     for start_char, end_char, label in entities:
         start_token = starts.get(start_char)
@@ -355,13 +355,3 @@ def biluo_tags_from_offsets(doc, entities):
 
 def is_punct_label(label):
     return label == 'P' or label.lower() == 'punct'
-
-
-
-
-
-
-
-
-
-