From 304339985e53e64bbe8a68dbcddac8e770b2e9eb Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sat, 16 Jan 2016 16:17:28 +0100
Subject: [PATCH] * Add a linear scan to Span.root method, to help with long
 sentences

---
 spacy/tokens/span.pyx | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 4a47e6c42..0637273cb 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -170,6 +170,19 @@ cdef class Span:
             # This should probably be called 'head', and the other one called
             # 'gov'. But we went with 'head' elsehwhere, and now we're stuck =/
             cdef int i
+            # First, scan the Span for a word with head == 0, i.e. a sentence
+            # root, and return as soon as one is found. Longer spans are more
+            # likely to contain a sentence root, so they exit in linear time.
+            # NOTE(review): this returns the int index i, not a token; confirm.
+            for i in range(self.start, self.end):
+                if self.doc.c[i].head == 0:
+                    return i
+            # If the span holds no sentence root, fall back to something less
+            # algorithmically clever that should still be quite fast, especially
+            # for short spans.
+            # For each word, count the path length to the sentence root, and take
+            # the word that minimizes it (the arg min).
+            # Better tree logic could save steps here, but this should be okay.
             cdef int current_best = _count_words_to_root(&self.doc.c[self.start],
                                                          self.doc.length)
             cdef int root = self.start