mirror of https://github.com/explosion/spaCy.git
* Add a linear scan to Span.root method, to help with long sentences
This commit is contained in:
parent
ca0a603fde
commit
304339985e
|
@ -170,6 +170,19 @@ cdef class Span:
|
||||||
# This should probably be called 'head', and the other one called
|
# This should probably be called 'head', and the other one called
|
||||||
# 'gov'. But we went with 'head' elsewhere, and now we're stuck =/
|
# 'gov'. But we went with 'head' elsewhere, and now we're stuck =/
|
||||||
cdef int i
|
cdef int i
|
||||||
|
# First, we scan through the Span, and check whether there's a word
|
||||||
|
# with head==0, i.e. a sentence root. If so, we can return it. The
|
||||||
|
# longer the span, the more likely it contains a sentence root, and
|
||||||
|
# in this case we return in linear time.
|
||||||
|
for i in range(self.start, self.end):
|
||||||
|
if self.doc.c[i].head == 0:
|
||||||
|
return i
|
||||||
|
# If we don't have a sentence root, we do something that's not so
|
||||||
|
# algorithmically clever, but I think should be quite fast, especially
|
||||||
|
# for short spans.
|
||||||
|
# For each word, we count the path length, and arg min this measure.
|
||||||
|
# We could use better tree logic to save steps here...But I think this
|
||||||
|
# should be okay.
|
||||||
cdef int current_best = _count_words_to_root(&self.doc.c[self.start],
|
cdef int current_best = _count_words_to_root(&self.doc.c[self.start],
|
||||||
self.doc.length)
|
self.doc.length)
|
||||||
cdef int root = self.start
|
cdef int root = self.start
|
||||||
|
|
Loading…
Reference in New Issue