* Fix Issue #54: Error merging multi-word token when there's a mid-token match.

This commit is contained in:
Matthew Honnibal 2015-04-16 04:28:06 +02:00
parent 42617548af
commit 2ef170a991
2 changed files with 8 additions and 0 deletions

View File

@@ -281,6 +281,8 @@ cdef class Tokens:
if self.data[i].idx == start_idx:
start = i
if (self.data[i].idx + self.data[i].lex.length) == end_idx:
if start == -1:
return None
end = i + 1
break
else:

View File

@@ -30,3 +30,9 @@ def test_merge_heads():
assert tokens[3].head.i == 1
assert tokens[4].head.i in [1, 3]
assert tokens[5].head.i == 4
def test_issue_54():
    """Regression test for issue #54.

    Runs the pipeline with ``merge_mwes=True`` over a sentence containing
    ``$\\pm$`` markup. The test makes no assertion on the result: it passes
    as long as the call completes without raising.
    """
    # The backslashes are escaped explicitly: a bare "\p" is not a recognised
    # escape sequence and triggers a warning on modern Python 3, while "\\p"
    # yields the identical backslash-p text on every Python version.
    text = (
        u'Talks given by women had a slightly higher number of questions '
        u'asked (3.2$\\pm$0.2) than talks given by men (2.6$\\pm$0.1).'
    )
    tokens = NLU(text, merge_mwes=True)