mirror of https://github.com/explosion/spaCy.git
* Fix Issue #54: Error merging multi-word token when there's a mid-token match.
This commit is contained in:
parent
42617548af
commit
2ef170a991
|
@ -281,6 +281,8 @@ cdef class Tokens:
|
|||
if self.data[i].idx == start_idx:
|
||||
start = i
|
||||
if (self.data[i].idx + self.data[i].lex.length) == end_idx:
|
||||
if start == -1:
|
||||
return None
|
||||
end = i + 1
|
||||
break
|
||||
else:
|
||||
|
|
|
@ -30,3 +30,9 @@ def test_merge_heads():
|
|||
assert tokens[3].head.i == 1
|
||||
assert tokens[4].head.i in [1, 3]
|
||||
assert tokens[5].head.i == 4
|
||||
|
||||
|
||||
def test_issue_54():
    """Regression test for Issue #54.

    Issue #54: error merging a multi-word token when there's a mid-token
    match. The input sentence below is run through the pipeline with
    multi-word-expression merging enabled.
    """
    # The backslashes are escaped explicitly: a bare ``\p`` is not a valid
    # escape sequence, and relying on it being passed through literally
    # triggers warnings on newer Python versions. The runtime string is
    # unchanged (it still contains a literal ``\pm``).
    text = (
        u'Talks given by women had a slightly higher number of questions '
        u'asked (3.2$\\pm$0.2) than talks given by men (2.6$\\pm$0.1).'
    )
    # No assertions on the result: the test passes as long as this call
    # completes without raising.
    tokens = NLU(text, merge_mwes=True)
|
||||
|
||||
|
|
Loading…
Reference in New Issue