diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index b61302c24..27d99a045 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -281,6 +281,8 @@ cdef class Tokens: if self.data[i].idx == start_idx: start = i if (self.data[i].idx + self.data[i].lex.length) == end_idx: + if start == -1: + return None end = i + 1 break else: diff --git a/tests/test_merge.py b/tests/test_merge.py index 39693b178..370a334b8 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -30,3 +30,9 @@ def test_merge_heads(): assert tokens[3].head.i == 1 assert tokens[4].head.i in [1, 3] assert tokens[5].head.i == 4 + + +def test_issue_54(): + text = u'Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1).' + tokens = NLU(text, merge_mwes=True) +