From d25d31442df1e2de7f66346ec24694c88a9fe478 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 27 May 2015 19:14:31 +0200
Subject: [PATCH] * Hackishly support broken NER annotations. Should fix this.

---
 spacy/munge/read_ner.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/spacy/munge/read_ner.py b/spacy/munge/read_ner.py
index aa601bdd2..7fa651577 100644
--- a/spacy/munge/read_ner.py
+++ b/spacy/munge/read_ner.py
@@ -80,11 +80,15 @@ def tags_to_entities(tags):
     entities = []
     start = None
     for i, tag in enumerate(tags):
-        if tag.startswith('O') or tag == '-':
-            assert not start
+        if tag.startswith('O'):
+            # TODO: We shouldn't be getting these malformed inputs. Fix this.
+            if start is not None:
+                start = None
+            continue
+        elif tag == '-':
             continue
         elif tag.startswith('I'):
-            assert start is not None, tags
+            assert start is not None, tags[:i]
             continue
         if tag.startswith('U'):
             entities.append((tag[2:], i, i))