* Hackishly support broken NER annotations. Should fix this.

This commit is contained in:
Matthew Honnibal 2015-05-27 19:14:31 +02:00
parent a7cee46fe9
commit d25d31442d
1 changed file with 7 additions and 3 deletions

View File

@@ -80,11 +80,15 @@ def tags_to_entities(tags):
entities = [] entities = []
start = None start = None
for i, tag in enumerate(tags): for i, tag in enumerate(tags):
if tag.startswith('O') or tag == '-': if tag.startswith('O'):
assert not start # TODO: We shouldn't be getting these malformed inputs. Fix this.
if start is not None:
start = None
continue
elif tag == '-':
continue continue
elif tag.startswith('I'): elif tag.startswith('I'):
assert start is not None, tags assert start is not None, tags[:i]
continue continue
if tag.startswith('U'): if tag.startswith('U'):
entities.append((tag[2:], i, i)) entities.append((tag[2:], i, i))