From ec63f4fe7ba874995bbb14939ce1d54d4fe8337b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 29 Jul 2017 21:58:37 +0200 Subject: [PATCH] Add option to control how missing entities are handled when getting NER tags --- spacy/gold.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index aa5daa41d..39951447c 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -483,7 +483,7 @@ cdef class GoldParse: return not nonproj.is_nonproj_tree(self.heads) -def biluo_tags_from_offsets(doc, entities): +def biluo_tags_from_offsets(doc, entities, missing='O'): """Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out scheme (BILUO). @@ -535,7 +535,7 @@ def biluo_tags_from_offsets(doc, entities): if i in entity_chars: break else: - biluo[token.i] = 'O' + biluo[token.i] = missing return biluo