From 0ca58324271aeb56e949be8afacac4a78634bcd7 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 20 Jul 2017 00:18:49 +0200 Subject: [PATCH] Improve negative example handling in NER oracle --- spacy/syntax/ner.pyx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index 93d98a8cd..656c84e4c 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -127,6 +127,8 @@ cdef class BiluoPushDown(TransitionSystem): if name == '-' or name == None: move_str = 'M' label = 0 + elif name == '!O': + return Transition(clas=0, move=ISNT, label=0, score=0) elif '-' in name: move_str, label_str = name.split('-', 1) # Hacky way to denote 'not this entity' @@ -308,6 +310,9 @@ cdef class In: elif g_act == UNIT: # I, Gold U --> True iff next tag == O return next_act != OUT + # Support partial supervision in the form of "not this label" + elif g_act == ISNT: + return 0 else: return 1 @@ -350,6 +355,9 @@ cdef class Last: elif g_act == UNIT: # L, Gold U --> True return 0 + # Support partial supervision in the form of "not this label" + elif g_act == ISNT: + return 0 else: return 1 @@ -418,7 +426,9 @@ cdef class Out: cdef int g_act = gold.ner[s.B(0)].move cdef attr_t g_tag = gold.ner[s.B(0)].label - if g_act == MISSING or g_act == ISNT: + if g_act == ISNT and g_tag == 0: + return 1 + elif g_act == MISSING or g_act == ISNT: return 0 elif g_act == BEGIN: # O, Gold B --> False