From 16c5861d29ba9bd82476171289cce86a79e1ec2d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 9 Dec 2018 08:06:45 +0100 Subject: [PATCH] Fix NER space constraints Allow entities to end on spaces, to avoid stumping the oracle when we're inside an entity and there's a space just before a correct entity. --- spacy/syntax/ner.pyx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index 3f6d96304..62aed3c65 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -319,10 +319,10 @@ cdef class In: return False # TODO: Is this quite right? I think it's supposed to be ensuring the # gazetteer matches are maintained - elif st.B_(1).ent_iob != preset_ent_iob: + elif st.B(1) != -1 and st.B_(1).ent_iob != preset_ent_iob: return False # Don't allow entities to extend across sentence boundaries - elif st.B_(1).sent_start == 1: + elif st.B(1) != -1 and st.B_(1).sent_start == 1: return False return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label @@ -371,9 +371,6 @@ cdef class Last: cdef bint is_valid(const StateC* st, attr_t label) nogil: if st.B_(1).ent_iob == 1: return False - # Don't allow entities to end on whitespace - elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE): - return False return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label @staticmethod