Fix NER space constraints

Allow entities to end on spaces, to avoid stumping the oracle when we're
inside an entity and there's a space just before a correct entity.
This commit is contained in:
Matthew Honnibal 2018-12-09 08:06:45 +01:00
parent 1b1a1af193
commit 16c5861d29
1 changed file with 2 additions and 5 deletions

View File

@ -319,10 +319,10 @@ cdef class In:
return False return False
# TODO: Is this quite right? I think it's supposed to be ensuring the # TODO: Is this quite right? I think it's supposed to be ensuring the
# gazetteer matches are maintained # gazetteer matches are maintained
elif st.B_(1).ent_iob != preset_ent_iob: elif st.B(1) != -1 and st.B_(1).ent_iob != preset_ent_iob:
return False return False
# Don't allow entities to extend across sentence boundaries # Don't allow entities to extend across sentence boundaries
elif st.B_(1).sent_start == 1: elif st.B(1) != -1 and st.B_(1).sent_start == 1:
return False return False
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
@ -371,9 +371,6 @@ cdef class Last:
cdef bint is_valid(const StateC* st, attr_t label) nogil: cdef bint is_valid(const StateC* st, attr_t label) nogil:
if st.B_(1).ent_iob == 1: if st.B_(1).ent_iob == 1:
return False return False
# Don't allow entities to end on whitespace
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
return False
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
@staticmethod @staticmethod