* Prevent the Begin and In moves from applying in NER when we're at the last token of a sentence (i.e. the next token starts a new sentence), as this would mean the entity would span a sentence boundary. Re: Issue #169

This commit is contained in:
Matthew Honnibal 2015-11-07 05:30:44 +11:00
parent b65633f270
commit 01ab464383
1 changed file with 7 additions and 0 deletions

View File

@@ -170,6 +170,9 @@ cdef class Begin:
return False
elif preset_ent_iob == 3 and st.B_(0).ent_type != label:
return False
# Don't allow entities to extend across sentence boundaries
elif st.B_(1).sent_start:
return False
else:
return label != 0 and not st.entity_is_open()
@@ -207,8 +210,12 @@ cdef class In:
elif preset_ent_iob == 3:
return False
# TODO: Is this quite right?
# I think it's supposed to be ensuring the gazetteer matches are maintained
elif st.B_(1).ent_iob != preset_ent_iob:
return False
# Don't allow entities to extend across sentence boundaries
elif st.B_(1).sent_start:
return False
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
@staticmethod