mirror of https://github.com/explosion/spaCy.git
* Prevent the Begin and In moves from applying in NER when the current token is the last token of a sentence (i.e. the next token in the buffer starts a new sentence), since the entity would otherwise span a sentence boundary. Re Issue #169
This commit is contained in:
parent
b65633f270
commit
01ab464383
|
@ -170,6 +170,9 @@ cdef class Begin:
|
||||||
return False
|
return False
|
||||||
elif preset_ent_iob == 3 and st.B_(0).ent_type != label:
|
elif preset_ent_iob == 3 and st.B_(0).ent_type != label:
|
||||||
return False
|
return False
|
||||||
|
# Don't allow entities to extend across sentence boundaries
|
||||||
|
elif st.B_(1).sent_start:
|
||||||
|
return False
|
||||||
else:
|
else:
|
||||||
return label != 0 and not st.entity_is_open()
|
return label != 0 and not st.entity_is_open()
|
||||||
|
|
||||||
|
@ -207,8 +210,12 @@ cdef class In:
|
||||||
elif preset_ent_iob == 3:
|
elif preset_ent_iob == 3:
|
||||||
return False
|
return False
|
||||||
# TODO: Is this quite right?
|
# TODO: Is this quite right?
|
||||||
|
# I think it's supposed to be ensuring the gazetteer matches are maintained
|
||||||
elif st.B_(1).ent_iob != preset_ent_iob:
|
elif st.B_(1).ent_iob != preset_ent_iob:
|
||||||
return False
|
return False
|
||||||
|
# Don't allow entities to extend across sentence boundaries
|
||||||
|
elif st.B_(1).sent_start:
|
||||||
|
return False
|
||||||
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
Loading…
Reference in New Issue