mirror of https://github.com/explosion/spaCy.git
Fix NER space constraints
Allow entities to end on spaces, to avoid stumping the oracle when we're inside an entity and there's a space just before a correct entity.
This commit is contained in:
parent
1b1a1af193
commit
16c5861d29
|
@ -319,10 +319,10 @@ cdef class In:
|
||||||
return False
|
return False
|
||||||
# TODO: Is this quite right? I think it's supposed to be ensuring the
|
# TODO: Is this quite right? I think it's supposed to be ensuring the
|
||||||
# gazetteer matches are maintained
|
# gazetteer matches are maintained
|
||||||
elif st.B_(1).ent_iob != preset_ent_iob:
|
elif st.B(1) != -1 and st.B_(1).ent_iob != preset_ent_iob:
|
||||||
return False
|
return False
|
||||||
# Don't allow entities to extend across sentence boundaries
|
# Don't allow entities to extend across sentence boundaries
|
||||||
elif st.B_(1).sent_start == 1:
|
elif st.B(1) != -1 and st.B_(1).sent_start == 1:
|
||||||
return False
|
return False
|
||||||
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
||||||
|
|
||||||
|
@ -371,9 +371,6 @@ cdef class Last:
|
||||||
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
||||||
if st.B_(1).ent_iob == 1:
|
if st.B_(1).ent_iob == 1:
|
||||||
return False
|
return False
|
||||||
# Don't allow entities to end on whitespace
|
|
||||||
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
|
||||||
return False
|
|
||||||
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
Loading…
Reference in New Issue