mirror of https://github.com/explosion/spaCy.git
Try to prevent spaces from being tagged as entities
This commit is contained in:
parent
2d0c366101
commit
1e6725e9b7
|
@ -10,6 +10,8 @@ from ._state cimport StateC
|
|||
from .transition_system cimport Transition
|
||||
from .transition_system cimport do_func_t
|
||||
from ..gold cimport GoldParseC, GoldParse
|
||||
from ..lexeme cimport Lexeme
|
||||
from ..attrs cimport IS_SPACE
|
||||
from ..errors import Errors
|
||||
|
||||
|
||||
|
@ -273,6 +275,9 @@ cdef class Begin:
|
|||
# Don't allow entities to extend across sentence boundaries
|
||||
elif st.B_(1).sent_start == 1:
|
||||
return False
|
||||
# Don't allow entities to start on whitespace
|
||||
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
||||
return False
|
||||
else:
|
||||
return label != 0 and not st.entity_is_open()
|
||||
|
||||
|
@ -366,6 +371,9 @@ cdef class Last:
|
|||
# Validity check shown under the `cdef class Last:` hunk: decides whether the
# action may be applied in parser state `st` with entity label `label`.
# Returns False if st.B_(1).ent_iob == 1 or if the lexeme of st.B_(0) has the
# IS_SPACE attribute set; otherwise the result is the final boolean expression.
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
||||
# NOTE(review): ent_iob == 1 presumably marks the token at B_(1) as preset
# "inside" an entity, so the entity could not end here -- confirm against
# the IOB integer encoding.
if st.B_(1).ent_iob == 1:
|
||||
return False
|
||||
# Don't allow entities to end on whitespace
|
||||
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
||||
return False
|
||||
# Otherwise valid only when an entity is open, the label is non-zero, and
# the label equals the ent_type of st.E_(0).
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
||||
|
||||
@staticmethod
|
||||
|
@ -418,6 +426,8 @@ cdef class Unit:
|
|||
return False
|
||||
elif st.B_(1).ent_iob == 1:
|
||||
return False
|
||||
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
||||
return False
|
||||
return label != 0 and not st.entity_is_open()
|
||||
|
||||
@staticmethod
|
||||
|
|
Loading…
Reference in New Issue