mirror of https://github.com/explosion/spaCy.git
Try to prevent spaces from being tagged as entities
This commit is contained in:
parent
2d0c366101
commit
1e6725e9b7
|
@ -10,6 +10,8 @@ from ._state cimport StateC
|
||||||
from .transition_system cimport Transition
|
from .transition_system cimport Transition
|
||||||
from .transition_system cimport do_func_t
|
from .transition_system cimport do_func_t
|
||||||
from ..gold cimport GoldParseC, GoldParse
|
from ..gold cimport GoldParseC, GoldParse
|
||||||
|
from ..lexeme cimport Lexeme
|
||||||
|
from ..attrs cimport IS_SPACE
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
|
|
||||||
|
|
||||||
|
@ -273,6 +275,9 @@ cdef class Begin:
|
||||||
# Don't allow entities to extend across sentence boundaries
|
# Don't allow entities to extend across sentence boundaries
|
||||||
elif st.B_(1).sent_start == 1:
|
elif st.B_(1).sent_start == 1:
|
||||||
return False
|
return False
|
||||||
|
# Don't allow entities to start on whitespace
|
||||||
|
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
||||||
|
return False
|
||||||
else:
|
else:
|
||||||
return label != 0 and not st.entity_is_open()
|
return label != 0 and not st.entity_is_open()
|
||||||
|
|
||||||
|
@ -366,6 +371,9 @@ cdef class Last:
|
||||||
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
||||||
if st.B_(1).ent_iob == 1:
|
if st.B_(1).ent_iob == 1:
|
||||||
return False
|
return False
|
||||||
|
# Don't allow entities to end on whitespace
|
||||||
|
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
||||||
|
return False
|
||||||
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
return st.entity_is_open() and label != 0 and st.E_(0).ent_type == label
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -418,6 +426,8 @@ cdef class Unit:
|
||||||
return False
|
return False
|
||||||
elif st.B_(1).ent_iob == 1:
|
elif st.B_(1).ent_iob == 1:
|
||||||
return False
|
return False
|
||||||
|
elif Lexeme.get_struct_attr(st.B_(0).lex, IS_SPACE):
|
||||||
|
return False
|
||||||
return label != 0 and not st.entity_is_open()
|
return label != 0 and not st.entity_is_open()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
Loading…
Reference in New Issue