From 6b0fec76fddc08bd4ab5f69bae2125b2473cb278 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 4 Sep 2019 13:42:42 +0200 Subject: [PATCH] Fix handling of preset entities in NER * Fix check of valid ent_type for B * Add valid L as preset-I followed by not-I --- spacy/syntax/ner.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index c2a85bfe1..767e4c2e0 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -258,7 +258,7 @@ cdef class Begin: @staticmethod cdef bint is_valid(const StateC* st, attr_t label) nogil: cdef int preset_ent_iob = st.B_(0).ent_iob - cdef int preset_ent_label = st.B_(0).ent_type + cdef attr_t preset_ent_label = st.B_(0).ent_type # If we're the last token of the input, we can't B -- must U or O. if st.B(1) == -1: return False @@ -395,6 +395,9 @@ cdef class Last: return False elif not st.entity_is_open(): return False + elif st.B_(0).ent_iob == 1 and st.B_(1).ent_iob != 1: + # If a preset entity has I followed by not-I, is L + return True elif st.E_(0).ent_type != label: return False elif st.B_(1).ent_iob == 1: