Add constituent bookkeeping to the parser structs and state.

Summary of what this patch changes:

* spacy/structs.pxd
  - Declares a new `Constituent` struct with four int fields:
    `head`, `start`, `end` and `label`.
  - Adds two int fields, `attach_order` and `ctnt_label`, to `TokenC`.
* spacy/syntax/_state.pxd
  - cimports `Constituent` and adds a `Constituent* ctnt` member to `State`.
* spacy/syntax/_state.pyx
  - cimports `Constituent`.
  - `new_state` now allocates `padded_len` (= sent_len + 2 * PADDING)
    `Constituent` slots from the pool and advances `s.ctnt` by
    `PADDING - 1`, exactly as it already does for `s.stack` and `s.ent`.

NOTE(review): this patch was reconstructed from a copy in which every
newline had been collapsed.  The hunk headers and non-blank lines are
taken verbatim; the position of blank context lines was inferred from the
`@@` line counts and should be confirmed against the target tree.

NOTE(review): the `<T*>` casts on the four `mem.alloc(...)` calls are
restored on the assumption that they were stripped as markup by the same
extraction.  `Pool.alloc` returns `void*`, which Cython will not assign to
a typed pointer without an explicit cast -- confirm against the target
tree before applying.

diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index a423af8b0..6a15b8951 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -48,6 +48,13 @@ cdef struct Entity:
     int label
 
 
+cdef struct Constituent:
+    int head
+    int start
+    int end
+    int label
+
+
 cdef struct TokenC:
     const LexemeC* lex
     Morphology morph
@@ -65,6 +72,9 @@ cdef struct TokenC:
     uint32_t l_edge
     uint32_t r_edge
 
+    int attach_order
+    int ctnt_label
+
     int ent_iob
     int ent_type
 
diff --git a/spacy/syntax/_state.pxd b/spacy/syntax/_state.pxd
index 59e1c8c0a..a1f17b94c 100644
--- a/spacy/syntax/_state.pxd
+++ b/spacy/syntax/_state.pxd
@@ -2,13 +2,14 @@ from libc.stdint cimport uint32_t
 
 from cymem.cymem cimport Pool
 
-from ..structs cimport TokenC, Entity
+from ..structs cimport TokenC, Entity, Constituent
 
 
 cdef struct State:
     TokenC* sent
     int* stack
     Entity* ent
+    Constituent* ctnt
     int i
     int sent_len
     int stack_len
diff --git a/spacy/syntax/_state.pyx b/spacy/syntax/_state.pyx
index 07d55ad98..2acd51670 100644
--- a/spacy/syntax/_state.pyx
+++ b/spacy/syntax/_state.pyx
@@ -2,7 +2,7 @@ from libc.string cimport memmove, memcpy
 from cymem.cymem cimport Pool
 
 from ..lexeme cimport EMPTY_LEXEME
-from ..structs cimport TokenC, Entity
+from ..structs cimport TokenC, Entity, Constituent
 
 
 DEF PADDING = 5
@@ -137,10 +137,12 @@ cdef int count_right_kids(const TokenC* head) nogil:
 cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except NULL:
     cdef int padded_len = sent_len + PADDING + PADDING
     cdef State* s = <State*>mem.alloc(1, sizeof(State))
+    s.ctnt = <Constituent*>mem.alloc(padded_len, sizeof(Constituent))
     s.ent = <Entity*>mem.alloc(padded_len, sizeof(Entity))
     s.stack = <int*>mem.alloc(padded_len, sizeof(int))
     for i in range(PADDING):
         s.stack[i] = -1
+    s.ctnt += (PADDING -1)
     s.stack += (PADDING - 1)
     s.ent += (PADDING - 1)
     assert s.stack[0] == -1