From e09a08bd00274c9e974137d490733cd834b5c662 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 1 Jun 2015 23:06:30 +0200
Subject: [PATCH] * Add copy_state function

---
 spacy/syntax/_state.pxd |  3 ++-
 spacy/syntax/_state.pyx | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/spacy/syntax/_state.pxd b/spacy/syntax/_state.pxd
index 5ffc1f063..ee89d3d59 100644
--- a/spacy/syntax/_state.pxd
+++ b/spacy/syntax/_state.pxd
@@ -106,7 +106,8 @@ cdef int head_in_buffer(const State *s, const int child, const int* gold) except
 cdef int children_in_stack(const State *s, const int head, const int* gold) except -1
 cdef int head_in_stack(const State *s, const int child, const int* gold) except -1
 
-cdef State* new_state(Pool mem, TokenC* sent, const int sent_length) except NULL
+cdef State* new_state(Pool mem, const TokenC* sent, const int sent_length) except NULL
+cdef int copy_state(State* dest, const State* src) except -1
 
 cdef int count_left_kids(const TokenC* head) nogil
 
diff --git a/spacy/syntax/_state.pyx b/spacy/syntax/_state.pyx
index 3aae85773..74167319f 100644
--- a/spacy/syntax/_state.pyx
+++ b/spacy/syntax/_state.pyx
@@ -21,9 +21,17 @@ cdef int add_dep(State *s, int head, int child, int label) except -1:
         s.sent[head].r_kids |= 1 << (-dist)
         s.sent[head].r_edge = child - head
         # Walk up the tree, setting right edge
+        n_iter = 0
+        start = head
         while s.sent[head].head != 0:
             head += s.sent[head].head
             s.sent[head].r_edge = child - head
+            n_iter += 1
+            if n_iter >= s.sent_len:
+                tree = [(i + s.sent[i].head) for i in range(s.sent_len)]
+                msg = "Error adding dependency (%d, %d). Could not find root of tree: %s"
+                msg = msg % (start, child, tree)
+                raise Exception(msg)
     else:
         s.sent[head].l_kids |= 1 << dist
         s.sent[head].l_edge = (child + s.sent[child].l_edge) - head
@@ -155,6 +163,33 @@ cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except N
     return s
 
 
+cdef int copy_state(State* dest, const State* src) except -1:
+    """Overwrite dest so that it holds the same parse state as src.
+
+    The stack entries, every token of the sentence, the index i and the
+    entity entries are copied.  Both states must have the same sent_len.
+    No value is returned explicitly; -1 is reserved for a raised exception.
+    """
+    assert dest.sent_len == src.sent_len
+    # Copy stack --- the stack is indexed by pointer arithmetic: the entries
+    # copied are stack[0], stack[-1], ..., stack[-(stack_len - 1)].
+    dest.stack += (src.stack_len - dest.stack_len)
+    for i in range(src.stack_len):
+        dest.stack[-i] = src.stack[-i]
+    dest.stack_len = src.stack_len
+    # Copy the whole sentence (i.e. the parse) --- all sent_len tokens.
+    memcpy(dest.sent, src.sent, sizeof(TokenC) * src.sent_len)
+    dest.i = src.i
+    # Copy assigned entities --- same pointer-arithmetic indexing as the stack
+    dest.ent += (src.ents_len - dest.ents_len)
+    for i in range(src.ents_len):
+        dest.ent[-i] = src.ent[-i]
+    dest.ents_len = src.ents_len
+    assert dest.sent[dest.i].head == src.sent[src.i].head
+    if dest.stack_len > 0:
+        assert dest.stack[0] < dest.i
+
+
 # From https://en.wikipedia.org/wiki/Hamming_weight
 cdef inline uint32_t _popcount(uint32_t x) nogil:
     """Find number of non-zero bits."""