diff --git a/spacy/syntax/_parse_features.pxd b/spacy/syntax/_parse_features.pxd index 6c1f0d6a5..00aff5f9d 100644 --- a/spacy/syntax/_parse_features.pxd +++ b/spacy/syntax/_parse_features.pxd @@ -4,8 +4,7 @@ from ._state cimport State from .stateclass cimport StateClass -cdef int fill_context(atom_t* context, State* state) except -1 -cdef int _new_fill_context(atom_t* context, StateClass state) except -1 +cdef int fill_context(atom_t* context, StateClass state) except -1 # Context elements # Ensure each token's attributes are listed: w, p, c, c6, c4. The order diff --git a/spacy/syntax/_parse_features.pyx b/spacy/syntax/_parse_features.pyx index db59c82a2..64a83390c 100644 --- a/spacy/syntax/_parse_features.pyx +++ b/spacy/syntax/_parse_features.pyx @@ -65,7 +65,7 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil: context[10] = token.ent_iob context[11] = token.ent_type -cdef int _new_fill_context(atom_t* ctxt, StateClass st) except -1: +cdef int fill_context(atom_t* ctxt, StateClass st) except -1: # Take care to fill every element of context! # We could memset, but this makes it very easy to have broken features that # make almost no impact on accuracy. If instead they're unset, the impact @@ -111,50 +111,6 @@ cdef int _new_fill_context(atom_t* ctxt, StateClass st) except -1: ctxt[S2_has_head] = st.has_head(st.S(2)) + 1 -cdef int fill_context(atom_t* context, State* state) except -1: - # Take care to fill every element of context! - # We could memset, but this makes it very easy to have broken features that - # make almost no impact on accuracy. If instead they're unset, the impact - # tends to be dramatic, so we get an obvious regression to fix... - fill_token(&context[S2w], get_s2(state)) - fill_token(&context[S1w], get_s1(state)) - fill_token(&context[S1rw], get_right(state, get_s1(state), 1)) - fill_token(&context[S0lw], get_left(state, get_s0(state), 1)) - fill_token(&context[S0l2w], get_left(state, get_s0(state), 2)) - fill_token(&context[S0w], get_s0(state)) - fill_token(&context[S0r2w], get_right(state, get_s0(state), 2)) - fill_token(&context[S0rw], get_right(state, get_s0(state), 1)) - fill_token(&context[N0lw], get_left(state, get_n0(state), 1)) - fill_token(&context[N0l2w], get_left(state, get_n0(state), 2)) - fill_token(&context[N0w], get_n0(state)) - fill_token(&context[N1w], get_n1(state)) - fill_token(&context[N2w], get_n2(state)) - fill_token(&context[P1w], get_p1(state)) - fill_token(&context[P2w], get_p2(state)) - - fill_token(&context[E0w], get_e0(state)) - fill_token(&context[E1w], get_e1(state)) - if state.stack_len >= 1: - context[dist] = min(state.stack[0] - state.i, 5) - else: - context[dist] = 0 - context[N0lv] = min(count_left_kids(get_n0(state)), 5) - context[S0lv] = min(count_left_kids(get_s0(state)), 5) - context[S0rv] = min(count_right_kids(get_s0(state)), 5) - context[S1lv] = min(count_left_kids(get_s1(state)), 5) - context[S1rv] = min(count_right_kids(get_s1(state)), 5) - - context[S0_has_head] = 0 - context[S1_has_head] = 0 - context[S2_has_head] = 0 - if state.stack_len >= 1: - context[S0_has_head] = has_head(get_s0(state)) + 1 - if state.stack_len >= 2: - context[S1_has_head] = has_head(get_s1(state)) + 1 - if state.stack_len >= 3: - context[S2_has_head] = has_head(get_s2(state)) + 1 - - ner = ( (N0W,), (P1W,), diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index 1ff5a523f..fab990ef9 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -40,7 +40,6 @@ from ..gold cimport GoldParse from . import _parse_features from ._parse_features cimport CONTEXT_SIZE -from ._parse_features cimport _new_fill_context from ._parse_features cimport fill_context from .stateclass cimport StateClass @@ -111,7 +110,7 @@ cdef class Parser: words = [w.orth_ for w in tokens] while not stcls.is_final(): #print stcls.print_state(words) - _new_fill_context(context, stcls) + fill_context(context, stcls) scores = self.model.score(context) guess = self.moves.best_valid(scores, stcls) guess.do(stcls, guess.label) @@ -145,7 +144,7 @@ cdef class Parser: loss = 0 words = [w.orth_ for w in tokens] while not stcls.is_final(): - _new_fill_context(context, stcls) + fill_context(context, stcls) scores = self.model.score(context) guess = self.moves.best_valid(scores, stcls) best = self.moves.best_gold(scores, stcls, gold) @@ -188,7 +187,7 @@ cdef class Parser: state = beam.at(i) stcls.from_struct(state) if not is_final(state): - fill_context(context, state) + fill_context(context, stcls) self.model.set_scores(beam.scores[i], context) self.moves.set_valid(beam.is_valid[i], stcls) @@ -213,7 +212,7 @@ cdef class Parser: cdef class_t clas cdef int n_feats for clas in hist: - _new_fill_context(context, stcls) + fill_context(context, stcls) feats = self.model._extractor.get_feats(context, &n_feats) count_feats(counts[clas], feats, n_feats, inc) self.moves.c[clas].do(stcls, self.moves.c[clas].label)