* Remove version of fill_context that takes State*

2015-06-10 01:39:07 +02:00 · 2015-06-10 01:39:07 +02:00 · f14a1526aa
parent d68c686ec1
commit f14a1526aa
3 changed files with 6 additions and 52 deletions
--- a/spacy/syntax/_parse_features.pxd
+++ b/spacy/syntax/_parse_features.pxd
@@ -4,8 +4,7 @@ from ._state cimport State
 from .stateclass cimport StateClass


-cdef int fill_context(atom_t* context, State* state) except -1
-cdef int _new_fill_context(atom_t* context, StateClass state) except -1
+cdef int fill_context(atom_t* context, StateClass state) except -1
 # Context elements

 # Ensure each token's attributes are listed: w, p, c, c6, c4. The order
--- a/spacy/syntax/_parse_features.pyx
+++ b/spacy/syntax/_parse_features.pyx
@@ -65,7 +65,7 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
        context[10] = token.ent_iob
        context[11] = token.ent_type

-cdef int _new_fill_context(atom_t* ctxt, StateClass st) except -1:
+cdef int fill_context(atom_t* ctxt, StateClass st) except -1:
    # Take care to fill every element of context!
    # We could memset, but this makes it very easy to have broken features that
    # make almost no impact on accuracy. If instead they're unset, the impact
@@ -111,50 +111,6 @@ cdef int _new_fill_context(atom_t* ctxt, StateClass st) except -1:
                ctxt[S2_has_head] = st.has_head(st.S(2)) + 1


-cdef int fill_context(atom_t* context, State* state) except -1:
-    # Take care to fill every element of context!
-    # We could memset, but this makes it very easy to have broken features that
-    # make almost no impact on accuracy. If instead they're unset, the impact
-    # tends to be dramatic, so we get an obvious regression to fix...
-    fill_token(&context[S2w], get_s2(state))
-    fill_token(&context[S1w], get_s1(state))
-    fill_token(&context[S1rw], get_right(state, get_s1(state), 1))
-    fill_token(&context[S0lw], get_left(state, get_s0(state), 1))
-    fill_token(&context[S0l2w], get_left(state, get_s0(state), 2))
-    fill_token(&context[S0w], get_s0(state))
-    fill_token(&context[S0r2w], get_right(state, get_s0(state), 2))
-    fill_token(&context[S0rw], get_right(state, get_s0(state), 1))
-    fill_token(&context[N0lw], get_left(state, get_n0(state), 1))
-    fill_token(&context[N0l2w], get_left(state, get_n0(state), 2))
-    fill_token(&context[N0w], get_n0(state))
-    fill_token(&context[N1w], get_n1(state))
-    fill_token(&context[N2w], get_n2(state))
-    fill_token(&context[P1w], get_p1(state))
-    fill_token(&context[P2w], get_p2(state))
-
-    fill_token(&context[E0w], get_e0(state))
-    fill_token(&context[E1w], get_e1(state))
-    if state.stack_len >= 1:
-        context[dist] = min(state.stack[0] - state.i, 5)
-    else:
-        context[dist] = 0
-    context[N0lv] = min(count_left_kids(get_n0(state)), 5)
-    context[S0lv] = min(count_left_kids(get_s0(state)), 5)
-    context[S0rv] = min(count_right_kids(get_s0(state)), 5)
-    context[S1lv] = min(count_left_kids(get_s1(state)), 5)
-    context[S1rv] = min(count_right_kids(get_s1(state)), 5)
-
-    context[S0_has_head] = 0
-    context[S1_has_head] = 0
-    context[S2_has_head] = 0
-    if state.stack_len >= 1:
-        context[S0_has_head] = has_head(get_s0(state)) + 1
-        if state.stack_len >= 2:
-            context[S1_has_head] = has_head(get_s1(state)) + 1
-            if state.stack_len >= 3:
-                context[S2_has_head] = has_head(get_s2(state)) + 1
-
-
 ner = (
    (N0W,),
    (P1W,),
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@@ -40,7 +40,6 @@ from ..gold cimport GoldParse

 from . import _parse_features
 from ._parse_features cimport CONTEXT_SIZE
-from ._parse_features cimport _new_fill_context
 from ._parse_features cimport fill_context
 from .stateclass cimport StateClass

@@ -111,7 +110,7 @@ cdef class Parser:
        words = [w.orth_ for w in tokens]
        while not stcls.is_final():
            #print stcls.print_state(words)
-            _new_fill_context(context, stcls)
+            fill_context(context, stcls)
            scores = self.model.score(context)
            guess = self.moves.best_valid(scores, stcls)
            guess.do(stcls, guess.label)
@@ -145,7 +144,7 @@ cdef class Parser:
        loss = 0
        words = [w.orth_ for w in tokens]
        while not stcls.is_final():
-            _new_fill_context(context, stcls)
+            fill_context(context, stcls)
            scores = self.model.score(context)
            guess = self.moves.best_valid(scores, stcls)
            best = self.moves.best_gold(scores, stcls, gold)
@@ -188,7 +187,7 @@ cdef class Parser:
            state = <State*>beam.at(i)
            stcls.from_struct(state)
            if not is_final(state):
-                fill_context(context, state)
+                fill_context(context, stcls)
                self.model.set_scores(beam.scores[i], context)
                self.moves.set_valid(beam.is_valid[i], stcls)
       
@@ -213,7 +212,7 @@ cdef class Parser:
        cdef class_t clas
        cdef int n_feats
        for clas in hist:
-            _new_fill_context(context, stcls)
+            fill_context(context, stcls)
            feats = self.model._extractor.get_feats(context, &n_feats)
            count_feats(counts[clas], feats, n_feats, inc)
            self.moves.c[clas].do(stcls, self.moves.c[clas].label)