spaCy/spacy/syntax/_parse_features.pyx

"""
Fill an array, context, with every _atomic_ value our features reference.
We then write the _actual features_ as tuples of the atoms. The machinery
that translates from the tuples to feature-extractors (which pick the values
out of "context") is in features/extractor.pyx

The atomic feature names are listed in a big enum, so that the feature tuples
can refer to them.
"""
from itertools import combinations

from ..tokens cimport TokenC
from ._state cimport State
from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2
from ._state cimport has_head, get_left, get_right
from ._state cimport count_left_kids, count_right_kids


cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
    if token is NULL:
        context[0] = 0
        context[1] = 0
        context[2] = 0
        context[3] = 0
        context[4] = 0
        context[5] = 0
        context[6] = 0
    else:
        context[0] = token.lex.sic
        context[1] = token.lemma
        context[2] = token.tag
        context[3] = token.lex.cluster
        # We've read in the string little-endian, so now we can take & (2**n)-1
        # to get the first n bits of the cluster.
        # e.g. s = "1110010101"
        # s = ''.join(reversed(s))
        # first_4_bits = int(s, 2)
        # print first_4_bits
        # 5
        # print "{0:b}".format(prefix).ljust(4, '0')
        # 1110
        # What we're doing here is picking a number where all bits are 1, e.g.
        # 15 is 1111, 63 is 111111 and doing bitwise AND, so getting all bits in
        # the source that are set to 1.
        context[4] = token.lex.cluster & 63
        context[5] = token.lex.cluster & 15
        context[6] = token.dep if has_head(token) else 0


cdef int fill_context(atom_t* context, State* state) except -1:
    # This fills in the basic properties of each of our "slot" tokens, e.g.
    # word on top of the stack, word at the front of the buffer, etc.
    fill_token(&context[S2w], get_s2(state))
    fill_token(&context[S1w], get_s1(state))
    fill_token(&context[S1rw], get_right(state, get_s1(state), 1))
    fill_token(&context[S0lw], get_left(state, get_s0(state), 1))
    fill_token(&context[S0l2w], get_left(state, get_s0(state), 2))
    fill_token(&context[S0w], get_s0(state))
    fill_token(&context[S0r2w], get_right(state, get_s0(state), 2))
    fill_token(&context[S0rw], get_right(state, get_s0(state), 1))
    fill_token(&context[N0lw], get_left(state, get_n0(state), 1))
    fill_token(&context[N0l2w], get_left(state, get_n0(state), 2))
    fill_token(&context[N0w], get_n0(state))
    fill_token(&context[N1w], get_n1(state))
    fill_token(&context[N2w], get_n2(state))

    if state.stack_len >= 1:
        context[dist] = state.stack[0] - state.i
    else:
        context[dist] = 0
    context[N0lv] = max(count_left_kids(get_n0(state)), 5)
    context[S0lv] = max(count_left_kids(get_s0(state)), 5)
    context[S0rv] = max(count_right_kids(get_s0(state)), 5)
    context[S1lv] = max(count_left_kids(get_s1(state)), 5)
    context[S1rv] = max(count_right_kids(get_s1(state)), 5)

    context[S0_has_head] = 0
    context[S1_has_head] = 0
    context[S2_has_head] = 0
    if state.stack_len >= 1:
        context[S0_has_head] = has_head(get_s0(state)) + 1
        if state.stack_len >= 2:
            context[S1_has_head] = has_head(get_s1(state)) + 1
            if state.stack_len >= 3:
                context[S2_has_head] = has_head(get_s2(state))

unigrams = (
    (S2W, S2p),
    (S2c6, S2p),
    
    (S1W, S1p),
    (S1c6, S1p),

    (S0W, S0p),
    (S0c6, S0p),
 
    (N0W, N0p),
    (N0p,),
    (N0c,),
    (N0c6, N0p),
    (N0L,),
 
    (N1W, N1p),
    (N1c6, N1p),
 
    (N2W, N2p),
    (N2c6, N2p),

    (S0r2W, S0r2p),
    (S0r2c6, S0r2p),
    (S0r2L,),

    (S0rW, S0rp),
    (S0rc6, S0rp),
    (S0rL,),

    (S0l2W, S0l2p),
    (S0l2c6, S0l2p),
    (S0l2L,),

    (S0lW, S0lp),
    (S0lc6, S0lp),
    (S0lL,),

    (N0l2W, N0l2p),
    (N0l2c6, N0l2p),
    (N0l2L,),

    (N0lW, N0lp),
    (N0lc6, N0lp),
    (N0lL,),
)


s0_n0 = (
    (S0W, S0p, N0W, N0p),
    (S0c, S0p, N0c, N0p),
    (S0c6, S0p, N0c6, N0p),
    (S0c4, S0p, N0c4, N0p),
    (S0p, N0p),
    (S0W, N0p),
    (S0p, N0W),
    (S0W, N0c),
    (S0c, N0W),
    (S0p, N0c),
    (S0c, N0p),
    (S0W, S0rp, N0p),
    (S0p, S0rp, N0p),
    (S0p, N0lp, N0W),
    (S0p, N0lp, N0p),
)


s1_n0 = (
    (S1p, N0p),
    (S1c, N0c),
    (S1c, N0p),
    (S1p, N0c),
    (S1W, S1p, N0p),
    (S1p, N0W, N0p),
    (S1c6, S1p, N0c6, N0p),
)


s0_n1 = (
    (S0p, N1p),
    (S0c, N1c),
    (S0c, N1p),
    (S0p, N1c),
    (S0W, S0p, N1p),
    (S0p, N1W, N1p),
    (S0c6, S0p, N1c6, N1p),
)

n0_n1 = (
    (N0W, N0p, N1W, N1p),
    (N0W, N0p, N1p),
    (N0p, N1W, N1p),
    (N0c, N0p, N1c, N1p),
    (N0c6, N0p, N1c6, N1p),
    (N0c, N1c),
    (N0p, N1c),
)

tree_shape = (
    (dist,),
    (S0p, S0_has_head, S1_has_head, S2_has_head),
    (S0p, S0lv, S0rv),
    (N0p, N0lv),
)

trigrams = (
    (N0p, N1p, N2p),
    (S0p, S0lp, S0l2p),
    (S0p, S0rp, S0r2p),
    (S0p, S1p, S2p),
    (S1p, S0p, N0p),
    (S0p, S0lp, N0p),
    (S0p, N0p, N0lp),
    (N0p, N0lp, N0l2p),
    
    (S0W, S0p, S0rL, S0r2L),
    (S0p, S0rL, S0r2L),

    (S0W, S0p, S0lL, S0l2L),
    (S0p, S0lL, S0l2L),

    (N0W, N0p, N0lL, N0l2L),
    (N0p, N0lL, N0l2L),
)
 

arc_eager = (
    (S0w, S0p),
    (S0w,),
    (S0p,),
    (N0w, N0p),
    (N0w,),
    (N0p,),
    (N1w, N1p),
    (N1w,),
    (N1p,),
    (N2w, N2p),
    (N2w,),
    (N2p,),
    (S0w, S0p, N0w, N0p),
    (S0w, S0p, N0w),
    (S0w, N0w, N0p),
    (S0w, S0p, N0p),
    (S0p, N0w, N0p),
    (S0w, N0w),
    (S0p, N0p),
    (N0p, N1p),
    (N0p, N1p, N2p),
    (S0p, N0p, N1p),
    (S1p, S0p, N0p),
    (S0p, S0lp, N0p),
    (S0p, S0rp, N0p),
    (S0p, N0p, N0lp),
    (dist, S0w),
    (dist, S0p),
    (dist, N0w),
    (dist, N0p),
    (dist, S0w, N0w),
    (dist, S0p, N0p),
    (S0w, S0rv),
    (S0p, S0rv),
    (S0w, S0lv),
    (S0p, S0lv),
    (N0w, N0lv),
    (N0p, N0lv),
    (S1w,),
    (S1p,),
    (S0lw,),
    (S0lp,),
    (S0rw,),
    (S0rp,),
    (N0lw,),
    (N0lp,),
    (S2w,),
    (S2p,),
    (S0l2w,),
    (S0l2p,),
    (S0r2w,),
    (S0r2p,),
    (N0l2w,),
    (N0l2p,),
    (S0p, S0lp, S0l2p),
    (S0p, S0rp, S0r2p),
    (S0p, S1p, S2p),
    (N0p, N0lp, N0l2p),
    (S0L,),
    (S0lL,),
    (S0rL,),
    (N0lL,),
    (S1L,),
    (S0l2L,),
    (S0r2L,),
    (N0l2L,),
    (S0w, S0rL, S0r2L),
    (S0p, S0rL, S0r2L),
    (S0w, S0lL, S0l2L),
    (S0p, S0lL, S0l2L),
    (N0w, N0lL, N0l2L),
    (N0p, N0lL, N0l2L),
)


label_sets = (
   (S0w, S0lL, S0l2L),
   (S0p, S0rL, S0r2L),
   (S0p, S0lL, S0l2L),
   (S0p, S0rL, S0r2L),
   (N0w, N0lL, N0l2L),
   (N0p, N0lL, N0l2L),
)

extra_labels = (
    (S0p, S0lL, S0lp),
    (S0p, S0lL, S0l2L),
    (S0p, S0rL, S0rp),
    (S0p, S0rL, S0r2L),
    (S0p, S0lL, S0rL),
    (S1p, S0L, S0rL),
    (S1p, S0L, S0lL),
)


# Koo et al (2008) dependency features, using Brown clusters.
clusters = (
    # Koo et al have (head, child) --- we have S0, N0 for both.
    (S0c4, N0c4),
    (S0c6, N0c6),
    (S0c, N0c),
    (S0p, N0c4),
    (S0p, N0c6),
    (S0p, N0c),
    (S0c4, N0p),
    (S0c6, N0p),
    (S0c, N0p),
    # Siblings --- right arc
    (S0c4, S0rc4, N0c4),
    (S0c6, S0rc6, N0c6),
    (S0p, S0rc4, N0c4),
    (S0c4, S0rp, N0c4),
    (S0c4, S0rc4, N0p),
    # Siblings --- left arc
    (S0c4, N0lc4, N0c4),
    (S0c6, N0c6, N0c6),
    (S0c4, N0lc4, N0p),
    (S0c4, N0lp, N0c4),
    (S0p, N0lc4, N0c4),
    # Grand-child, right-arc
    (S1c4, S0c4, N0c4),
    (S1c6, S0c6, N0c6),
    (S1p, S0c4, N0c4),
    (S1c4, S0p, N0c4),
    (S1c4, S0c4, N0p),
    # Grand-child, left-arc
    (S0lc4, S0c4, N0c4),
    (S0lc6, S0c6, N0c6),
    (S0lp, S0c4, N0c4),
    (S0lc4, S0p, N0c4),
    (S0lc4, S0c4, N0p)
)


hasty = s0_n0 + n0_n1 + trigrams


def pos_bigrams():
    kernels = [S2w, S1w, S0w, S0lw, S0rw, N0w, N0lw, N1w]
    bitags = []
    for t1, t2 in combinations(kernels, 2):
        feat = (t1 + 1, t2 + 1)
        bitags.append(feat)
    print "Adding %d bitags" % len(bitags)
    return tuple(bitags)
* Work on greedy parser 2014-12-16 11:44:43 +00:00			`"""`
			`Fill an array, context, with every _atomic_ value our features reference.`
			`We then write the _actual features_ as tuples of the atoms. The machinery`
			`that translates from the tuples to feature-extractors (which pick the values`
			`out of "context") is in features/extractor.pyx`

			`The atomic feature names are listed in a big enum, so that the feature tuples`
			`can refer to them.`
			`"""`
			`from itertools import combinations`

			`from ..tokens cimport TokenC`
			`from ._state cimport State`
			`from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2`
* Upd docstrings 2014-12-27 07:45:16 +00:00			`from ._state cimport has_head, get_left, get_right`
			`from ._state cimport count_left_kids, count_right_kids`
* Work on greedy parser 2014-12-16 11:44:43 +00:00

			`cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:`
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`if token is NULL:`
			`context[0] = 0`
			`context[1] = 0`
			`context[2] = 0`
			`context[3] = 0`
			`context[4] = 0`
			`context[5] = 0`
* Upd docstrings 2014-12-27 07:45:16 +00:00			`context[6] = 0`
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`else:`
			`context[0] = token.lex.sic`
* Upd docstrings 2014-12-27 07:45:16 +00:00			`context[1] = token.lemma`
* Work on word vectors, and other stuff 2015-01-17 05:21:17 +00:00			`context[2] = token.tag`
* Upd docstrings 2014-12-27 07:45:16 +00:00			`context[3] = token.lex.cluster`
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`# We've read in the string little-endian, so now we can take & (2**n)-1`
			`# to get the first n bits of the cluster.`
			`# e.g. s = "1110010101"`
			`# s = ''.join(reversed(s))`
			`# first_4_bits = int(s, 2)`
			`# print first_4_bits`
			`# 5`
			`# print "{0:b}".format(prefix).ljust(4, '0')`
			`# 1110`
			`# What we're doing here is picking a number where all bits are 1, e.g.`
			`# 15 is 1111, 63 is 111111 and doing bitwise AND, so getting all bits in`
			`# the source that are set to 1.`
* Upd docstrings 2014-12-27 07:45:16 +00:00			`context[4] = token.lex.cluster & 63`
			`context[5] = token.lex.cluster & 15`
* Work on word vectors, and other stuff 2015-01-17 05:21:17 +00:00			`context[6] = token.dep if has_head(token) else 0`
* Work on greedy parser 2014-12-16 11:44:43 +00:00

			`cdef int fill_context(atom_t* context, State* state) except -1:`
			`# This fills in the basic properties of each of our "slot" tokens, e.g.`
			`# word on top of the stack, word at the front of the buffer, etc.`
			`fill_token(&context[S2w], get_s2(state))`
			`fill_token(&context[S1w], get_s1(state))`
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`fill_token(&context[S1rw], get_right(state, get_s1(state), 1))`
			`fill_token(&context[S0lw], get_left(state, get_s0(state), 1))`
			`fill_token(&context[S0l2w], get_left(state, get_s0(state), 2))`
* Work on greedy parser 2014-12-16 11:44:43 +00:00			`fill_token(&context[S0w], get_s0(state))`
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`fill_token(&context[S0r2w], get_right(state, get_s0(state), 2))`
			`fill_token(&context[S0rw], get_right(state, get_s0(state), 1))`
* Work on greedy parser 2014-12-17 10:09:29 +00:00			`fill_token(&context[N0lw], get_left(state, get_n0(state), 1))`
			`fill_token(&context[N0l2w], get_left(state, get_n0(state), 2))`
* Work on greedy parser 2014-12-16 11:44:43 +00:00			`fill_token(&context[N0w], get_n0(state))`
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`fill_token(&context[N1w], get_n1(state))`
			`fill_token(&context[N2w], get_n2(state))`
* Work on greedy parser 2014-12-16 11:44:43 +00:00
* Work on greedy parser 2014-12-16 16:19:43 +00:00			`if state.stack_len >= 1:`
			`context[dist] = state.stack[0] - state.i`
			`else:`
			`context[dist] = 0`
* Upd docstrings 2014-12-27 07:45:16 +00:00			`context[N0lv] = max(count_left_kids(get_n0(state)), 5)`
			`context[S0lv] = max(count_left_kids(get_s0(state)), 5)`
			`context[S0rv] = max(count_right_kids(get_s0(state)), 5)`
			`context[S1lv] = max(count_left_kids(get_s1(state)), 5)`
			`context[S1rv] = max(count_right_kids(get_s1(state)), 5)`
* Work on greedy parser 2014-12-16 11:44:43 +00:00
* Upd docstrings 2014-12-27 07:45:16 +00:00			`context[S0_has_head] = 0`
			`context[S1_has_head] = 0`
			`context[S2_has_head] = 0`
			`if state.stack_len >= 1:`
			`context[S0_has_head] = has_head(get_s0(state)) + 1`
			`if state.stack_len >= 2:`
			`context[S1_has_head] = has_head(get_s1(state)) + 1`
			`if state.stack_len >= 3:`
			`context[S2_has_head] = has_head(get_s2(state))`

			`unigrams = (`
			`(S2W, S2p),`
			`(S2c6, S2p),`

			`(S1W, S1p),`
			`(S1c6, S1p),`

			`(S0W, S0p),`
			`(S0c6, S0p),`

			`(N0W, N0p),`
			`(N0p,),`
			`(N0c,),`
			`(N0c6, N0p),`
			`(N0L,),`

			`(N1W, N1p),`
			`(N1c6, N1p),`

			`(N2W, N2p),`
			`(N2c6, N2p),`

			`(S0r2W, S0r2p),`
			`(S0r2c6, S0r2p),`
			`(S0r2L,),`

			`(S0rW, S0rp),`
			`(S0rc6, S0rp),`
			`(S0rL,),`

			`(S0l2W, S0l2p),`
			`(S0l2c6, S0l2p),`
			`(S0l2L,),`

			`(S0lW, S0lp),`
			`(S0lc6, S0lp),`
			`(S0lL,),`

			`(N0l2W, N0l2p),`
			`(N0l2c6, N0l2p),`
			`(N0l2L,),`

			`(N0lW, N0lp),`
			`(N0lc6, N0lp),`
			`(N0lL,),`
			`)`


			`s0_n0 = (`
			`(S0W, S0p, N0W, N0p),`
			`(S0c, S0p, N0c, N0p),`
			`(S0c6, S0p, N0c6, N0p),`
			`(S0c4, S0p, N0c4, N0p),`
			`(S0p, N0p),`
			`(S0W, N0p),`
			`(S0p, N0W),`
			`(S0W, N0c),`
			`(S0c, N0W),`
			`(S0p, N0c),`
			`(S0c, N0p),`
			`(S0W, S0rp, N0p),`
			`(S0p, S0rp, N0p),`
			`(S0p, N0lp, N0W),`
			`(S0p, N0lp, N0p),`
			`)`


			`s1_n0 = (`
			`(S1p, N0p),`
			`(S1c, N0c),`
			`(S1c, N0p),`
			`(S1p, N0c),`
			`(S1W, S1p, N0p),`
			`(S1p, N0W, N0p),`
			`(S1c6, S1p, N0c6, N0p),`
			`)`


			`s0_n1 = (`
			`(S0p, N1p),`
			`(S0c, N1c),`
			`(S0c, N1p),`
			`(S0p, N1c),`
			`(S0W, S0p, N1p),`
			`(S0p, N1W, N1p),`
			`(S0c6, S0p, N1c6, N1p),`
			`)`

			`n0_n1 = (`
			`(N0W, N0p, N1W, N1p),`
			`(N0W, N0p, N1p),`
			`(N0p, N1W, N1p),`
			`(N0c, N0p, N1c, N1p),`
			`(N0c6, N0p, N1c6, N1p),`
			`(N0c, N1c),`
			`(N0p, N1c),`
			`)`

			`tree_shape = (`
			`(dist,),`
			`(S0p, S0_has_head, S1_has_head, S2_has_head),`
			`(S0p, S0lv, S0rv),`
			`(N0p, N0lv),`
			`)`

			`trigrams = (`
			`(N0p, N1p, N2p),`
			`(S0p, S0lp, S0l2p),`
			`(S0p, S0rp, S0r2p),`
			`(S0p, S1p, S2p),`
			`(S1p, S0p, N0p),`
			`(S0p, S0lp, N0p),`
			`(S0p, N0p, N0lp),`
			`(N0p, N0lp, N0l2p),`

			`(S0W, S0p, S0rL, S0r2L),`
			`(S0p, S0rL, S0r2L),`

			`(S0W, S0p, S0lL, S0l2L),`
			`(S0p, S0lL, S0l2L),`

			`(N0W, N0p, N0lL, N0l2L),`
			`(N0p, N0lL, N0l2L),`
			`)`

* Work on greedy parser 2014-12-16 11:44:43 +00:00
			`arc_eager = (`
			`(S0w, S0p),`
			`(S0w,),`
			`(S0p,),`
			`(N0w, N0p),`
			`(N0w,),`
			`(N0p,),`
			`(N1w, N1p),`
			`(N1w,),`
			`(N1p,),`
			`(N2w, N2p),`
			`(N2w,),`
			`(N2p,),`
			`(S0w, S0p, N0w, N0p),`
			`(S0w, S0p, N0w),`
			`(S0w, N0w, N0p),`
			`(S0w, S0p, N0p),`
			`(S0p, N0w, N0p),`
			`(S0w, N0w),`
			`(S0p, N0p),`
			`(N0p, N1p),`
			`(N0p, N1p, N2p),`
			`(S0p, N0p, N1p),`
			`(S1p, S0p, N0p),`
			`(S0p, S0lp, N0p),`
			`(S0p, S0rp, N0p),`
			`(S0p, N0p, N0lp),`
			`(dist, S0w),`
			`(dist, S0p),`
			`(dist, N0w),`
			`(dist, N0p),`
			`(dist, S0w, N0w),`
			`(dist, S0p, N0p),`
			`(S0w, S0rv),`
			`(S0p, S0rv),`
			`(S0w, S0lv),`
			`(S0p, S0lv),`
			`(N0w, N0lv),`
			`(N0p, N0lv),`
			`(S1w,),`
			`(S1p,),`
			`(S0lw,),`
			`(S0lp,),`
			`(S0rw,),`
			`(S0rp,),`
			`(N0lw,),`
			`(N0lp,),`
			`(S2w,),`
			`(S2p,),`
			`(S0l2w,),`
			`(S0l2p,),`
			`(S0r2w,),`
			`(S0r2p,),`
			`(N0l2w,),`
			`(N0l2p,),`
			`(S0p, S0lp, S0l2p),`
			`(S0p, S0rp, S0r2p),`
			`(S0p, S1p, S2p),`
			`(N0p, N0lp, N0l2p),`
			`(S0L,),`
			`(S0lL,),`
			`(S0rL,),`
			`(N0lL,),`
			`(S1L,),`
			`(S0l2L,),`
			`(S0r2L,),`
			`(N0l2L,),`
			`(S0w, S0rL, S0r2L),`
			`(S0p, S0rL, S0r2L),`
			`(S0w, S0lL, S0l2L),`
			`(S0p, S0lL, S0l2L),`
			`(N0w, N0lL, N0l2L),`
			`(N0p, N0lL, N0l2L),`
			`)`


			`label_sets = (`
			`(S0w, S0lL, S0l2L),`
			`(S0p, S0rL, S0r2L),`
			`(S0p, S0lL, S0l2L),`
			`(S0p, S0rL, S0r2L),`
			`(N0w, N0lL, N0l2L),`
			`(N0p, N0lL, N0l2L),`
			`)`

			`extra_labels = (`
			`(S0p, S0lL, S0lp),`
			`(S0p, S0lL, S0l2L),`
			`(S0p, S0rL, S0rp),`
			`(S0p, S0rL, S0r2L),`
			`(S0p, S0lL, S0rL),`
			`(S1p, S0L, S0rL),`
			`(S1p, S0L, S0lL),`
			`)`


			`# Koo et al (2008) dependency features, using Brown clusters.`
			`clusters = (`
			`# Koo et al have (head, child) --- we have S0, N0 for both.`
			`(S0c4, N0c4),`
			`(S0c6, N0c6),`
			`(S0c, N0c),`
			`(S0p, N0c4),`
			`(S0p, N0c6),`
			`(S0p, N0c),`
			`(S0c4, N0p),`
			`(S0c6, N0p),`
			`(S0c, N0p),`
			`# Siblings --- right arc`
			`(S0c4, S0rc4, N0c4),`
			`(S0c6, S0rc6, N0c6),`
			`(S0p, S0rc4, N0c4),`
			`(S0c4, S0rp, N0c4),`
			`(S0c4, S0rc4, N0p),`
			`# Siblings --- left arc`
			`(S0c4, N0lc4, N0c4),`
			`(S0c6, N0c6, N0c6),`
			`(S0c4, N0lc4, N0p),`
			`(S0c4, N0lp, N0c4),`
			`(S0p, N0lc4, N0c4),`
			`# Grand-child, right-arc`
			`(S1c4, S0c4, N0c4),`
			`(S1c6, S0c6, N0c6),`
			`(S1p, S0c4, N0c4),`
			`(S1c4, S0p, N0c4),`
			`(S1c4, S0c4, N0p),`
			`# Grand-child, left-arc`
			`(S0lc4, S0c4, N0c4),`
			`(S0lc6, S0c6, N0c6),`
			`(S0lp, S0c4, N0c4),`
			`(S0lc4, S0p, N0c4),`
			`(S0lc4, S0c4, N0p)`
			`)`


* Refactor _ml.Model, and finish implementing HastyModel so far not worthwhile. 2014-12-31 08:40:59 +00:00			`hasty = s0_n0 + n0_n1 + trigrams`


* Work on greedy parser 2014-12-16 11:44:43 +00:00			`def pos_bigrams():`
			`kernels = [S2w, S1w, S0w, S0lw, S0rw, N0w, N0lw, N1w]`
			`bitags = []`
			`for t1, t2 in combinations(kernels, 2):`
			`feat = (t1 + 1, t2 + 1)`
			`bitags.append(feat)`
			`print "Adding %d bitags" % len(bitags)`
			`return tuple(bitags)`