spaCy/spacy/syntax/stateclass.pyx

from libc.string cimport memcpy, memset
from libc.stdint cimport uint32_t
from ..vocab cimport EMPTY_LEXEME
from ..structs cimport Entity
from ..lexeme cimport Lexeme
from ..symbols cimport punct
from ..attrs cimport IS_SPACE


cdef class StateClass:
    """Mutable transition state (stack, buffer, arcs, entities) for one sentence.

    All arrays are allocated from one memory pool with ``PADDING`` spare
    slots on each side, and the stored pointers are advanced past the leading
    padding, so indices that stray up to ``PADDING`` positions outside
    ``[0, length)`` still land inside the allocation.

    Every ``cdef`` method first forwards the call to the shadow state
    ``self.c`` and then repeats the operation on this object's own arrays;
    any value returned by ``self.c`` is not used here.
    """
    def __init__(self, int length):
        """Allocate and initialise the padded state arrays for ``length`` tokens."""
        cdef Pool mem = Pool()
        cdef int PADDING = 5
        cdef int n_slots = length + (PADDING * 2)
        self._buffer = <int*>mem.alloc(n_slots, sizeof(int))
        self._stack = <int*>mem.alloc(n_slots, sizeof(int))
        self.shifted = <bint*>mem.alloc(n_slots, sizeof(bint))
        self._sent = <TokenC*>mem.alloc(n_slots, sizeof(TokenC))
        self._ents = <Entity*>mem.alloc(n_slots, sizeof(Entity))
        cdef int i
        # Seed every slot, padding on *both* sides included, while the
        # pointers still address the start of each allocation. Previously only
        # raw slots [length, length + 2 * PADDING) received the empty lexeme,
        # which left the leading padding with a NULL ``lex``.
        for i in range(n_slots):
            self._ents[i].end = -1
            self._sent[i].lex = &EMPTY_LEXEME
        # Step over the leading padding; from here on index 0 is the first
        # real token.
        self._sent += PADDING
        self._ents += PADDING
        self._buffer += PADDING
        self._stack += PADDING
        self.shifted += PADDING
        self.mem = mem
        self.length = length
        self._break = -1
        self._s_i = 0
        self._b_i = 0
        self._e_i = 0
        for i in range(length):
            # The buffer starts out holding every token in order.
            self._buffer[i] = i
            # Edges are absolute positions relative to the shifted ``_sent``
            # pointer (see ``L``/``R``), so a childless token spans exactly
            # itself. They used to be written before the shift, which made
            # them too large by PADDING.
            self._sent[i].l_edge = i
            self._sent[i].r_edge = i
        self._empty_token.lex = &EMPTY_LEXEME

    @property
    def stack(self):
        """Set of the values ``S(i)`` for ``i`` in ``range(_s_i)`` (unordered)."""
        return {self.S(i) for i in range(self._s_i)}

    @property
    def queue(self):
        """Set of the values ``B(i)`` for ``i`` in ``range(_b_i)`` (unordered).

        NOTE(review): the range is ``_b_i``, the counter that ``push``
        increments, not a remaining-buffer length -- confirm against the
        definition of ``B``.
        """
        return {self.B(i) for i in range(self._b_i)}

    cdef int E(self, int i) nogil:
        # Start index of the i-th most recently opened entity (i == 0 is the
        # latest one), or 0 when no such entity exists.
        self.c.E(i)
        # NOTE(review): the ``>= self.length`` test also yields 0 once as many
        # entities as tokens have been opened -- confirm that is intended.
        if self._e_i <= 0 or self._e_i >= self.length:
            return 0
        if i < 0 or i >= self._e_i:
            return 0
        return self._ents[self._e_i - (i+1)].start

    cdef int L(self, int i, int idx) nogil:
        # Index of the idx-th left child of token i, counting from 1 at the
        # leftmost child. Returns -1 when idx < 1, i is outside [0, length),
        # or the token has fewer than idx left children.
        self.c.L(i, idx)
        if idx < 1:
            return -1
        if i < 0 or i >= self.length:
            return -1
        cdef const TokenC* target = &self._sent[i]
        if target.l_kids < idx:
            return -1
        # Scan rightwards from the left edge of the target's subtree.
        cdef const TokenC* ptr = &self._sent[target.l_edge]

        while ptr < target:
            # ptr's head lies to its right but still left of the target, so
            # jump straight to that head: no token between ptr and its head
            # could be a child of the target.
            if (ptr.head >= 1) and (ptr + ptr.head) < target:
                ptr += ptr.head

            elif ptr + ptr.head == target:
                # Found a left child of the target.
                idx -= 1
                if idx == 0:
                    return ptr - self._sent
                ptr += 1
            else:
                ptr += 1
        return -1

    cdef int R(self, int i, int idx) nogil:
        # Index of the idx-th right child of token i, counting from 1 at the
        # rightmost child. Returns -1 when idx < 1, i is outside [0, length),
        # or the token has fewer than idx right children.
        self.c.R(i, idx)
        if idx < 1:
            return -1
        if i < 0 or i >= self.length:
            return -1
        cdef const TokenC* target = &self._sent[i]
        if target.r_kids < idx:
            return -1
        # Scan leftwards from the right edge of the target's subtree.
        cdef const TokenC* ptr = &self._sent[target.r_edge]
        while ptr > target:
            # ptr's head lies to its left but still right of the target, so
            # jump straight to that head: no token between ptr and its head
            # could be a child of the target.
            if (ptr.head < 0) and ((ptr + ptr.head) > target):
                ptr += ptr.head
            elif ptr + ptr.head == target:
                # Found a right child of the target.
                idx -= 1
                if idx == 0:
                    return ptr - self._sent
                ptr -= 1
            else:
                ptr -= 1
        return -1

    cdef void push(self) nogil:
        # Move the first buffer word onto the stack.
        self.c.push()
        # B(0) == -1 is treated as "nothing to store"; the counters below are
        # advanced regardless.
        if self.B(0) != -1:
            self._stack[self._s_i] = self.B(0)
        self._s_i += 1
        self._b_i += 1
        # Once the buffer cursor has passed the recorded break, clear it.
        if self._b_i > self._break:
            self._break = -1

    cdef void pop(self) nogil:
        # Drop the top of the stack; does nothing to this object's stack when
        # it is already empty.
        self.c.pop()
        if self._s_i >= 1:
            self._s_i -= 1

    cdef void unshift(self) nogil:
        # Move the top of the stack back to the front of the buffer and flag
        # that word in ``shifted``.
        # NOTE(review): unlike ``pop`` there is no emptiness guard here; the
        # caller presumably guarantees a non-empty stack -- confirm.
        self.c.unshift()
        self._b_i -= 1
        self._buffer[self._b_i] = self.S(0)
        self._s_i -= 1
        self.shifted[self.B(0)] = True

    cdef void fast_forward(self) nogil:
        # Apply forced moves for as long as the buffer is empty, the stack is
        # empty, or the word on top of the stack is whitespace.
        self.c.fast_forward()
        while self.buffer_length() == 0 \
        or self.stack_depth() == 0 \
        or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):
            if self.buffer_length() == 1 and self.stack_depth() == 0:
                # One word left and nothing on the stack: push it and pop it
                # again straight away.
                self.push()
                self.pop()
            elif self.buffer_length() == 0 and self.stack_depth() == 1:
                self.pop()
            elif self.buffer_length() == 0 and self.stack_depth() >= 2:
                # Buffer exhausted: attached words are popped, unattached ones
                # go back to the buffer.
                if self.has_head(self.S(0)):
                    self.pop()
                else:
                    self.unshift()
            elif (self.length - self._b_i) >= 1 and self.stack_depth() == 0:
                self.push()
            elif Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):
                # Whitespace on the stack is attached to the first buffer word
                # with label 0, then popped.
                self.add_arc(self.B(0), self.S(0), 0)
                self.pop()
            else:
                break

    cdef void add_arc(self, int head, int child, int label) nogil:
        # Attach ``child`` to ``head`` with ``label``, replacing any existing
        # head of ``child``, and update the head's child count and edge.
        self.c.add_arc(head, child, label)
        if self.has_head(child):
            self.del_arc(self.H(child), child)

        # Heads are stored as an offset from the child.
        cdef int dist = head - child
        self._sent[child].head = dist
        self._sent[child].dep = label
        cdef int i
        if child > head:
            self._sent[head].r_kids += 1
            # Some transition systems can have a word in the buffer have a
            # rightward child, e.g. from Unshift.
            self._sent[head].r_edge = self._sent[child].r_edge
            # Propagate the new right edge to every ancestor of the head.
            i = 0
            while self.has_head(head) and i < self.length:
                head = self.H(head)
                self._sent[head].r_edge = self._sent[child].r_edge
                i += 1 # Guard against infinite loops
        else:
            # NOTE(review): the left edge is not propagated to ancestors the
            # way the right edge is above -- confirm this asymmetry is
            # intended.
            self._sent[head].l_kids += 1
            self._sent[head].l_edge = self._sent[child].l_edge

    cdef void del_arc(self, int h_i, int c_i) nogil:
        # Remove child ``c_i`` from head ``h_i``'s child count and recompute
        # that head's edge on the affected side. The child's own ``head`` and
        # ``dep`` fields are not touched here.
        self.c.del_arc(h_i, c_i)
        cdef TokenC* h = &self._sent[h_i]
        if c_i > h_i:
            # With two or more right children the edge is taken from
            # R_(h_i, 2); otherwise the head becomes its own right edge.
            h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i
            h.r_kids -= 1
        else:
            # Mirror image of the branch above for the left side.
            h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i
            h.l_kids -= 1

    cdef void open_ent(self, int label) nogil:
        # Start a new entity with ``label`` at the first buffer word; its end
        # stays -1 until ``close_ent`` is called.
        self.c.open_ent(label)
        self._ents[self._e_i].start = self.B(0)
        self._ents[self._e_i].label = label
        self._ents[self._e_i].end = -1
        self._e_i += 1

    cdef void close_ent(self) nogil:
        # End the most recently opened entity just after the first buffer
        # word, and set that word's ``ent_iob`` to 1.
        self.c.close_ent()
        # Note that we don't decrement _e_i here! We want to maintain all
        # entities, not over-write them...
        self._ents[self._e_i-1].end = self.B(0)+1
        self._sent[self.B(0)].ent_iob = 1

    cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil:
        # Set the IOB code and entity type of token i; indices outside
        # [0, length) leave this object's tokens untouched.
        self.c.set_ent_tag(i, ent_iob, ent_type)
        if 0 <= i < self.length:
            self._sent[i].ent_iob = ent_iob
            self._sent[i].ent_type = ent_type

    cdef void set_break(self, int _) nogil:
        # Flag the first buffer word with ``sent_start`` and remember the
        # current buffer cursor as the break position. The argument is only
        # forwarded to ``self.c``.
        self.c.set_break(_)
        if 0 <= self.B(0) < self.length:
            self._sent[self.B(0)].sent_start = True
            self._break = self._b_i

    cdef void clone(self, StateClass src) nogil:
        # Overwrite this state with a copy of ``src``. ``self.length``
        # elements of each array are copied; the padding is not.
        # NOTE(review): assumes src.length >= self.length -- confirm callers.
        self.c.clone(src.c)
        memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
        memcpy(self._stack, src._stack, self.length * sizeof(int))
        memcpy(self._buffer, src._buffer, self.length * sizeof(int))
        memcpy(self._ents, src._ents, self.length * sizeof(Entity))
        # ``shifted`` is part of the state as well (``unshift`` writes it);
        # without this copy a clone lost the record of unshifted words.
        memcpy(self.shifted, src.shifted, self.length * sizeof(bint))
        self._b_i = src._b_i
        self._s_i = src._s_i
        self._e_i = src._e_i
        self._break = src._break

    def print_state(self, words):
        """Return a one-line debug rendering of the state.

        The string holds the third, second and top stack words, each
        suffixed with ``_<head offset>``, then ``|``, then the first two
        buffer words. ``'_'`` is appended to ``words``, presumably so that an
        index of -1 from ``S``/``B`` renders as a placeholder.
        """
        words = list(words) + ['_']
        top = words[self.S(0)] + '_%d' % self.S_(0).head
        second = words[self.S(1)] + '_%d' % self.S_(1).head
        third = words[self.S(2)] + '_%d' % self.S_(2).head
        n0 = words[self.B(0)]
        n1 = words[self.B(1)]
        return ' '.join((third, second, top, '|', n0, n1))
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`from libc.string cimport memcpy, memset`
			`from libc.stdint cimport uint32_t`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`from ..vocab cimport EMPTY_LEXEME`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 02:20:23 +00:00			`from ..structs cimport Entity`
* Unwind limit to sentence boundary detection that prevents it from inserting boundaries on whitespace. Replace it with a check for whitespace in StateClass.fast_forward, so that whitespace is LeftArced when it's on the stack. This should prevent the previous problem of whitespace-only sentences. Should fix Issue #184, but may cause further problems. Needs testing. 2016-01-19 01:54:15 +00:00			`from ..lexeme cimport Lexeme`
			`from ..symbols cimport punct`
			`from ..attrs cimport IS_SPACE`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00

			`cdef class StateClass:`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`def __init__(self, int length):`
			`cdef Pool mem = Pool()`
* Pad buffers in state 2015-06-28 08:36:14 +00:00			`cdef int PADDING = 5`
			`self._buffer = <int*>mem.alloc(length + (PADDING * 2), sizeof(int))`
			`self._stack = <int*>mem.alloc(length + (PADDING * 2), sizeof(int))`
			`self.shifted = <bint*>mem.alloc(length + (PADDING * 2), sizeof(bint))`
			`self._sent = <TokenC*>mem.alloc(length + (PADDING * 2), sizeof(TokenC))`
			`self._ents = <Entity*>mem.alloc(length + (PADDING * 2), sizeof(Entity))`
* Fix handling of entities in StateClass 2015-06-16 21:35:21 +00:00			`cdef int i`
* Pad buffers in state 2015-06-28 08:36:14 +00:00			`for i in range(length + (PADDING * 2)):`
* Fix handling of entities in StateClass 2015-06-16 21:35:21 +00:00			`self._ents[i].end = -1`
* Fix L/R edge bug, by ensuring l_edge and r_edge are preset, and fixing the way the edge update in del_arc. Bugs keep arising here because the edges are absolute positions, where everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents? 2015-09-09 01:39:46 +00:00			`self._sent[i].l_edge = i`
			`self._sent[i].r_edge = i`
* Pad buffers in state 2015-06-28 08:36:14 +00:00			`for i in range(length, length + (PADDING * 2)):`
* Add padding to arrays in stateclass. May be papering over a deeper bug. 2015-06-23 01:03:22 +00:00			`self._sent[i].lex = &EMPTY_LEXEME`
* Pad buffers in state 2015-06-28 08:36:14 +00:00			`self._sent += PADDING`
			`self._ents += PADDING`
			`self._buffer += PADDING`
			`self._stack += PADDING`
			`self.shifted += PADDING`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`self.mem = mem`
			`self.length = length`
* Prepare for break transition, with fast-forwarding. 86.5 on 1k nw gold preproc 2015-06-10 12:08:30 +00:00			`self._break = -1`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`self._s_i = 0`
			`self._b_i = 0`
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00			`self._e_i = 0`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`for i in range(length):`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`self._buffer[i] = i`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`self._empty_token.lex = &EMPTY_LEXEME`

* Add stack and queue properties to stateclass, for python access 2015-08-08 21:32:42 +00:00			`@property`
			`def stack(self):`
			`return {self.S(i) for i in range(self._s_i)}`

			`@property`
			`def queue(self):`
* Fix stateclass.queue 2015-08-08 22:39:02 +00:00			`return {self.B(i) for i in range(self._b_i)}`
* Add stack and queue properties to stateclass, for python access 2015-08-08 21:32:42 +00:00
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00			`cdef int E(self, int i) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.E(i)`
* Add some bounds checking around state arrays 2015-06-23 02:13:09 +00:00			`if self._e_i <= 0 or self._e_i >= self.length:`
* Fix bounds checking on entities 2015-06-23 02:35:08 +00:00			`return 0`
* Correct screw ups from the previous commits 2015-11-06 19:51:41 +00:00			`if i < 0 or i >= self._e_i:`
* Fix bounds checking on entities 2015-06-23 02:35:08 +00:00			`return 0`
* Fix bone-headed mistake in StateClass.E 2015-11-06 20:35:28 +00:00			`return self._ents[self._e_i - (i+1)].start`
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`cdef int L(self, int i, int idx) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.L(i, idx)`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`if idx < 1:`
			`return -1`
			`if i < 0 or i >= self.length:`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`return -1`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`cdef const TokenC* target = &self._sent[i]`
* Check valency in L and R feature methods, to make feature calculation faster 2015-06-28 22:17:29 +00:00			`if target.l_kids < idx:`
			`return -1`
* Improve efficiency of L and R features, correcting the non-linear-in-length problem. 2015-07-09 10:17:26 +00:00			`cdef const TokenC* ptr = &self._sent[target.l_edge]`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00
			`while ptr < target:`
			`# If this head is still to the right of us, we can skip to it`
			`# No token that's between this token and this head could be our`
			`# child.`
			`if (ptr.head >= 1) and (ptr + ptr.head) < target:`
			`ptr += ptr.head`

			`elif ptr + ptr.head == target:`
			`idx -= 1`
			`if idx == 0:`
			`return ptr - self._sent`
			`ptr += 1`
			`else:`
			`ptr += 1`
			`return -1`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00
			`cdef int R(self, int i, int idx) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.R(i, idx)`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`if idx < 1:`
			`return -1`
			`if i < 0 or i >= self.length:`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`return -1`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`cdef const TokenC* target = &self._sent[i]`
* Check valency in L and R feature methods, to make feature calculation faster 2015-06-28 22:17:29 +00:00			`if target.r_kids < idx:`
			`return -1`
* Improve efficiency of L and R features, correcting the non-linear-in-length problem. 2015-07-09 10:17:26 +00:00			`cdef const TokenC* ptr = &self._sent[target.r_edge]`
* Prepare to switch to using state class, instead of state struct 2015-06-09 19:20:14 +00:00			`while ptr > target:`
			`# If this head is still to the right of us, we can skip to it`
			`# No token that's between this token and this head could be our`
			`# child.`
			`if (ptr.head < 0) and ((ptr + ptr.head) > target):`
			`ptr += ptr.head`
			`elif ptr + ptr.head == target:`
			`idx -= 1`
			`if idx == 0:`
			`return ptr - self._sent`
			`ptr -= 1`
			`else:`
			`ptr -= 1`
			`return -1`

* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`cdef void push(self) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.push()`
* Add some bounds checking around state arrays 2015-06-23 02:13:09 +00:00			`if self.B(0) != -1:`
			`self._stack[self._s_i] = self.B(0)`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`self._s_i += 1`
			`self._b_i += 1`
* Fixes to unshift/fast-forward strategy. Getting 91.55 greedy on NW dev, gold preproc 2015-06-11 23:50:23 +00:00			`if self._b_i > self._break:`
* Prepare for break transition, with fast-forwarding. 86.5 on 1k nw gold preproc 2015-06-10 12:08:30 +00:00			`self._break = -1`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00
			`cdef void pop(self) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.pop()`
* Add some bounds checking around state arrays 2015-06-23 02:13:09 +00:00			`if self._s_i >= 1:`
			`self._s_i -= 1`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00
* Add unshift action to StateClass, and track which moves have been shifted 2015-06-10 08:13:03 +00:00			`cdef void unshift(self) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.unshift()`
* Add unshift action to StateClass, and track which moves have been shifted 2015-06-10 08:13:03 +00:00			`self._b_i -= 1`
			`self._buffer[self._b_i] = self.S(0)`
			`self._s_i -= 1`
* Fix shifted bit vector in stateclass --- should reflect whether the word has been unshifted. 2015-06-10 09:33:09 +00:00			`self.shifted[self.B(0)] = True`
* Add unshift action to StateClass, and track which moves have been shifted 2015-06-10 08:13:03 +00:00
* Prepare for break transition, with fast-forwarding. 86.5 on 1k nw gold preproc 2015-06-10 12:08:30 +00:00			`cdef void fast_forward(self) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.fast_forward()`
* Unwind limit to sentence boundary detection that prevents it from inserting boundaries on whitespace. Replace it with a check for whitespace in StateClass.fast_forward, so that whitespace is LeftArced when it's on the stack. This should prevent the previous problem of whitespace-only sentences. Should fix Issue #184, but may cause further problems. Needs testing. 2016-01-19 01:54:15 +00:00			`while self.buffer_length() == 0 \`
			`or self.stack_depth() == 0 \`
			`or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):`
* Prepare for break transition, with fast-forwarding. 86.5 on 1k nw gold preproc 2015-06-10 12:08:30 +00:00			`if self.buffer_length() == 1 and self.stack_depth() == 0:`
			`self.push()`
			`self.pop()`
			`elif self.buffer_length() == 0 and self.stack_depth() == 1:`
			`self.pop()`
			`elif self.buffer_length() == 0 and self.stack_depth() >= 2:`
			`if self.has_head(self.S(0)):`
			`self.pop()`
			`else:`
			`self.unshift()`
* Fixes to unshift/fast-forward strategy. Getting 91.55 greedy on NW dev, gold preproc 2015-06-11 23:50:23 +00:00			`elif (self.length - self._b_i) >= 1 and self.stack_depth() == 0:`
* Prepare for break transition, with fast-forwarding. 86.5 on 1k nw gold preproc 2015-06-10 12:08:30 +00:00			`self.push()`
* Unwind limit to sentence boundary detection that prevents it from inserting boundaries on whitespace. Replace it with a check for whitespace in StateClass.fast_forward, so that whitespace is LeftArced when it's on the stack. This should prevent the previous problem of whitespace-only sentences. Should fix Issue #184, but may cause further problems. Needs testing. 2016-01-19 01:54:15 +00:00			`elif Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):`
			`self.add_arc(self.B(0), self.S(0), 0)`
			`self.pop()`
* Prepare for break transition, with fast-forwarding. 86.5 on 1k nw gold preproc 2015-06-10 12:08:30 +00:00			`else:`
			`break`

* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`cdef void add_arc(self, int head, int child, int label) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.add_arc(head, child, label)`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`if self.has_head(child):`
			`self.del_arc(self.H(child), child)`

			`cdef int dist = head - child`
			`self._sent[child].head = dist`
			`self._sent[child].dep = label`
* Fix edge features 2015-06-23 13:50:56 +00:00			`cdef int i`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`if child > head:`
* Fix valency features in StateClass 2015-06-14 15:50:26 +00:00			`self._sent[head].r_kids += 1`
* Bug fixes to edge calculation 2015-06-24 02:28:02 +00:00			`# Some transition systems can have a word in the buffer have a`
			`# rightward child, e.g. from Unshift.`
			`self._sent[head].r_edge = self._sent[child].r_edge`
* Fix edge features 2015-06-23 13:50:56 +00:00			`i = 0`
			`while self.has_head(head) and i < self.length:`
			`head = self.H(head)`
* Bug fixes to edge calculation 2015-06-24 02:28:02 +00:00			`self._sent[head].r_edge = self._sent[child].r_edge`
* Fix edge features 2015-06-23 13:50:56 +00:00			`i += 1 # Guard against infinite loops`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`else:`
* Fix valency features in StateClass 2015-06-14 15:50:26 +00:00			`self._sent[head].l_kids += 1`
* Fix edge features 2015-06-23 13:50:56 +00:00			`self._sent[head].l_edge = self._sent[child].l_edge`

			`cdef void del_arc(self, int h_i, int c_i) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.del_arc(h_i, c_i)`
* Fix edge features 2015-06-23 13:50:56 +00:00			`cdef int dist = h_i - c_i`
			`cdef TokenC* h = &self._sent[h_i]`
			`if c_i > h_i:`
* Fix L/R edge bug, by ensuring l_edge and r_edge are preset, and fixing the way the edge update in del_arc. Bugs keep arising here because the edges are absolute positions, where everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents? 2015-09-09 01:39:46 +00:00			`h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i`
* Fix edge features 2015-06-23 13:50:56 +00:00			`h.r_kids -= 1`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`else:`
* Fix L/R edge bug, by ensuring l_edge and r_edge are preset, and fixing the way the edge update in del_arc. Bugs keep arising here because the edges are absolute positions, where everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents? 2015-09-09 01:39:46 +00:00			`h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i`
* Fix edge features 2015-06-23 13:50:56 +00:00			`h.l_kids -= 1`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00			`cdef void open_ent(self, int label) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.open_ent(label)`
* Fix bounds checking on entities 2015-06-23 02:35:08 +00:00			`self._ents[self._e_i].start = self.B(0)`
			`self._ents[self._e_i].label = label`
			`self._ents[self._e_i].end = -1`
			`self._e_i += 1`
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00
			`cdef void close_ent(self) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.close_ent()`
* Correct screw ups from the previous commits 2015-11-06 19:51:41 +00:00			`# Note that we don't decrement _e_i here! We want to maintain all`
			`# entities, not over-write them...`
			`self._ents[self._e_i-1].end = self.B(0)+1`
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00			`self._sent[self.B(0)].ent_iob = 1`

			`cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.set_ent_tag(i, ent_iob, ent_type)`
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00			`if 0 <= i < self.length:`
			`self._sent[i].ent_iob = ent_iob`
			`self._sent[i].ent_type = ent_type`

* Fixes to unshift/fast-forward strategy. Getting 91.55 greedy on NW dev, gold preproc 2015-06-11 23:50:23 +00:00			`cdef void set_break(self, int _) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.set_break(_)`
* Add some bounds checking around state arrays 2015-06-23 02:13:09 +00:00			`if 0 <= self.B(0) < self.length:`
* Rename sent_start to sent_end, to reflect its new usage in the Break transition 2015-06-23 03:39:23 +00:00			`self._sent[self.B(0)].sent_start = True`
* Add some bounds checking around state arrays 2015-06-23 02:13:09 +00:00			`self._break = self._b_i`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00
			`cdef void clone(self, StateClass src) nogil:`
* Shadow StateClass with StateC, to start proxying 2016-02-01 00:16:14 +00:00			`self.c.clone(src.c)`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`memcpy(self._sent, src._sent, self.length * sizeof(TokenC))`
			`memcpy(self._stack, src._stack, self.length * sizeof(int))`
			`memcpy(self._buffer, src._buffer, self.length * sizeof(int))`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 02:20:23 +00:00			`memcpy(self._ents, src._ents, self.length * sizeof(Entity))`
* Add StateClass, to replace/refactor the mess in _state 2015-06-08 23:39:54 +00:00			`self._b_i = src._b_i`
			`self._s_i = src._s_i`
* Move StateClass into the interface for is_valid 2015-06-09 21:23:28 +00:00			`self._e_i = src._e_i`
* Add some bounds checking around state arrays 2015-06-23 02:13:09 +00:00			`self._break = src._break`
* Move StateClass into interface of transition functions 2015-06-09 23:35:28 +00:00
			`def print_state(self, words):`
			`words = list(words) + ['_']`
* Add unshift action to StateClass, and track which moves have been shifted 2015-06-10 08:13:03 +00:00			`top = words[self.S(0)] + '_%d' % self.S_(0).head`
			`second = words[self.S(1)] + '_%d' % self.S_(1).head`
			`third = words[self.S(2)] + '_%d' % self.S_(2).head`
* Move StateClass into interface of transition functions 2015-06-09 23:35:28 +00:00			`n0 = words[self.B(0)]`
			`n1 = words[self.B(1)]`
* Upd stateclass.print_state 2015-06-14 15:44:29 +00:00			`return ' '.join((third, second, top, '|', n0, n1))`