spaCy/spacy/syntax/stateclass.pyx

81 lines
2.3 KiB
Cython

# coding: utf-8
# cython: infer_types=True
from __future__ import unicode_literals
from libc.string cimport memcpy, memset
from libc.stdint cimport uint32_t, uint64_t
from ..vocab cimport EMPTY_LEXEME
from ..structs cimport Entity
from ..lexeme cimport Lexeme
from ..symbols cimport punct
from ..attrs cimport IS_SPACE
from ..attrs cimport attr_id_t
from ..tokens.token cimport Token
from ..tokens.doc cimport Doc
cdef class StateClass:
def __init__(self, Doc doc=None, int offset=0):
cdef Pool mem = Pool()
self.mem = mem
if doc is not None:
self.c = new StateC(doc.c, doc.length)
self.c.offset = offset
def __dealloc__(self):
del self.c
@property
def stack(self):
return {self.S(i) for i in range(self.c._s_i)}
@property
def queue(self):
return {self.B(i) for i in range(self.c.buffer_length())}
@property
def token_vector_lenth(self):
return self.doc.tensor.shape[1]
def is_final(self):
return self.c.is_final()
def copy(self):
cdef StateClass new_state = StateClass.init(self.c._sent, self.c.length)
new_state.c.clone(self.c)
return new_state
def print_state(self, words):
words = list(words) + ['_']
top = words[self.S(0)] + '_%d' % self.S_(0).head
second = words[self.S(1)] + '_%d' % self.S_(1).head
third = words[self.S(2)] + '_%d' % self.S_(2).head
n0 = words[self.B(0)]
n1 = words[self.B(1)]
return ' '.join((third, second, top, '|', n0, n1))
@classmethod
def nr_context_tokens(cls):
return 13
def set_context_tokens(self, int[::1] output):
output[0] = self.B(0)
output[1] = self.B(1)
output[2] = self.S(0)
output[3] = self.S(1)
output[4] = self.S(2)
output[5] = self.L(self.S(0), 1)
output[6] = self.L(self.S(0), 2)
output[6] = self.R(self.S(0), 1)
output[7] = self.L(self.B(0), 1)
output[8] = self.R(self.S(0), 2)
output[9] = self.L(self.S(1), 1)
output[10] = self.L(self.S(1), 2)
output[11] = self.R(self.S(1), 1)
output[12] = self.R(self.S(1), 2)
for i in range(13):
if output[i] != -1:
output[i] += self.c.offset