* Work on get_array method of Tokens

This commit is contained in:
Matthew Honnibal 2014-12-02 23:48:05 +11:00
parent 8c2938fe01
commit 522bb0346e
2 changed files with 15 additions and 0 deletions

View File

@ -1,3 +1,6 @@
import numpy as np
cimport numpy as np
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from .lexeme cimport Lexeme from .lexeme cimport Lexeme
@ -28,6 +31,8 @@ cdef class Tokens:
cdef int push_back(self, int i, Lexeme* lexeme) except -1 cdef int push_back(self, int i, Lexeme* lexeme) except -1
cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1 cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1
cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features)
cdef class Token: cdef class Token:
cdef StringStore _string_store cdef StringStore _string_store

View File

@ -102,6 +102,16 @@ cdef class Tokens:
elif tag_type == ENTITY: elif tag_type == ENTITY:
self.ner[i] = tag self.ner[i] = tag
cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features):
    """Build a 2-D array with one row per token and one column per feature.

    The result has shape ``(self.length, len(features))`` and is allocated
    with ``dtype=int``.

    Args:
        features: List of requested features; one output column is
            produced for each entry.

    Returns:
        The filled array.
    """
    cdef int row, col
    cdef Py_ssize_t n_cols = len(features)
    cdef np.ndarray[atom_t, ndim=2] table
    table = np.ndarray(shape=(self.length, n_cols), dtype=int)
    for row in range(self.length):
        for col in range(n_cols):
            # TODO: the entries of `features` are not consulted yet -- every
            # column receives the lexeme's `sic` value. The intended
            # per-feature lookup (lexeme_get_feature(self.lex[row], feature))
            # is still disabled.
            table[row, col] = self.lex[row].sic
    return table
def _realloc(self, new_size): def _realloc(self, new_size):
self.max_length = new_size self.max_length = new_size
n = new_size + (PADDING * 2) n = new_size + (PADDING * 2)