diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index d1b2ef10b..36dee698e 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -1,3 +1,6 @@ +import numpy as np +cimport numpy as np + from cymem.cymem cimport Pool from .lexeme cimport Lexeme @@ -28,6 +31,8 @@ cdef class Tokens: cdef int push_back(self, int i, Lexeme* lexeme) except -1 cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1 + cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features) + cdef class Token: cdef StringStore _string_store diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index 721e6bb80..ba8812f2e 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -102,6 +102,16 @@ cdef class Tokens: elif tag_type == ENTITY: self.ner[i] = tag + cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features): + cdef int i, j + cdef np.ndarray[atom_t, ndim=2] output + output = np.ndarray(shape=(self.length, len(features)), dtype=int) + for i in range(self.length): + for j, feature in enumerate(features): + output[i, j] = self.lex[i].sic + #output[i, j] = lexeme_get_feature(self.lex[i], feature) + return output + def _realloc(self, new_size): self.max_length = new_size n = new_size + (PADDING * 2)