* Work on get_array method of Tokens

2014-12-02 23:48:05 +11:00 · 2014-12-02 23:48:05 +11:00 · 522bb0346e
parent 8c2938fe01
commit 522bb0346e
2 changed files with 15 additions and 0 deletions
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -1,3 +1,6 @@
 import numpy as np
 cimport numpy as np
 from cymem.cymem cimport Pool
 from .lexeme cimport Lexeme
@@ -28,6 +31,8 @@ cdef class Tokens:
    # C-level declarations of Tokens methods. `except -1` reserves a return
    # value of -1 to signal that a Python exception was raised in the callee.
    cdef int push_back(self, int i, Lexeme* lexeme) except -1
    cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1
    # Returns a 2-D NumPy array of atom_t values; takes a Python list of
    # features. cpdef makes it callable from both Cython and Python code.
    cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features)
 cdef class Token:
    cdef StringStore _string_store
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -102,6 +102,16 @@ cdef class Tokens:
        elif tag_type == ENTITY:
            self.ner[i] = tag
    cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features):
        """Export per-token values as a 2-D NumPy array.

        Work in progress: every column is currently filled with the ``sic``
        field of the token's lexeme, regardless of what ``features`` contains.
        For now the list only determines how many columns are produced.

        Args:
            features: List whose length sets the number of output columns.

        Returns:
            Array of shape ``(self.length, len(features))``.
        """
        cdef int i, j
        # The column count is loop-invariant, so compute it once up front.
        cdef int n_features = len(features)
        cdef np.ndarray[atom_t, ndim=2] output
        # np.empty is the documented way to allocate an uninitialised array;
        # every cell is assigned below, so no uninitialised value escapes.
        # NOTE(review): dtype=int must agree with atom_t for the typed-buffer
        # assignment to be accepted at runtime -- confirm against the typedef.
        output = np.empty((self.length, n_features), dtype=int)
        for i in range(self.length):
            for j in range(n_features):
                # TODO: replace with a per-feature lookup, e.g.
                # lexeme_get_feature(self.lex[i], features[j]), once available.
                output[i, j] = self.lex[i].sic
        return output
    def _realloc(self, new_size):
        self.max_length = new_size
        n = new_size + (PADDING * 2)