From 5e94b5d58187d45818d5b52dda243aa6b69eadf4 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 23 Jun 2015 00:07:06 +0200 Subject: [PATCH] * Have Tokens return proper numpy arrays, not Cython views. --- spacy/tokens.pxd | 4 ++-- spacy/tokens.pyx | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index 9ddd126a1..8b3ff9fe9 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -1,7 +1,7 @@ from libc.stdint cimport uint32_t from numpy cimport ndarray -cimport numpy +cimport numpy as np from cymem.cymem cimport Pool from thinc.typedefs cimport atom_t @@ -47,7 +47,7 @@ cdef class Tokens: cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1 - cpdef long[:,:] to_array(self, object features) + cpdef np.ndarray to_array(self, object features) cdef int set_parse(self, const TokenC* parsed) except -1 diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index 3ee559dcf..3b132c4c9 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -18,7 +18,9 @@ from .structs cimport UniStr from unidecode import unidecode -cimport numpy +cimport numpy as np +np.import_array() + import numpy cimport cython @@ -207,7 +209,7 @@ cdef class Tokens: return idx + t.lex.length @cython.boundscheck(False) - cpdef long[:,:] to_array(self, object py_attr_ids): + cpdef np.ndarray to_array(self, object py_attr_ids): """Given a list of M attribute IDs, export the tokens to a numpy ndarray of shape N*M, where N is the length of the sentence. @@ -221,10 +223,10 @@ cdef class Tokens: """ cdef int i, j cdef attr_id_t feature - cdef numpy.ndarray[long, ndim=2] output + cdef np.ndarray[long, ndim=2] output # Make an array from the attributes --- otherwise our inner loop is Python # dict iteration. - cdef numpy.ndarray[long, ndim=1] attr_ids = numpy.asarray(py_attr_ids) + cdef np.ndarray[long, ndim=1] attr_ids = numpy.asarray(py_attr_ids) output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.int) for i in range(self.length): for j, feature in enumerate(attr_ids): @@ -464,7 +466,9 @@ cdef class Token: property repvec: def __get__(self): - return numpy.asarray( self.c.lex.repvec) + cdef int length = self.vocab.repvec_length + repvec_view = self.c.lex.repvec + return numpy.asarray(repvec_view) property n_lefts: def __get__(self):