From e0a2aa92891af890738b6290cd622cc87fe652ac Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 16 Sep 2017 12:45:09 -0500 Subject: [PATCH] Support having word vectors data on GPU --- spacy/vectors.pyx | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 0eec5a00a..b912be80b 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -6,6 +6,8 @@ import msgpack import msgpack_numpy msgpack_numpy.patch() cimport numpy as np +from thinc.neural.util import get_array_module +from thinc.neural._classes.model import Model from .typedefs cimport attr_t from .strings cimport StringStore @@ -31,7 +33,7 @@ cdef class Vectors: self.i = 0 self.data = data self.key2row = {} - self.keys = np.ndarray((self.data.shape[0],), dtype='uint64') + self.keys = np.ndarray((self.data.shape[0],), dtype='uint64') def __reduce__(self): return (Vectors, (self.strings, self.data)) @@ -118,9 +120,14 @@ cdef class Vectors: self.data def to_disk(self, path, **exclude): + xp = get_array_module(self.data) + if xp is numpy: + save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False) + else: + save_array = lambda arr, file_: xp.save(file_, arr) serializers = OrderedDict(( - ('vectors', lambda p: numpy.save(p.open('wb'), self.data, allow_pickle=False)), - ('keys', lambda p: numpy.save(p.open('wb'), self.keys, allow_pickle=False)), + ('vectors', lambda p: save_array(self.data, p.open('wb'))), + ('keys', lambda p: xp.save(p.open('wb'), self.keys)) )) return util.to_disk(path, serializers, exclude) @@ -133,8 +140,9 @@ cdef class Vectors: self.key2row[key] = i def load_vectors(path): + xp = Model.ops.xp if path.exists(): - self.data = numpy.load(path) + self.data = xp.load(path) serializers = OrderedDict(( ('keys', load_keys),