* Add way to load vectors from bz2 file to vocab

This commit is contained in:
Matthew Honnibal 2015-09-17 12:58:23 +10:00
parent 77856c4fcd
commit d6945bf880
1 changed file with 20 additions and 0 deletions

View File

@ -262,6 +262,26 @@ cdef class Vocab:
fp.close() fp.close()
def load_vectors(self, loc):
    """Load word vectors from ``loc``, choosing the reader by file name.

    A path whose name ends in ``bz2`` is handed to ``load_vectors_bz2``;
    every other path is handed to ``load_vectors_bin``.
    """
    # Only the selected reader attribute is looked up, as in an if/else.
    reader = self.load_vectors_bz2 if loc.endswith('bz2') else self.load_vectors_bin
    reader(loc)
def load_vectors_bz2(self, loc):
    """Read word vectors from a bz2-compressed text file into the vocab.

    Each non-blank line is split on whitespace: the first field is the
    word and every remaining field is parsed with ``float`` as one
    component of that word's vector. The word is looked up in
    ``self.strings``, its lexeme is fetched with ``self.get_by_orth``, and
    a float buffer allocated from ``self.mem`` is stored on
    ``lexeme.repvec`` and filled with the parsed components.

    Blank or whitespace-only lines are skipped; previously they raised
    ``IndexError`` from popping an empty list and aborted the load.

    Args:
        loc: Path of the bz2 file, passed to ``bz2.BZ2File``.

    Raises:
        ValueError: If a vector component cannot be parsed by ``float``.
    """
    cdef LexemeC* lexeme
    cdef attr_t orth
    with bz2.BZ2File(loc, 'r') as file_:
        for line in file_:
            pieces = line.split()
            if not pieces:
                # Nothing to read on this line (e.g. a trailing newline).
                continue
            word_str = pieces[0]
            values = pieces[1:]
            orth = self.strings[word_str]
            lexeme = <LexemeC*><void*>self.get_by_orth(self.mem, orth)
            # NOTE(review): rows are not checked for a consistent width;
            # confirm whether readers of repvec assume a fixed length.
            lexeme.repvec = <float*>self.mem.alloc(len(values), sizeof(float))
            for i, val_str in enumerate(values):
                lexeme.repvec[i] = float(val_str)
def load_vectors_bin(self, loc):
cdef CFile file_ = CFile(loc, b'rb') cdef CFile file_ = CFile(loc, b'rb')
cdef int32_t word_len cdef int32_t word_len
cdef int32_t vec_len cdef int32_t vec_len