# spaCy/examples/vectors_fast_text.py
#
# 31 lines
# 823 B
# Python

'''Load vectors for a language trained using FastText
https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
'''
from __future__ import unicode_literals
import plac
import numpy
import spacy.language
def main(vectors_loc):
    '''Load word vectors from a text-format vectors file into a blank
    spaCy pipeline and print a sample similarity score.

    The file is expected to start with a header line holding two
    whitespace-separated integers (row count and vector width), followed by
    one entry per line: the word, then the vector components separated by
    single spaces.

    vectors_loc: Path to the vectors file.
    '''
    nlp = spacy.language.Language()
    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        # The row count is not needed; only the vector width is used.
        _, nr_dim = header.split()
        nr_dim = int(nr_dim)
        # NOTE(review): newer spaCy releases appear to expose this as
        # `reset_vectors(width=...)` -- confirm against the installed version.
        nlp.vocab.clear_vectors(nr_dim)
        for line in file_:
            # Drop the line terminator (and any trailing space) so that the
            # right-split below yields exactly nr_dim numeric fields.
            line = line.decode('utf8').rstrip()
            if not line:
                # Tolerate blank lines (e.g. at end of file) instead of
                # failing with an IndexError on pieces[0].
                continue
            # Split from the right on single spaces, at most nr_dim times.
            # A bare split() also breaks on Unicode whitespace such as
            # U+00A0, which would tear apart any word containing it and
            # feed word fragments to float().
            pieces = line.rsplit(' ', nr_dim)
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)
    # Quick sanity check of the loaded vectors.
    doc = nlp(u'class colspan')
    print(doc[0].similarity(doc[1]))
# Run as a script only: plac builds the command-line interface from main's
# signature and invokes it with the parsed arguments.
if __name__ == '__main__':
    plac.call(main)