mirror of https://github.com/explosion/spaCy.git
Merge branch 'master' of ssh://github.com/explosion/spaCy
This commit is contained in:
commit
e16e78a737
|
@ -7,6 +7,7 @@ import cytoolz
|
||||||
import numpy
|
import numpy
|
||||||
from keras.models import Sequential, model_from_json
|
from keras.models import Sequential, model_from_json
|
||||||
from keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
|
from keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
|
||||||
|
from keras.layers import TimeDistributed
|
||||||
from keras.optimizers import Adam
|
from keras.optimizers import Adam
|
||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
|
|
||||||
|
@ -48,10 +49,16 @@ class SentimentAnalyser(object):
|
||||||
|
|
||||||
|
|
||||||
def get_features(docs, max_length):
|
def get_features(docs, max_length):
|
||||||
Xs = numpy.zeros((len(list(docs)), max_length), dtype='int32')
|
docs = list(docs)
|
||||||
|
Xs = numpy.zeros((len(docs), max_length), dtype='int32')
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
for j, token in enumerate(doc[:max_length]):
|
j = 0
|
||||||
Xs[i, j] = token.rank if token.has_vector else 0
|
for token in doc:
|
||||||
|
if token.has_vector and not token.is_punct and not token.is_space:
|
||||||
|
Xs[i, j] = token.rank + 1
|
||||||
|
j += 1
|
||||||
|
if j >= max_length:
|
||||||
|
break
|
||||||
return Xs
|
return Xs
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,9 +82,12 @@ def compile_lstm(embeddings, shape, settings):
|
||||||
embeddings.shape[1],
|
embeddings.shape[1],
|
||||||
input_length=shape['max_length'],
|
input_length=shape['max_length'],
|
||||||
trainable=False,
|
trainable=False,
|
||||||
weights=[embeddings]
|
weights=[embeddings],
|
||||||
|
mask_zero=True
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
model.add(TimeDistributed(Dense(shape['nr_hidden'] * 2)))
|
||||||
|
model.add(Dropout(settings['dropout']))
|
||||||
model.add(Bidirectional(LSTM(shape['nr_hidden'])))
|
model.add(Bidirectional(LSTM(shape['nr_hidden'])))
|
||||||
model.add(Dropout(settings['dropout']))
|
model.add(Dropout(settings['dropout']))
|
||||||
model.add(Dense(shape['nr_class'], activation='sigmoid'))
|
model.add(Dense(shape['nr_class'], activation='sigmoid'))
|
||||||
|
@ -87,11 +97,11 @@ def compile_lstm(embeddings, shape, settings):
|
||||||
|
|
||||||
|
|
||||||
def get_embeddings(vocab):
|
def get_embeddings(vocab):
|
||||||
max_rank = max(lex.rank for lex in vocab if lex.has_vector)
|
max_rank = max(lex.rank+1 for lex in vocab if lex.has_vector)
|
||||||
vectors = numpy.ndarray((max_rank+1, vocab.vectors_length), dtype='float32')
|
vectors = numpy.ndarray((max_rank+1, vocab.vectors_length), dtype='float32')
|
||||||
for lex in vocab:
|
for lex in vocab:
|
||||||
if lex.has_vector:
|
if lex.has_vector:
|
||||||
vectors[lex.rank] = lex.vector
|
vectors[lex.rank + 1] = lex.vector
|
||||||
return vectors
|
return vectors
|
||||||
|
|
||||||
|
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -202,7 +202,8 @@ def setup_package():
|
||||||
'six',
|
'six',
|
||||||
'cloudpickle',
|
'cloudpickle',
|
||||||
'pathlib',
|
'pathlib',
|
||||||
'sputnik>=0.9.2,<0.10.0'],
|
'sputnik>=0.9.2,<0.10.0',
|
||||||
|
'ujson>=1.35'],
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Development Status :: 5 - Production/Stable',
|
'Development Status :: 5 - Production/Stable',
|
||||||
'Environment :: Console',
|
'Environment :: Console',
|
||||||
|
|
|
@ -133,8 +133,8 @@ cdef class Span:
|
||||||
|
|
||||||
property vector_norm:
|
property vector_norm:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector_norm' in self.doc.getters_for_spans:
|
if 'vector_norm' in self.doc.user_span_hooks:
|
||||||
return self.doc.getters_for_spans['vector'](self)
|
return self.doc.user_span_hooks['vector'](self)
|
||||||
cdef float value
|
cdef float value
|
||||||
if self._vector_norm is None:
|
if self._vector_norm is None:
|
||||||
self._vector_norm = 1e-20
|
self._vector_norm = 1e-20
|
||||||
|
|
Loading…
Reference in New Issue