mirror of https://github.com/explosion/spaCy.git

Add partial embedding updates to Parikh model, fix dropout, other corrections.

parent 80f473dfb8
commit ff5ab75f5e
@@ -93,7 +93,7 @@ def read_snli(path):
     nr_hidden=("Number of hidden units", "option", "H", int),
     dropout=("Dropout level", "option", "d", float),
     learn_rate=("Learning rate", "option", "e", float),
-    batch_size=("Batch size for neural network training", "option", "b", float),
+    batch_size=("Batch size for neural network training", "option", "b", int),
     nr_epoch=("Number of training epochs", "option", "i", int),
     tree_truncate=("Truncate sentences by tree distance", "flag", "T", bool),
     gru_encode=("Encode sentences with bidirectional GRU", "flag", "E", bool),
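These are plac-style argument annotations, (description, kind, abbreviation, type); the fix makes batch_size parse as an int instead of a float. A minimal sketch of how such an annotation behaves (this main() is hypothetical, not from the file):

    import plac

    @plac.annotations(
        batch_size=("Batch size for neural network training", "option", "b", int),
    )
    def main(batch_size=100):
        # With the int annotation, "-b 32" arrives as 32, not 32.0,
        # so it can be passed straight to the batching code.
        print(batch_size)

    if __name__ == '__main__':
        plac.call(main)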
@@ -3,8 +3,10 @@
 import numpy
 
 from keras.layers import InputSpec, Layer, Input, Dense, merge
-from keras.layers import Activation, Dropout, Embedding, TimeDistributed
-from keras.layers import Bidirectional, GRU
+from keras.layers import Lambda, Activation, Dropout, Embedding, TimeDistributed
+from keras.layers import Bidirectional, GRU, LSTM
+from keras.layers.noise import GaussianNoise
+from keras.layers.advanced_activations import ELU
 import keras.backend as K
 from keras.models import Sequential, Model, model_from_json
 from keras.regularizers import l2
@@ -20,13 +22,13 @@ def build_model(vectors, shape, settings):
     ids2 = Input(shape=(max_length,), dtype='int32', name='words2')
 
     # Construct operations, which we'll chain together.
-    embed = _StaticEmbedding(vectors, max_length, nr_hidden)
+    embed = _StaticEmbedding(vectors, max_length, nr_hidden, dropout=0.2, nr_tune=5000)
     if settings['gru_encode']:
-        encode = _BiRNNEncoding(max_length, nr_hidden)
-    attend = _Attention(max_length, nr_hidden)
+        encode = _BiRNNEncoding(max_length, nr_hidden, dropout=settings['dropout'])
+    attend = _Attention(max_length, nr_hidden, dropout=settings['dropout'])
     align = _SoftAlignment(max_length, nr_hidden)
-    compare = _Comparison(max_length, nr_hidden)
-    entail = _Entailment(nr_hidden, nr_class)
+    compare = _Comparison(max_length, nr_hidden, dropout=settings['dropout'])
+    entail = _Entailment(nr_hidden, nr_class, dropout=settings['dropout'])
 
     # Declare the model as a computational graph.
     sent1 = embed(ids1) # Shape: (i, n)
@@ -59,15 +61,26 @@ def build_model(vectors, shape, settings):
 
 class _StaticEmbedding(object):
-    def __init__(self, vectors, max_length, nr_out):
+    def __init__(self, vectors, max_length, nr_out, nr_tune=1000, dropout=0.0):
         self.nr_out = nr_out
         self.max_length = max_length
         self.embed = Embedding(
             vectors.shape[0],
             vectors.shape[1],
             input_length=max_length,
             weights=[vectors],
             name='embed',
-            trainable=False,
-            dropout=0.0)
+            trainable=False)
+        self.tune = Embedding(
+            nr_tune,
+            nr_out,
+            input_length=max_length,
+            weights=None,
+            name='tune',
+            trainable=True,
+            dropout=dropout)
+        self.mod_ids = Lambda(lambda sent: sent % (nr_tune-1)+1,
+                              output_shape=(self.max_length,))
+
         self.project = TimeDistributed(
             Dense(
@@ -77,23 +90,37 @@ class _StaticEmbedding(object):
                 name='project'))
 
     def __call__(self, sentence):
-        return self.project(self.embed(sentence))
+        def get_output_shape(shapes):
+            print(shapes)
+            return shapes[0]
+        mod_sent = self.mod_ids(sentence)
+        tuning = self.tune(mod_sent)
+        #tuning = merge([tuning, mod_sent],
+        #    mode=lambda AB: AB[0] * (K.clip(K.cast(AB[1], 'float32'), 0, 1)),
+        #    output_shape=(self.max_length, self.nr_out))
+        pretrained = self.project(self.embed(sentence))
+        vectors = merge([pretrained, tuning], mode='sum')
+        return vectors
 
 
 class _BiRNNEncoding(object):
-    def __init__(self, max_length, nr_out):
+    def __init__(self, max_length, nr_out, dropout=0.0):
         self.model = Sequential()
-        self.model.add(Bidirectional(GRU(int(nr_out/2), return_sequences=True),
-                                     input_shape=(max_length, nr_out)))
+        self.model.add(Bidirectional(LSTM(nr_out, return_sequences=True,
+                                          dropout_W=dropout, dropout_U=dropout),
+                                     input_shape=(max_length, nr_out)))
         self.model.add(TimeDistributed(Dense(nr_out, activation='relu', init='he_normal')))
+        self.model.add(TimeDistributed(Dropout(0.2)))
 
     def __call__(self, sentence):
         return self.model(sentence)
 
 
 class _Attention(object):
-    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
+    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
         self.max_length = max_length
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
         self.model.add(
             Dense(nr_hidden, name='attend1',
                 init='he_normal', W_regularizer=l2(L2),
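The partial-update trick above sums a frozen pretrained table with a small trainable "tune" table, remapping each word id into the smaller table with sent % (nr_tune-1) + 1. A minimal numpy sketch of the arithmetic (table shapes are illustrative, and the projection of the pretrained vectors down to nr_hidden is omitted):

    import numpy

    nr_tune = 5000
    static = numpy.random.randn(100000, 200).astype('float32')  # frozen, pretrained
    tune = numpy.zeros((nr_tune, 200), dtype='float32')         # small, trainable

    def embed(ids):
        # Every id gets its fixed pretrained vector plus a learned correction
        # from a much smaller table; many words share a tune row via the
        # modulo, and row 0 is skipped so it can stay a padding vector.
        return static[ids] + tune[ids % (nr_tune - 1) + 1]

    print(embed(numpy.array([3, 17, 99999])).shape)  # (3, 200)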
@@ -134,18 +161,17 @@ class _SoftAlignment(object):
 
 
 class _Comparison(object):
-    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
+    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
         self.words = words
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
         self.model.add(Dense(nr_hidden, name='compare1',
-            init='he_normal', W_regularizer=l2(L2),
-            input_shape=(nr_hidden*2,)))
+            init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dropout(dropout))
         self.model.add(Dense(nr_hidden, name='compare2',
             W_regularizer=l2(L2), init='he_normal'))
         self.model.add(Activation('relu'))
-        self.model.add(Dropout(dropout))
         self.model = TimeDistributed(self.model)
 
     def __call__(self, sent, align, **kwargs):
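For orientation: _Attention and _SoftAlignment implement the attend/align steps of the Parikh et al. decomposable attention model, whose output _Comparison consumes. A rough numpy sketch of the soft alignment itself (context only, not part of this diff; shapes are illustrative):

    import numpy

    def softmax(x, axis=-1):
        e = numpy.exp(x - x.max(axis=axis, keepdims=True))
        return e / e.sum(axis=axis, keepdims=True)

    def soft_align(sent1, sent2):
        # Score every word pair across the two sentences, then build, for
        # each word of sent1, an attention-weighted summary of sent2 (and
        # vice versa) to feed into the comparison MLP.
        scores = sent1 @ sent2.T                    # (len1, len2)
        align2 = softmax(scores, axis=1) @ sent2    # (len1, nr_hidden)
        align1 = softmax(scores.T, axis=1) @ sent1  # (len2, nr_hidden)
        return align1, align2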
@@ -156,13 +182,16 @@ class _Comparison(object):
 
 
 class _Entailment(object):
-    def __init__(self, nr_hidden, nr_out, dropout=0.2, L2=1e-4):
+    def __init__(self, nr_hidden, nr_out, dropout=0.0, L2=0.0):
         self.model = Sequential()
+        self.model.add(Dropout(dropout, input_shape=(nr_hidden*2,)))
         self.model.add(Dense(nr_hidden, name='entail1',
-            init='he_normal', W_regularizer=l2(L2),
-            input_shape=(nr_hidden*2,)))
+            init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dropout(dropout))
         self.model.add(Dense(nr_hidden, name='entail2',
             init='he_normal', W_regularizer=l2(L2)))
         self.model.add(Activation('relu'))
         self.model.add(Dense(nr_out, name='entail_out', activation='softmax',
             W_regularizer=l2(L2), init='zero'))
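The pattern in _Comparison and _Entailment is the same: dropout and L2 now default to 0.0 and are threaded in from the command-line settings, and a Dropout layer is applied to the MLP's input via Dropout(dropout, input_shape=...) instead of being hard-coded between layers. For reference, a small numpy sketch of what inverted dropout does at train time (Keras handles this internally; this is not the library's code):

    import numpy

    def inverted_dropout(x, rate, rng=numpy.random):
        # Zero each unit with probability `rate` and rescale the survivors,
        # so expected activations match test time, when dropout is off.
        mask = (rng.uniform(size=x.shape) >= rate) / (1.0 - rate)
        return x * mask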
@@ -1,5 +1,6 @@
 from keras.models import model_from_json
 import numpy
+import numpy.random
 
 
 class KerasSimilarityShim(object):
@@ -31,16 +32,16 @@ class KerasSimilarityShim(object):
         return scores[0]
 
 
-def get_embeddings(vocab):
-    max_rank = max(lex.rank+1 for lex in vocab if lex.has_vector)
-    vectors = numpy.ndarray((max_rank+1, vocab.vectors_length), dtype='float32')
+def get_embeddings(vocab, nr_unk=100):
+    nr_vector = max(lex.rank for lex in vocab) + 1
+    vectors = numpy.zeros((nr_vector+nr_unk+2, vocab.vectors_length), dtype='float32')
     for lex in vocab:
         if lex.has_vector:
-            vectors[lex.rank + 1] = lex.vector
+            vectors[lex.rank+1] = lex.vector / lex.vector_norm
     return vectors
 
 
-def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100):
+def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100, nr_unk=100):
     Xs = numpy.zeros((len(docs), max_length), dtype='int32')
     for i, doc in enumerate(docs):
         if tree_truncate:
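get_embeddings now sizes the table by the whole vocabulary rather than only the vectored words, unit-normalizes each vector, starts from zeros instead of uninitialized memory, and pads the table with nr_unk+2 extra rows so padding, the sentence marker, and unknown-word ids all stay in bounds. A sketch of the resulting layout (toy (rank, vector) pairs, not spaCy's real vocab):

    import numpy

    vectors_length = 4
    nr_unk = 100
    lexemes = [(0, numpy.ones(4)), (1, 2 * numpy.ones(4))]  # (rank, vector)
    nr_vector = max(rank for rank, _ in lexemes) + 1

    table = numpy.zeros((nr_vector + nr_unk + 2, vectors_length), dtype='float32')
    for rank, vec in lexemes:
        # Row 0 stays zero for padding; known words live at rank+1, scaled
        # to unit length so all inputs have comparable magnitude.
        table[rank + 1] = vec / numpy.linalg.norm(vec)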
@@ -50,17 +51,22 @@ def get_word_ids(docs, rnn_encode=False, tree_truncate=False, max_length=100):
         words = []
         while len(words) <= max_length and queue:
             word = queue.pop(0)
-            if rnn_encode or (word.has_vector and not word.is_punct and not word.is_space):
+            if rnn_encode or (not word.is_punct and not word.is_space):
                 words.append(word)
             if tree_truncate:
                 queue.extend(list(word.lefts))
                 queue.extend(list(word.rights))
         words.sort()
         for j, token in enumerate(words):
-            Xs[i, j] = token.rank + 1
+            if token.has_vector:
+                Xs[i, j] = token.rank+1
+            else:
+                Xs[i, j] = (token.shape % (nr_unk-1))+2
+            j += 1
+            if j >= max_length:
+                break
         else:
             Xs[i, len(words)] = 1
     return Xs
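Unknown words are no longer filtered out: a token without a vector is hashed into one of nr_unk-1 reserved ids by its word shape (token.shape is spaCy's integer shape-feature id), offset past the padding id 0 and the end-of-sentence marker 1. A sketch of the id assignment (shape ids here are made up):

    nr_unk = 100

    def word_id(rank, has_vector, shape_id):
        # Known words keep rank+1 (0 is padding). Unknowns share one of
        # nr_unk-1 buckets chosen by word shape, starting at id 2, so
        # similarly-shaped rare words land on the same tuned row.
        if has_vector:
            return rank + 1
        return (shape_id % (nr_unk - 1)) + 2

    print(word_id(42, True, 0))      # 43
    print(word_id(0, False, 12345))  # 71, a bucket in [2, nr_unk]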