spaCy/examples/keras_parikh_entailment/keras_decomposable_attentio...

# Semantic entailment/similarity with decomposable attention (using spaCy and Keras)
# Practical state-of-the-art textual entailment with spaCy and Keras

import numpy as np
from keras import layers, Model, models, optimizers
from keras import backend as K

def build_model(vectors, shape, settings):
    """Build and compile the decomposable-attention entailment model.

    The graph follows the three steps marked in the body: *attend* (soft-align
    the two sentences), *compare* (run each word together with its aligned
    summary through a shared feed-forward net) and *aggregate* (sum over
    words and classify).

    Args:
        vectors: 2-D array of pretrained word vectors; it is handed to
            ``create_embedding`` which loads it into a frozen embedding.
        shape: ``(max_length, nr_hidden, nr_class)`` tuple.
        settings: dict. Keys read here:
            ``'entail_dir'`` -- ``'both'`` compares in both directions,
            ``'left'`` keeps only the sentence-2 side (``b`` with ``alpha``),
            any other value keeps only the sentence-1 side (``a`` with
            ``beta``);
            ``'lr'`` -- learning rate passed to Adam.

    Returns:
        A compiled ``Model`` mapping ``[words1, words2]`` (int32 id matrices
        of width ``max_length``) to a softmax over ``nr_class`` classes.
    """
    max_length, nr_hidden, nr_class = shape

    input1 = layers.Input(shape=(max_length,), dtype='int32', name='words1')
    input2 = layers.Input(shape=(max_length,), dtype='int32', name='words2')

    # embeddings (projected); the same embedding model encodes both sentences
    embed = create_embedding(vectors, max_length, nr_hidden)

    a = embed(input1)
    b = embed(input2)

    # step 1: attend
    # att_weights[:, i, j] is the unnormalised score between word i of
    # sentence 1 and word j of sentence 2.
    F = create_feedforward(nr_hidden)
    att_weights = layers.dot([F(a), F(b)], axes=-1)

    # NOTE(review): the feed-forward blocks use create_feedforward's default
    # dropout rate; no dropout value is read from `settings` here.
    G = create_feedforward(nr_hidden)

    entail_dir = settings['entail_dir']

    if entail_dir == 'both':
        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
        # alpha[:, j] = sum_i norm_weights_a[:, i, j] * a[:, i]
        alpha = layers.dot([norm_weights_a, a], axes=1)
        # beta[:, i] = sum_j norm_weights_b[:, i, j] * b[:, j].
        # The weights were normalised over axis 2 (words of sentence 2), so
        # that is the axis that must be contracted with b's word axis.
        # Contracting axis 1 instead only "works" because both sentences are
        # padded to the same max_length, and yields a mis-aligned summary.
        beta = layers.dot([norm_weights_b, b], axes=(2, 1))

        # step 2: compare
        comp1 = layers.concatenate([a, beta])
        comp2 = layers.concatenate([b, alpha])
        v1 = layers.TimeDistributed(G)(comp1)
        v2 = layers.TimeDistributed(G)(comp2)

        # step 3: aggregate
        v1_sum = layers.Lambda(sum_word)(v1)
        v2_sum = layers.Lambda(sum_word)(v2)
        concat = layers.concatenate([v1_sum, v2_sum])

    elif entail_dir == 'left':
        norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
        alpha = layers.dot([norm_weights_a, a], axes=1)
        comp2 = layers.concatenate([b, alpha])
        v2 = layers.TimeDistributed(G)(comp2)
        v2_sum = layers.Lambda(sum_word)(v2)
        concat = v2_sum

    else:
        norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
        # Same alignment as in the 'both' branch: contract the normalised
        # axis (2) of the weights with the word axis (1) of b.
        beta = layers.dot([norm_weights_b, b], axes=(2, 1))
        comp1 = layers.concatenate([a, beta])
        v1 = layers.TimeDistributed(G)(comp1)
        v1_sum = layers.Lambda(sum_word)(v1)
        concat = v1_sum

    H = create_feedforward(nr_hidden)
    out = H(concat)
    out = layers.Dense(nr_class, activation='softmax')(out)

    model = Model([input1, input2], out)

    model.compile(
        optimizer=optimizers.Adam(lr=settings['lr']),
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    return model


def create_embedding(vectors, max_length, projected_dim):
    """Return a model that looks up frozen word vectors and projects them.

    Args:
        vectors: array whose first two dimensions give the vocabulary size
            and the vector width; used as the embedding's initial weights.
        max_length: sequence length declared on the embedding layer.
        projected_dim: output width of the per-word linear projection.

    Returns:
        A ``Sequential`` of a non-trainable ``Embedding`` followed by a
        time-distributed bias-free linear ``Dense``.
    """
    vocab_size = vectors.shape[0]
    vector_width = vectors.shape[1]

    # Pretrained vectors are loaded as-is and excluded from training.
    frozen_lookup = layers.Embedding(
        vocab_size,
        vector_width,
        input_length=max_length,
        weights=[vectors],
        trainable=False)

    # Linear map (no activation, no bias) applied to every word position.
    projection = layers.TimeDistributed(
        layers.Dense(projected_dim, activation=None, use_bias=False))

    return models.Sequential([frozen_lookup, projection])

def create_feedforward(num_units=200, activation='relu', dropout_rate=0.2):
    """Return a two-layer feed-forward block with dropout after each layer.

    Args:
        num_units: width of both ``Dense`` layers.
        activation: activation used by both ``Dense`` layers.
        dropout_rate: rate of the ``Dropout`` that follows each ``Dense``.

    Returns:
        A ``Sequential`` laid out as Dense, Dropout, Dense, Dropout.
    """
    stack = []
    for _ in range(2):
        stack.append(layers.Dense(num_units, activation=activation))
        stack.append(layers.Dropout(dropout_rate))
    return models.Sequential(stack)


def normalizer(axis):
    """Return a function that applies a softmax along ``axis``.

    Args:
        axis: the axis over which the returned function normalises, so that
            its output sums to 1 along that axis.

    Returns:
        A one-argument callable (suitable for ``layers.Lambda``) mapping a
        tensor of raw scores to normalised weights of the same shape.
    """
    def _normalize(att_weights):
        # Shift by the per-axis maximum before exponentiating. Softmax is
        # invariant to this shift, but without it large scores overflow in
        # exp() and the division produces inf/inf = NaN.
        shifted = att_weights - K.max(att_weights, axis=axis, keepdims=True)
        exp_weights = K.exp(shifted)
        sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)
        return exp_weights / sum_weights
    return _normalize

def sum_word(x):
    """Sum ``x`` over axis 1, dropping that axis from the result."""
    word_axis = 1
    return K.sum(x, axis=word_axis)


def test_build_model():
    """Smoke test: the model builds and exposes one output per class."""
    # np.ndarray(...) would hand back uninitialised memory (possibly NaN or
    # inf); use seeded random vectors so the test is reproducible.
    vectors = np.random.RandomState(0).uniform(
        -1.0, 1.0, size=(100, 8)).astype('float32')
    shape = (10, 16, 3)
    settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True, 'entail_dir':'both'}
    model = build_model(vectors, shape, settings)
    # The final layer is a softmax over nr_class (= shape[2]) units.
    assert model.output_shape == (None, shape[2])


def test_fit_model():
    """Smoke test: the model can be fitted on small synthetic data."""
    # One seeded generator for all synthetic inputs, so runs are repeatable.
    # The previous np.ndarray(...) calls returned uninitialised memory, which
    # could feed NaN/inf vectors into training.
    rng = np.random.RandomState(0)

    def _generate_X(nr_example, length, nr_vector):
        """Return two int32 id matrices with every id in [0, nr_vector)."""
        X1 = rng.randint(0, nr_vector, size=(nr_example, length)).astype('int32')
        X2 = rng.randint(0, nr_vector, size=(nr_example, length)).astype('int32')
        return [X1, X2]

    def _generate_Y(nr_example, nr_class):
        """Return one-hot labels cycling through the classes row by row."""
        ys = np.zeros((nr_example, nr_class), dtype='int32')
        rows = np.arange(nr_example)
        ys[rows, rows % nr_class] = 1
        return ys

    vectors = rng.uniform(-1.0, 1.0, size=(100, 8)).astype('float32')
    shape = (10, 16, 3)
    settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True, 'entail_dir':'both'}
    model = build_model(vectors, shape, settings)

    train_X = _generate_X(20, shape[0], vectors.shape[0])
    train_Y = _generate_Y(20, shape[2])
    dev_X = _generate_X(15, shape[0], vectors.shape[0])
    dev_Y = _generate_Y(15, shape[2])

    model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), epochs=5, batch_size=4)


# Public API for `from <module> import *`. Entries must be name strings;
# listing the function object itself makes the star-import raise TypeError.
__all__ = ['build_model']
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`# Semantic entailment/similarity with decomposable attention (using spaCy and Keras)`
			`# Practical state-of-the-art textual entailment with spaCy and Keras`
Rename entailment example 2016-11-01 00:51:54 +00:00
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`import numpy as np`
			`from keras import layers, Model, models, optimizers`
			`from keras import backend as K`
Rename entailment example 2016-11-01 00:51:54 +00:00
Fix entailment example, and add a flag for BiRNN encoding. 2016-11-12 17:43:37 +00:00			`def build_model(vectors, shape, settings):`
Rename entailment example 2016-11-01 00:51:54 +00:00			`max_length, nr_hidden, nr_class = shape`
Fix x keras deep learning example 2017-01-31 19:27:13 +00:00
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`input1 = layers.Input(shape=(max_length,), dtype='int32', name='words1')`
			`input2 = layers.Input(shape=(max_length,), dtype='int32', name='words2')`

			`# embeddings (projected)`
			`embed = create_embedding(vectors, max_length, nr_hidden)`

			`a = embed(input1)`
			`b = embed(input2)`

			`# step 1: attend`
			`F = create_feedforward(nr_hidden)`
			`att_weights = layers.dot([F(a), F(b)], axes=-1)`

			`G = create_feedforward(nr_hidden)`

			`if settings['entail_dir'] == 'both':`
			`norm_weights_a = layers.Lambda(normalizer(1))(att_weights)`
			`norm_weights_b = layers.Lambda(normalizer(2))(att_weights)`
			`alpha = layers.dot([norm_weights_a, a], axes=1)`
			`beta = layers.dot([norm_weights_b, b], axes=1)`

			`# step 2: compare`
			`comp1 = layers.concatenate([a, beta])`
			`comp2 = layers.concatenate([b, alpha])`
			`v1 = layers.TimeDistributed(G)(comp1)`
			`v2 = layers.TimeDistributed(G)(comp2)`

			`# step 3: aggregate`
			`v1_sum = layers.Lambda(sum_word)(v1)`
			`v2_sum = layers.Lambda(sum_word)(v2)`
			`concat = layers.concatenate([v1_sum, v2_sum])`

			`elif settings['entail_dir'] == 'left':`
			`norm_weights_a = layers.Lambda(normalizer(1))(att_weights)`
			`alpha = layers.dot([norm_weights_a, a], axes=1)`
			`comp2 = layers.concatenate([b, alpha])`
			`v2 = layers.TimeDistributed(G)(comp2)`
			`v2_sum = layers.Lambda(sum_word)(v2)`
			`concat = v2_sum`

			`else:`
			`norm_weights_b = layers.Lambda(normalizer(2))(att_weights)`
			`beta = layers.dot([norm_weights_b, b], axes=1)`
			`comp1 = layers.concatenate([a, beta])`
			`v1 = layers.TimeDistributed(G)(comp1)`
			`v1_sum = layers.Lambda(sum_word)(v1)`
			`concat = v1_sum`

			`H = create_feedforward(nr_hidden)`
			`out = H(concat)`
			`out = layers.Dense(nr_class, activation='softmax')(out)`

			`model = Model([input1, input2], out)`

Rename entailment example 2016-11-01 00:51:54 +00:00			`model.compile(`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`optimizer=optimizers.Adam(lr=settings['lr']),`
Rename entailment example 2016-11-01 00:51:54 +00:00			`loss='categorical_crossentropy',`
			`metrics=['accuracy'])`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00
Rename entailment example 2016-11-01 00:51:54 +00:00			`return model`


Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`def create_embedding(vectors, max_length, projected_dim):`
			`return models.Sequential([`
			`layers.Embedding(`
			`vectors.shape[0],`
			`vectors.shape[1],`
			`input_length=max_length,`
			`weights=[vectors],`
			`trainable=False),`

			`layers.TimeDistributed(`
			`layers.Dense(projected_dim,`
			`activation=None,`
			`use_bias=False))`
			`])`

			`def create_feedforward(num_units=200, activation='relu', dropout_rate=0.2):`
			`return models.Sequential([`
			`layers.Dense(num_units, activation=activation),`
			`layers.Dropout(dropout_rate),`
			`layers.Dense(num_units, activation=activation),`
			`layers.Dropout(dropout_rate)`
			`])`


			`def normalizer(axis):`
			`def _normalize(att_weights):`
			`exp_weights = K.exp(att_weights)`
			`sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)`
			`return exp_weights/sum_weights`
			`return _normalize`

			`def sum_word(x):`
			`return K.sum(x, axis=1)`
Rename entailment example 2016-11-01 00:51:54 +00:00

			`def test_build_model():`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`vectors = np.ndarray((100, 8), dtype='float32')`
Rename entailment example 2016-11-01 00:51:54 +00:00			`shape = (10, 16, 3)`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True, 'entail_dir':'both'}`
Rename entailment example 2016-11-01 00:51:54 +00:00			`model = build_model(vectors, shape, settings)`


			`def test_fit_model():`
remove theano dependency, using keras backend functions 2016-11-13 13:06:01 +00:00
Rename entailment example 2016-11-01 00:51:54 +00:00			`def _generate_X(nr_example, length, nr_vector):`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`X1 = np.ndarray((nr_example, length), dtype='int32')`
Rename entailment example 2016-11-01 00:51:54 +00:00			`X1 *= X1 < nr_vector`
			`X1 *= 0 <= X1`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`X2 = np.ndarray((nr_example, length), dtype='int32')`
Rename entailment example 2016-11-01 00:51:54 +00:00			`X2 *= X2 < nr_vector`
			`X2 *= 0 <= X2`
			`return [X1, X2]`
remove theano dependency, using keras backend functions 2016-11-13 13:06:01 +00:00
Rename entailment example 2016-11-01 00:51:54 +00:00			`def _generate_Y(nr_example, nr_class):`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`ys = np.zeros((nr_example, nr_class), dtype='int32')`
Rename entailment example 2016-11-01 00:51:54 +00:00			`for i in range(nr_example):`
			`ys[i, i % nr_class] = 1`
			`return ys`

Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`vectors = np.ndarray((100, 8), dtype='float32')`
Rename entailment example 2016-11-01 00:51:54 +00:00			`shape = (10, 16, 3)`
Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`settings = {'lr': 0.001, 'dropout': 0.2, 'gru_encode':True, 'entail_dir':'both'}`
Rename entailment example 2016-11-01 00:51:54 +00:00			`model = build_model(vectors, shape, settings)`
Fix x keras deep learning example 2017-01-31 19:27:13 +00:00
Corretions for model test example The sentences of test data in sentence entailment example should be generated with integers limited to vocab_size. 2017-05-03 14:41:23 +00:00			`train_X = _generate_X(20, shape[0], vectors.shape[0])`
Rename entailment example 2016-11-01 00:51:54 +00:00			`train_Y = _generate_Y(20, shape[2])`
Corretions for model test example The sentences of test data in sentence entailment example should be generated with integers limited to vocab_size. 2017-05-03 14:41:23 +00:00			`dev_X = _generate_X(15, shape[0], vectors.shape[0])`
Rename entailment example 2016-11-01 00:51:54 +00:00			`dev_Y = _generate_Y(15, shape[2])`

Update Keras Example for (Parikh et al, 2016) implementation (#2803) * bug fixes in keras example * created contributor agreement * baseline for Parikh model * initial version of parikh 2016 implemented * tested asymmetric models * fixed grevious error in normalization * use standard SNLI test file * begin to rework parikh example * initial version of running example * start to document the new version * start to document the new version * Update Decompositional Attention.ipynb * fixed calls to similarity * updated the README * import sys package duh * simplified indexing on mapping word to IDs * stupid python indent error * added code from https://github.com/tensorflow/tensorflow/issues/3388 for tf bug workaround 2018-10-01 08:28:45 +00:00			`model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), epochs=5, batch_size=4)`
Rename entailment example 2016-11-01 00:51:54 +00:00

			`__all__ = [build_model]`