diff --git a/spacy/_ml.py b/spacy/_ml.py index 91b530fad..c49bad6d4 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -9,7 +9,7 @@ import cytoolz from thinc.neural._classes.convolution import ExtractWindow from thinc.neural._classes.static_vectors import StaticVectors -from thinc.neural._classes.batchnorm import BatchNorm +from thinc.neural._classes.batchnorm import BatchNorm as BN from thinc.neural._classes.layernorm import LayerNorm as LN from thinc.neural._classes.resnet import Residual from thinc.neural import ReLu @@ -22,6 +22,7 @@ from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool from thinc.neural._classes.attention import ParametricAttention from thinc.linear.linear import LinearModel from thinc.api import uniqued, wrap, flatten_add_lengths +from thinc.neural._classes.rnn import BiLSTM from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP @@ -229,14 +230,14 @@ def Tok2Vec(width, embed_size, preprocess=None): suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2, name='embed_suffix') shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2, name='embed_shape') - embed = (norm | prefix | suffix | shape ) >> Maxout(width, width*4, pieces=3) + embed = (norm | prefix | suffix | shape ) >> LN(Maxout(width, width*4, pieces=3)) tok2vec = ( with_flatten( asarray(Model.ops, dtype='uint64') >> uniqued(embed, column=5) >> drop_layer( Residual( - (ExtractWindow(nW=1) >> ReLu(width, width*3)) + (ExtractWindow(nW=1) >> BN(Maxout(width, width*3))) ) ) ** 4, pad=4 )