mirror of https://github.com/explosion/spaCy.git
Make it easier to reference embedding tables
This commit is contained in:
parent
293d1b425b
commit
b92a89f87b
12
spacy/_ml.py
12
spacy/_ml.py
|
@@ -133,15 +133,16 @@ class PrecomputableMaxouts(Model):
|
||||||
def Tok2Vec(width, embed_size, preprocess=None):
|
def Tok2Vec(width, embed_size, preprocess=None):
|
||||||
cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE]
|
cols = [ID, LOWER, PREFIX, SUFFIX, SHAPE]
|
||||||
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
|
with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
|
||||||
lower = get_col(cols.index(LOWER)) >> HashEmbed(width, embed_size)
|
lower = get_col(cols.index(LOWER)) >> HashEmbed(width, embed_size, name='embed_lower')
|
||||||
prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2)
|
prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2, name='embed_prefix')
|
||||||
suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2)
|
suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2, name='embed_suffix')
|
||||||
shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2)
|
shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2, name='embed_shape')
|
||||||
|
|
||||||
|
embed = (lower | prefix | suffix | shape )
|
||||||
tok2vec = (
|
tok2vec = (
|
||||||
with_flatten(
|
with_flatten(
|
||||||
asarray(Model.ops, dtype='uint64')
|
asarray(Model.ops, dtype='uint64')
|
||||||
>> (lower | prefix | suffix | shape )
|
>> embed
|
||||||
>> Maxout(width, width*4, pieces=3)
|
>> Maxout(width, width*4, pieces=3)
|
||||||
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
|
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
|
||||||
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
|
>> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
|
||||||
|
@@ -153,6 +154,7 @@ def Tok2Vec(width, embed_size, preprocess=None):
|
||||||
tok2vec = preprocess >> tok2vec
|
tok2vec = preprocess >> tok2vec
|
||||||
# Work around thinc API limitations :(. TODO: Revise in Thinc 7
|
# Work around thinc API limitations :(. TODO: Revise in Thinc 7
|
||||||
tok2vec.nO = width
|
tok2vec.nO = width
|
||||||
|
tok2vec.embed = embed
|
||||||
return tok2vec
|
return tok2vec
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue