spaCy/spacy/_ml.py

98 lines
4.0 KiB
Python
Raw Normal View History

2017-05-05 18:12:03 +00:00
from thinc.api import layerize, chain, clone, concatenate
2017-05-04 11:31:40 +00:00
from thinc.neural import Model, Maxout, Softmax
from thinc.neural._classes.hash_embed import HashEmbed
2017-05-05 18:12:03 +00:00
from thinc.neural._classes.convolution import ExtractWindow
from thinc.neural._classes.static_vectors import StaticVectors
from .attrs import ID, PREFIX, SUFFIX, SHAPE, TAG, DEP
2017-05-04 11:31:40 +00:00
def get_col(idx):
    """Create a layer that selects column `idx` from a 2-d input array.

    The forward pass returns the selected column as a contiguous array and
    supplies no backward callback (the second element of the pair is None).
    """
    def forward(X, drop=0.):
        column = X[:, idx]
        contiguous = Model.ops.xp.ascontiguousarray(column)
        return contiguous, None

    return layerize(forward)
2017-05-05 17:20:39 +00:00
def build_model(state2vec, width, depth, nr_class):
    """Compose the full model: state2vec, `depth` Maxout layers, then Softmax.

    state2vec: Layer producing the input representation.
    width: Width passed to each Maxout layer.
    depth: Number of times the Maxout layer is cloned.
    nr_class: Number of classes passed to the Softmax layer.
    RETURNS: The chained model.
    """
    operators = {'>>': chain, '**': clone}
    with Model.define_operators(operators):
        # `**` binds tighter than `>>`, so the Maxout stack is cloned first
        # and only then chained between state2vec and the Softmax layer.
        hidden = Maxout(width) ** depth
        model = state2vec >> hidden >> Softmax(nr_class)
    return model
2017-05-04 11:31:40 +00:00
2017-05-05 18:09:50 +00:00
def build_parser_state2vec(width, nr_vector=1000, nF=1, nB=0, nS=1, nL=2, nR=2):
    """Build a layer that turns a batch of parser states into feature vectors.

    The forward pass extracts context tokens from each state, embeds the TAG
    and DEP attribute columns with two separate HashEmbed tables, flattens the
    per-token vectors, and concatenates the three pieces.

    width: Output width given to each HashEmbed table.
    nr_vector: Second argument given to each HashEmbed table.
    nF, nB, nS, nL, nR: Passed through unchanged to build_feature_extractor.
    RETURNS: A layerized model whose `_layers` are the two embedding models.
    """
    embed_tags = _reshape(chain(get_col(0), HashEmbed(width, nr_vector)))
    embed_deps = _reshape(chain(get_col(1), HashEmbed(width, nr_vector)))
    ops = embed_tags.ops

    # Column order must agree with the get_col indices above: 0=TAG, 1=DEP.
    attr_names = ops.asarray([TAG, DEP], dtype='i')
    extract = build_feature_extractor(attr_names, nF, nB, nS, nL, nR)

    def forward(states, drop=0.):
        tokens, attr_vals, tokvecs = extract(states)
        depvecs, bp_depvecs = embed_deps.begin_update(attr_vals, drop=drop)
        tagvecs, bp_tagvecs = embed_tags.begin_update(attr_vals, drop=drop)

        # Remember the 3-d shape before flattening: the backward pass has to
        # restore it, and `tokvecs` is rebound to the 2-d view just below.
        tokvecs_shape = tokvecs.shape
        tokvecs = tokvecs.reshape((tokvecs_shape[0],
                                   tokvecs_shape[1] * tokvecs_shape[2]))

        # Layout is DEP embeddings, TAG embeddings, token vectors. `shapes`
        # and the unpacking in backward() must follow this exact order.
        # NOTE(review): relies on the ops backend providing `concatenate` and
        # `backprop_concatenate` -- confirm against the installed thinc.
        vector = ops.concatenate((depvecs, tagvecs, tokvecs))
        shapes = (depvecs.shape, tagvecs.shape, tokvecs.shape)

        def backward(d_vector, sgd=None):
            # Unpack in the same order the pieces were concatenated so each
            # embedding receives the gradient of its own output slice.
            d_depvecs, d_tagvecs, d_tokvecs = ops.backprop_concatenate(
                d_vector, shapes)
            # Forward the optimizer, as _reshape's backward does, so the
            # embedding layers are given the chance to update.
            bp_depvecs(d_depvecs, sgd=sgd)
            bp_tagvecs(d_tagvecs, sgd=sgd)
            d_tokvecs = d_tokvecs.reshape(tokvecs_shape)
            return (d_tokvecs, tokens)

        return vector, backward

    model = layerize(forward)
    model._layers = [embed_tags, embed_deps]
    return model
2017-05-05 17:20:39 +00:00
2017-05-05 18:09:50 +00:00
def build_feature_extractor(attr_names, nF, nB, nS, nL, nR):
    """Build a layer that reads tokens, attributes and vectors from states.

    attr_names: Array of attribute IDs; its first dimension sets the size of
        the last axis of the features array.
    nF, nB, nS, nL, nR: Context-window arguments handed to each state's
        `nr_context_tokens` and `set_context_tokens` methods.
    RETURNS: A layerized model whose forward pass yields the triple
        (tokens, features, tokvecs) and whose backward pass returns its
        input gradient unchanged.
    """
    def forward(states, drop=0.):
        ops = model.ops
        batch_size = len(states)
        # The first state determines the buffer sizes for the whole batch.
        first_state = states[0]
        n_tokens = first_state.nr_context_tokens(nF, nB, nS, nL, nR)
        vector_length = first_state.token_vector_length

        tokens = ops.allocate((batch_size, n_tokens), dtype='i')
        features = ops.allocate(
            (batch_size, n_tokens, attr_names.shape[0]), dtype='i')
        tokvecs = ops.allocate(
            (batch_size, n_tokens, vector_length), dtype='f')

        # Each state fills its own row of the three buffers.
        for row, state in enumerate(states):
            state.set_context_tokens(tokens[row], nF, nB, nS, nL, nR)
            state.set_attributes(features[row], tokens[row], attr_names)
            state.set_token_vectors(tokvecs[row], tokens[row])

        def backward(d_features, sgd=None):
            return d_features

        return (tokens, features, tokvecs), backward

    model = layerize(forward)
    return model
2017-05-05 17:20:39 +00:00
def _reshape(layer):
    """Wrap `layer` so it can be applied to a 3-d input array.

    The forward pass flattens the first two axes of X, giving
    (X.shape[0] * X.shape[1], X.shape[2]), and runs `layer` on the result.
    The layer's output is then reshaped to X.shape[0] rows. The backward
    pass undoes both reshapes: the incoming gradient is given the shape of
    the layer's output, and the layer's input gradient is given X's shape.

    layer: Model exposing `begin_update(X, drop=...)`.
    RETURNS: A layerized model with `layer` appended to its `_layers`.
    """
    def forward(X, drop=0.):
        old_shape = X.shape
        n = old_shape[0]
        Xh = X.reshape((old_shape[0] * old_shape[1], old_shape[2]))
        yh, bp_yh = layer.begin_update(Xh, drop=drop)
        # The gradient handed to bp_yh must match what the layer produced.
        yh_shape = yh.shape

        def backward(d_y, sgd=None):
            d_yh = d_y.reshape(yh_shape)
            d_Xh = bp_yh(d_yh, sgd)
            # NOTE(review): a wrapped layer may yield no input gradient
            # (e.g. an embedding table); propagate None rather than crash.
            if d_Xh is None:
                return None
            return d_Xh.reshape(old_shape)

        # Integer division: reshape needs an int on both Python 2 and 3, and
        # it is the element count (`size`), not the shape tuple, that is split.
        return yh.reshape((n, yh.size // n)), backward

    model = layerize(forward)
    model._layers.append(layer)
    return model
2017-05-05 18:12:03 +00:00
def build_tok2vec(lang, width, depth, embed_size, cols):
    """Build the token-to-vector model from static and hashed embeddings.

    lang: Passed to StaticVectors as its first argument.
    width: Output width of every embedding and Maxout layer.
    depth: Number of times the ExtractWindow + Maxout block is cloned.
    embed_size: Second argument given to each HashEmbed table.
    cols: Sequence of attribute IDs; the position of ID, PREFIX, SUFFIX and
        SHAPE in it selects which input column feeds each embedding.
    RETURNS: The composed tok2vec model.
    """
    operators = {'>>': chain, '|': concatenate, '**': clone}
    with Model.define_operators(operators):
        static = get_col(cols.index(ID)) >> StaticVectors(lang, width)
        prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size)
        suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size)
        shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size)

        # Four embeddings side by side, mixed back down to `width`.
        embedded = static | prefix | suffix | shape
        mixed = embedded >> Maxout(width, width*4)
        # One block: a window of nW=1 over the sequence, then Maxout.
        window_block = ExtractWindow(nW=1) >> Maxout(width, width*3)
        tok2vec = mixed >> window_block ** depth
    return tok2vec