diff --git a/spacy/_ml.py b/spacy/_ml.py
index 1018a9c46..173917a36 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -134,13 +134,14 @@ def Tok2Vec(width, embed_size, preprocess=None):
         shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2)
         tok2vec = (
-            flatten
-            >> (lower | prefix | suffix | shape )
-            >> Maxout(width, width*4, pieces=3)
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
+            with_flatten(
+                (lower | prefix | suffix | shape )
+                >> Maxout(width, width*4, pieces=3)
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
+                >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)),
+            pad=4, ndim=5)
         )
         if preprocess not in (False, None):
             tok2vec = preprocess >> tok2vec
 
diff --git a/spacy/language.py b/spacy/language.py
index 1e4ae1474..6538b9e27 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -179,10 +179,10 @@ class Language(object):
         tok2vec = self.pipeline[0]
         feats = tok2vec.doc2feats(docs)
         for proc in self.pipeline[1:]:
-            tokvecs, bp_tokvecs = tok2vec.model.begin_update(feats, drop=drop)
             grads = {}
-            d_tokvecs = proc.update((docs, tokvecs), golds, sgd=get_grads, drop=drop)
-            bp_tokvecs(d_tokvecs, sgd=get_grads)
+            tokvecses, bp_tokvecses = tok2vec.model.begin_update(feats, drop=drop)
+            d_tokvecses = proc.update((docs, tokvecses), golds, sgd=get_grads, drop=drop)
+            bp_tokvecses(d_tokvecses, sgd=get_grads)
         if sgd is not None:
             for key, (W, dW) in grads.items():
                 # TODO: Unhack this when thinc improves
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 4cbb666c0..09e79d67d 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -10,7 +10,7 @@ cimport numpy as np
 import cytoolz
 import util
 
-from thinc.api import add, layerize, chain, clone, concatenate
+from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
 from thinc.neural import Model, Maxout, Softmax, Affine
 from thinc.neural._classes.hash_embed import HashEmbed
 from thinc.neural.util import to_categorical
@@ -52,16 +52,16 @@ class TokenVectorEncoder(object):
         self.doc2feats = doc2feats()
         self.model = model
 
-    def __call__(self, docs, state=None):
+    def __call__(self, docs):
         if isinstance(docs, Doc):
             docs = [docs]
-        tokvecs = self.predict(docs)
-        self.set_annotations(docs, tokvecs)
+        tokvecses = self.predict(docs)
+        self.set_annotations(docs, tokvecses)
 
     def pipe(self, stream, batch_size=128, n_threads=-1):
         for docs in cytoolz.partition_all(batch_size, stream):
-            tokvecs = self.predict(docs)
-            self.set_annotations(docs, tokvecs)
+            tokvecses = self.predict(docs)
+            self.set_annotations(docs, tokvecses)
             yield from docs
 
     def predict(self, docs):
@@ -69,11 +69,9 @@ class TokenVectorEncoder(object):
         feats = self.doc2feats(docs)
         tokvecs = self.model(feats)
         return tokvecs
-    def set_annotations(self, docs, tokvecs):
-        start = 0
-        for doc in docs:
-            doc.tensor = tokvecs[start : start + len(doc)]
-            start += len(doc)
+    def set_annotations(self, docs, tokvecses):
+        for doc, tokvecs in zip(docs, tokvecses):
+            doc.tensor = tokvecs
 
     def begin_update(self, docs, drop=0.):
         if isinstance(docs, Doc):
@@ -136,7 +134,7 @@ class NeuralTagger(object):
         docs, tokvecs = docs_tokvecs
 
         if self.model.nI is None:
-            self.model.nI = tokvecs.shape[1]
+            self.model.nI = tokvecs[0].shape[1]
         tag_scores, bp_tag_scores = self.model.begin_update(tokvecs, drop=drop)
         loss, d_tag_scores = self.get_loss(docs, golds, tag_scores)
 
@@ -146,6 +144,7 @@ class NeuralTagger(object):
         return d_tokvecs
 
     def get_loss(self, docs, golds, scores):
+        scores = self.model.ops.flatten(scores)
         tag_index = {tag: i for i, tag in enumerate(self.vocab.morphology.tag_names)}
 
         cdef int idx = 0
@@ -161,7 +160,7 @@ class NeuralTagger(object):
         correct = self.model.ops.xp.array(correct, dtype='i')
         d_scores = scores - to_categorical(correct, nb_classes=scores.shape[1])
         loss = (d_scores**2).sum()
-        d_scores = self.model.ops.asarray(d_scores, dtype='f')
+        d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
         return float(loss), d_scores
 
     def begin_training(self, gold_tuples, pipeline=None):
@@ -179,9 +178,8 @@ class NeuralTagger(object):
         vocab.morphology = Morphology(vocab.strings, new_tag_map,
                                       vocab.morphology.lemmatizer)
         token_vector_width = pipeline[0].model.nO
-        self.model = rebatch(1024, Softmax(self.vocab.morphology.n_tags,
-                                           token_vector_width))
-        #self.model = Softmax(self.vocab.morphology.n_tags)
+        self.model = with_flatten(
+            Softmax(self.vocab.morphology.n_tags, token_vector_width))
 
     def use_params(self, params):
         with self.model.use_params(params):
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 97685bf4d..32c761be6 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -311,7 +311,8 @@ cdef class Parser:
         return states
 
     def update(self, docs_tokvecs, golds, drop=0., sgd=None):
-        docs, tokvecs = docs_tokvecs
+        docs, tokvec_lists = docs_tokvecs
+        tokvecs = self.model[0].ops.flatten(tokvec_lists)
         if isinstance(docs, Doc) and isinstance(golds, GoldParse):
             docs = [docs]
             golds = [golds]
@@ -324,7 +325,8 @@ cdef class Parser:
         state2vec, vec2scores = self.get_batch_model(len(states), tokvecs,
                                                      cuda_stream, drop)
 
-        todo = [(s, g) for s, g in zip(states, golds) if not s.is_final()]
+        todo = [(s, g) for (s, g) in zip(states, golds)
+                if not s.is_final()]
 
         backprops = []
         cdef float loss = 0.
@@ -365,7 +367,7 @@ cdef class Parser:
         else:
             xp.add.at(d_tokvecs,
                       token_ids, d_state_features * active_feats)
-        return d_tokvecs
+        return self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
 
     def get_batch_model(self, batch_size, tokvecs, stream, dropout):
         lower, upper = self.model
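
Reviewer note on the pattern above: after this change, pipeline components exchange a list of per-Doc arrays ("tokvecses", one (n_tokens, width) array per Doc) instead of one concatenated matrix. Each component flattens the list where it needs a single matrix (NeuralTagger.get_loss, Parser.update) and unflattens outputs and gradients on the way back out, which is why set_annotations no longer tracks offsets and why the [len(d) for d in docs] unflatten calls appear. The sketch below is a minimal numpy-only illustration of that list-in/list-out contract, inferred from the diff itself; it is not thinc's with_flatten implementation, it ignores the pad/ndim arguments, and the names flatten, unflatten and with_flatten_sketch are illustrative only.

import numpy as np

def flatten(arrays):
    # Join per-doc (n_tokens_i, width) arrays into one (sum(n_tokens), width) matrix.
    return np.concatenate(arrays, axis=0)

def unflatten(flat, lengths):
    # Inverse of flatten(): split the matrix back into per-doc arrays.
    out = []
    start = 0
    for length in lengths:
        out.append(flat[start : start + length])
        start += length
    return out

def with_flatten_sketch(begin_update):
    # Wrap a layer that works on one flat (N, nI) matrix so it takes and
    # returns lists of per-doc arrays, mirroring how the wrapped tok2vec
    # model now yields "tokvecses" and hands gradients back per doc.
    def wrapped(seqs, drop=0.):
        lengths = [len(seq) for seq in seqs]
        flat_out, bp_flat = begin_update(flatten(seqs), drop=drop)
        def backprop(d_seqs, sgd=None):
            # Gradients arrive per doc; flatten, backprop, unflatten again.
            d_flat = bp_flat(flatten(d_seqs), sgd=sgd)
            return None if d_flat is None else unflatten(d_flat, lengths)
        return unflatten(flat_out, lengths), backprop
    return wrapped

# Toy inner layer: a linear map on the flat token matrix.
W = np.eye(4, dtype='f')

def linear_begin_update(X, drop=0.):
    def backprop(dY, sgd=None):
        return dY @ W  # gradient of X @ W.T with respect to X
    return X @ W.T, backprop

model = with_flatten_sketch(linear_begin_update)
docs = [np.ones((3, 4), dtype='f'), np.ones((5, 4), dtype='f')]  # two fake docs
tokvecses, bp_tokvecses = model(docs)
assert [len(t) for t in tokvecses] == [3, 5]    # one array per doc, as set_annotations() expects
d_tokvecses = bp_tokvecses([np.ones_like(t) for t in tokvecses])
assert [len(d) for d in d_tokvecses] == [3, 5]  # gradients come back per doc, as update() returns

The benefit of this bookkeeping, as the set_annotations() change shows, is that consumers of the tok2vec output no longer need to know how documents were concatenated; the flatten/unflatten pair stays local to whichever component needs a single matrix.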