From 88755900818535527f2724d7286d2229a2e748d3 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 20 Aug 2017 14:42:07 +0200
Subject: [PATCH] Add optimizer in Language.update if sgd=None

---
 spacy/language.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index ed880d9ca..50ed0a166 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -200,6 +200,7 @@ class Language(object):
             else:
                 flat_list.append(pipe)
         self.pipeline = flat_list
+        self._optimizer = None
 
     @property
     def meta(self):
@@ -278,7 +279,7 @@ class Language(object):
         return self.tokenizer(text)
 
     def update(self, docs, golds, drop=0., sgd=None, losses=None,
-               update_tensors=False):
+               update_shared=False):
         """Update the models in the pipeline.
 
         docs (iterable): A batch of `Doc` objects.
@@ -298,6 +299,10 @@ class Language(object):
                 "Got: %d, %d" % (len(docs), len(golds)))
         if len(docs) == 0:
             return
+        if sgd is None:
+            if self._optimizer is None:
+                self._optimizer = Adam(Model.ops, 0.001)
+            sgd = self._optimizer
         tok2vec = self.pipeline[0]
         feats = tok2vec.doc2feats(docs)
         grads = {}
@@ -312,12 +317,13 @@ class Language(object):
                 continue
             d_tokvecses = proc.update((docs, tokvecses), golds,
                                       drop=drop, sgd=get_grads, losses=losses)
-            if update_tensors and d_tokvecses is not None:
+            if update_shared and d_tokvecses is not None:
                 for i, d_tv in enumerate(d_tokvecses):
                     all_d_tokvecses[i] += d_tv
-        bp_tokvecses(all_d_tokvecses, sgd=sgd)
-        for key, (W, dW) in grads.items():
-            sgd(W, dW, key=key)
+        if update_shared and bp_tokvecses is not None:
+            bp_tokvecses(all_d_tokvecses, sgd=sgd)
+        for key, (W, dW) in grads.items():
+            sgd(W, dW, key=key)
         # Clear the tensor variable, to free GPU memory.
         # If we don't do this, the memory leak gets pretty
         # bad, because we may be holding part of a batch.
@@ -378,11 +384,11 @@ class Language(object):
         eps = util.env_opt('optimizer_eps', 1e-08)
         L2 = util.env_opt('L2_penalty', 1e-6)
         max_grad_norm = util.env_opt('grad_norm_clip', 1.)
-        optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1,
-                         beta2=beta2, eps=eps)
-        optimizer.max_grad_norm = max_grad_norm
-        optimizer.device = device
-        return optimizer
+        self._optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1,
+                               beta2=beta2, eps=eps)
+        self._optimizer.max_grad_norm = max_grad_norm
+        self._optimizer.device = device
+        return self._optimizer
 
     def evaluate(self, docs_golds):
         scorer = Scorer()
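
For reference, the change amounts to lazily creating and caching a default optimizer the first time update() is called without one, and reusing it on later calls. Below is a minimal standalone sketch of that pattern, not spaCy itself; DummyOptimizer and Pipeline are hypothetical stand-ins for thinc's Adam and spaCy's Language.

class DummyOptimizer(object):
    """Hypothetical stand-in for thinc's Adam: plain SGD with a fixed learning rate."""
    def __init__(self, learn_rate=0.001):
        self.learn_rate = learn_rate

    def __call__(self, weights, gradient, key=None):
        for i, grad in enumerate(gradient):
            weights[i] -= self.learn_rate * grad


class Pipeline(object):
    """Hypothetical stand-in for Language, showing the lazy-optimizer pattern."""
    def __init__(self):
        # Mirrors the `self._optimizer = None` added to Language.__init__.
        self._optimizer = None

    def update(self, weights, gradient, sgd=None):
        # Mirrors the patched Language.update: if no optimizer is passed in,
        # create one on first use and reuse the same instance afterwards.
        if sgd is None:
            if self._optimizer is None:
                self._optimizer = DummyOptimizer(0.001)
            sgd = self._optimizer
        sgd(weights, gradient, key=0)


pipe = Pipeline()
weights = [0.5, -0.2]
pipe.update(weights, [0.1, 0.3])   # no sgd passed: internal optimizer is created
pipe.update(weights, [0.1, 0.3])   # the cached optimizer is reused on later calls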