Add optimizer in Language.update if sgd=None

Matthew Honnibal 2017-08-20 14:42:07 +02:00
parent 84b7ed49e4
commit 8875590081
1 changed file with 16 additions and 10 deletions


@@ -200,6 +200,7 @@ class Language(object):
             else:
                 flat_list.append(pipe)
         self.pipeline = flat_list
+        self._optimizer = None
 
     @property
     def meta(self):
@@ -278,7 +279,7 @@ class Language(object):
         return self.tokenizer(text)
 
     def update(self, docs, golds, drop=0., sgd=None, losses=None,
-               update_tensors=False):
+               update_shared=False):
         """Update the models in the pipeline.
 
         docs (iterable): A batch of `Doc` objects.
@@ -298,6 +299,10 @@ class Language(object):
                 "Got: %d, %d" % (len(docs), len(golds)))
         if len(docs) == 0:
             return
+        if sgd is None:
+            if self._optimizer is None:
+                self._optimizer = Adam(Model.ops, 0.001)
+            sgd = self._optimizer
         tok2vec = self.pipeline[0]
         feats = tok2vec.doc2feats(docs)
         grads = {}
@@ -312,12 +317,13 @@ class Language(object):
                 continue
             d_tokvecses = proc.update((docs, tokvecses), golds,
                                       drop=drop, sgd=get_grads, losses=losses)
-            if update_tensors and d_tokvecses is not None:
+            if update_shared and d_tokvecses is not None:
                 for i, d_tv in enumerate(d_tokvecses):
                     all_d_tokvecses[i] += d_tv
-        bp_tokvecses(all_d_tokvecses, sgd=sgd)
-        for key, (W, dW) in grads.items():
-            sgd(W, dW, key=key)
+        if update_shared and bp_tokvecses is not None:
+            bp_tokvecses(all_d_tokvecses, sgd=sgd)
+        for key, (W, dW) in grads.items():
+            sgd(W, dW, key=key)
         # Clear the tensor variable, to free GPU memory.
         # If we don't do this, the memory leak gets pretty
         # bad, because we may be holding part of a batch.
@@ -378,11 +384,11 @@ class Language(object):
         eps = util.env_opt('optimizer_eps', 1e-08)
         L2 = util.env_opt('L2_penalty', 1e-6)
         max_grad_norm = util.env_opt('grad_norm_clip', 1.)
-        optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1,
-                         beta2=beta2, eps=eps)
-        optimizer.max_grad_norm = max_grad_norm
-        optimizer.device = device
-        return optimizer
+        self._optimizer = Adam(Model.ops, learn_rate, L2=L2, beta1=beta1,
+                               beta2=beta2, eps=eps)
+        self._optimizer.max_grad_norm = max_grad_norm
+        self._optimizer.device = device
+        return self._optimizer
 
     def evaluate(self, docs_golds):
         scorer = Scorer()
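
The practical effect of this change is that `Language.update` can be called without an explicit optimizer: on the first call with `sgd=None` it creates `Adam(Model.ops, 0.001)`, stores it on `self._optimizer`, and reuses it on later calls; the optimizer built in the last hunk above is now stored on the same attribute. A minimal usage sketch, assuming `nlp` is a loaded `Language` object and `train_batches` is a hypothetical iterable of prepared `(docs, golds)` batches:

    losses = {}
    for batch_docs, batch_golds in train_batches:  # hypothetical pre-made batches
        # With sgd=None, update() falls back to the shared Adam optimizer on
        # nlp._optimizer, creating it on the first call and reusing it afterwards.
        nlp.update(batch_docs, batch_golds, drop=0.2, losses=losses)
    print(losses)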