Improve model saving in train script

Matthew Honnibal 2017-05-26 05:52:09 -05:00
parent 22d7b448a5
commit d65f99a720
1 changed file with 25 additions and 18 deletions

@@ -57,9 +57,9 @@ def train(_, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
     # starts high and decays sharply, to force the optimizer to explore.
     # Batch size starts at 1 and grows, so that we make updates quickly
     # at the beginning of training.
-    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.5),
+    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.2),
                                   util.env_opt('dropout_to', 0.2),
-                                  util.env_opt('dropout_decay', 1e-4))
+                                  util.env_opt('dropout_decay', 0.0))
     batch_sizes = util.compounding(util.env_opt('batch_from', 1),
                                    util.env_opt('batch_to', 64),
                                    util.env_opt('batch_compound', 1.001))
@@ -71,23 +71,30 @@ def train(_, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
     optimizer = nlp.begin_training(lambda: corpus.train_tuples, use_gpu=use_gpu)
     print("Itn.\tDep. Loss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %")
-    for i in range(n_iter):
-        with tqdm.tqdm(total=corpus.count_train(), leave=False) as pbar:
-            train_docs = corpus.train_docs(nlp, projectivize=True,
-                                           gold_preproc=False, shuffle=i)
-            losses = {}
-            for batch in minibatch(train_docs, size=batch_sizes):
-                docs, golds = zip(*batch)
-                nlp.update(docs, golds, sgd=optimizer,
-                           drop=next(dropout_rates), losses=losses)
-                pbar.update(len(docs))
-        with nlp.use_params(optimizer.averages):
-            scorer = nlp.evaluate(corpus.dev_docs(nlp, gold_preproc=False))
-        print_progress(i, losses, scorer.scores)
-    with (output_path / 'model.bin').open('wb') as file_:
-        with nlp.use_params(optimizer.averages):
-            dill.dump(nlp, file_, -1)
+    try:
+        for i in range(n_iter):
+            with tqdm.tqdm(total=corpus.count_train(), leave=False) as pbar:
+                train_docs = corpus.train_docs(nlp, projectivize=True,
+                                               gold_preproc=False, max_length=1000)
+                losses = {}
+                for batch in minibatch(train_docs, size=batch_sizes):
+                    docs, golds = zip(*batch)
+                    nlp.update(docs, golds, sgd=optimizer,
+                               drop=next(dropout_rates), losses=losses)
+                    pbar.update(len(docs))
+            with nlp.use_params(optimizer.averages):
+                scorer = nlp.evaluate(corpus.dev_docs(nlp, gold_preproc=False))
+                with (output_path / ('model%d.pickle' % i)).open('wb') as file_:
+                    dill.dump(nlp, file_, -1)
+            print_progress(i, losses, scorer.scores)
+    finally:
+        print("Saving model...")
+        with (output_path / 'model-final.pickle').open('wb') as file_:
+            with nlp.use_params(optimizer.averages):
+                dill.dump(nlp, file_, -1)
 
 
 def _render_parses(i, to_render):
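
With this change the script writes one pickle per iteration (model0.pickle, model1.pickle, ...) and, via the finally block, a model-final.pickle even if training is interrupted. A minimal sketch of reloading one of these pickles, assuming dill is installed; the output directory name below is illustrative, not part of the commit:

    from pathlib import Path

    import dill

    # Hypothetical output directory: whatever was passed to the train command.
    output_path = Path('output_dir')

    # Load the final model written by the finally: block above.
    with (output_path / 'model-final.pickle').open('rb') as file_:
        nlp = dill.load(file_)

    doc = nlp(u'This is a test sentence.')
    print([(token.text, token.tag_, token.dep_) for token in doc])

Because both dumps happen inside nlp.use_params(optimizer.averages), the reloaded pipeline carries the averaged parameters rather than the weights from the last raw update.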