mirror of https://github.com/explosion/spaCy.git
Improve model saving in train script
This commit is contained in:
parent
22d7b448a5
commit
d65f99a720
|
@@ -57,9 +57,9 @@ def train(_, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
# starts high and decays sharply, to force the optimizer to explore.
|
# starts high and decays sharply, to force the optimizer to explore.
|
||||||
# Batch size starts at 1 and grows, so that we make updates quickly
|
# Batch size starts at 1 and grows, so that we make updates quickly
|
||||||
# at the beginning of training.
|
# at the beginning of training.
|
||||||
dropout_rates = util.decaying(util.env_opt('dropout_from', 0.5),
|
dropout_rates = util.decaying(util.env_opt('dropout_from', 0.2),
|
||||||
util.env_opt('dropout_to', 0.2),
|
util.env_opt('dropout_to', 0.2),
|
||||||
util.env_opt('dropout_decay', 1e-4))
|
util.env_opt('dropout_decay', 0.0))
|
||||||
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
|
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
|
||||||
util.env_opt('batch_to', 64),
|
util.env_opt('batch_to', 64),
|
||||||
util.env_opt('batch_compound', 1.001))
|
util.env_opt('batch_compound', 1.001))
|
||||||
|
@@ -71,10 +71,11 @@ def train(_, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
optimizer = nlp.begin_training(lambda: corpus.train_tuples, use_gpu=use_gpu)
|
optimizer = nlp.begin_training(lambda: corpus.train_tuples, use_gpu=use_gpu)
|
||||||
|
|
||||||
print("Itn.\tDep. Loss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %")
|
print("Itn.\tDep. Loss\tUAS\tNER P.\tNER R.\tNER F.\tTag %\tToken %")
|
||||||
|
try:
|
||||||
for i in range(n_iter):
|
for i in range(n_iter):
|
||||||
with tqdm.tqdm(total=corpus.count_train(), leave=False) as pbar:
|
with tqdm.tqdm(total=corpus.count_train(), leave=False) as pbar:
|
||||||
train_docs = corpus.train_docs(nlp, projectivize=True,
|
train_docs = corpus.train_docs(nlp, projectivize=True,
|
||||||
gold_preproc=False, shuffle=i)
|
gold_preproc=False, max_length=1000)
|
||||||
losses = {}
|
losses = {}
|
||||||
for batch in minibatch(train_docs, size=batch_sizes):
|
for batch in minibatch(train_docs, size=batch_sizes):
|
||||||
docs, golds = zip(*batch)
|
docs, golds = zip(*batch)
|
||||||
|
@@ -84,8 +85,14 @@ def train(_, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
|
|
||||||
with nlp.use_params(optimizer.averages):
|
with nlp.use_params(optimizer.averages):
|
||||||
scorer = nlp.evaluate(corpus.dev_docs(nlp, gold_preproc=False))
|
scorer = nlp.evaluate(corpus.dev_docs(nlp, gold_preproc=False))
|
||||||
|
with (output_path / ('model%d.pickle' % i)).open('wb') as file_:
|
||||||
|
dill.dump(nlp, file_, -1)
|
||||||
|
|
||||||
|
|
||||||
print_progress(i, losses, scorer.scores)
|
print_progress(i, losses, scorer.scores)
|
||||||
with (output_path / 'model.bin').open('wb') as file_:
|
finally:
|
||||||
|
print("Saving model...")
|
||||||
|
with (output_path / 'model-final.pickle').open('wb') as file_:
|
||||||
with nlp.use_params(optimizer.averages):
|
with nlp.use_params(optimizer.averages):
|
||||||
dill.dump(nlp, file_, -1)
|
dill.dump(nlp, file_, -1)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue