Fix train loop to avoid swallowing tracebacks (#6693)

* Avoid swallowing tracebacks in train loop

* Format

* Handle first
This commit is contained in:
Matthew Honnibal 2021-01-09 11:25:47 +11:00 committed by GitHub
parent a612a5ba3f
commit c04bab6bae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 27 additions and 23 deletions

View File

@ -59,6 +59,19 @@ def train(
batcher = T["batcher"]
train_logger = T["logger"]
before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
# Helper function to save checkpoints. This is a closure for convenience,
# to avoid passing in all the args all the time.
# NOTE(review): this is quoted inside a diff rendering that appears to have
# stripped the original indentation; the lines below keep that layout untouched.
# Closure over train()'s state: serializes the current pipeline (via the
# before_to_disk callback) to output_path / DIR_MODEL_LAST using the
# optimizer's parameter averages, and, when is_best, promotes that snapshot
# to output_path / DIR_MODEL_BEST by directory copy.
def save_checkpoint(is_best):
# Swap in averaged parameters so the serialized weights match evaluation.
with nlp.use_params(optimizer.averages):
before_to_disk(nlp).to_disk(output_path / DIR_MODEL_LAST)
if is_best:
# Avoid saving twice (saving will be more expensive than
# the dir copy)
# copytree requires the destination not to exist, so clear any
# previous best-model directory first.
if (output_path / DIR_MODEL_BEST).exists():
shutil.rmtree(output_path / DIR_MODEL_BEST)
shutil.copytree(output_path / DIR_MODEL_LAST, output_path / DIR_MODEL_BEST)
# Components that shouldn't be updated during training
frozen_components = T["frozen_components"]
# Create iterator, which yields out info after each optimization step.
@ -87,38 +100,29 @@ def train(
if is_best_checkpoint is not None and output_path is not None:
with nlp.select_pipes(disable=frozen_components):
update_meta(T, nlp, info)
with nlp.use_params(optimizer.averages):
nlp = before_to_disk(nlp)
nlp.to_disk(output_path / DIR_MODEL_LAST)
if is_best_checkpoint:
with nlp.use_params(optimizer.averages):
nlp.to_disk(output_path / DIR_MODEL_BEST)
save_checkpoint(is_best_checkpoint)
except Exception as e:
if output_path is not None:
# We don't want to swallow the traceback if we don't have a
# specific error, but we do want to warn that we're trying
# to do something here.
stdout.write(
msg.warn(
f"Aborting and saving the final best model. "
f"Encountered exception: {str(e)}"
f"Encountered exception: {repr(e)}"
)
+ "\n"
)
raise e
finally:
finalize_logger()
save_checkpoint(False)
# This will only run if we did't hit an error
if optimizer.averages:
nlp.use_params(optimizer.averages)
if output_path is not None:
final_model_path = output_path / DIR_MODEL_LAST
nlp.to_disk(final_model_path)
# This will only run if we don't hit an error
stdout.write(
msg.good("Saved pipeline to output directory", final_model_path) + "\n"
msg.good("Saved pipeline to output directory", output_path / DIR_MODEL_LAST)
+ "\n"
)
return (nlp, final_model_path)
return (nlp, output_path / DIR_MODEL_LAST)
else:
return (nlp, None)