mirror of https://github.com/explosion/spaCy.git
Fix train loop to avoid swallowing tracebacks (#6693)
* Avoid swallowing tracebacks in train loop
* Format
* Handle first
parent a612a5ba3f
commit c04bab6bae
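The underlying Python pitfall: a return inside a finally block cancels any exception that is propagating out of the try, so the traceback never reaches the caller. The second hunk below restructures train() so the success-path save and return happen after the try/finally instead of inside it. A minimal, self-contained sketch of the failure mode and the restructured control flow (illustrative only, not spaCy code; buggy_train and fixed_train are made-up names):

    def buggy_train(fail: bool = True):
        try:
            if fail:
                raise ValueError("boom")
        except Exception as e:
            print(f"warning: {e!r}")
            raise e
        finally:
            # Returning here silently discards the re-raised exception,
            # so the caller never sees the traceback.
            return "model-last"


    def fixed_train(fail: bool = True):
        try:
            if fail:
                raise ValueError("boom")
        except Exception as e:
            print(f"warning: {e!r}")
            raise e
        finally:
            pass  # cleanup only, no return
        # The success path lives after the try statement, so an in-flight
        # exception propagates with its full traceback.
        return "model-last"


    print(buggy_train())  # prints the warning, then "model-last" -- error swallowed
    # fixed_train()       # would raise ValueError("boom") with a full traceback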
@@ -59,6 +59,19 @@ def train(
     batcher = T["batcher"]
     train_logger = T["logger"]
     before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
+
+    # Helper function to save checkpoints. This is a closure for convenience,
+    # to avoid passing in all the args all the time.
+    def save_checkpoint(is_best):
+        with nlp.use_params(optimizer.averages):
+            before_to_disk(nlp).to_disk(output_path / DIR_MODEL_LAST)
+        if is_best:
+            # Avoid saving twice (saving will be more expensive than
+            # the dir copy)
+            if (output_path / DIR_MODEL_BEST).exists():
+                shutil.rmtree(output_path / DIR_MODEL_BEST)
+            shutil.copytree(output_path / DIR_MODEL_LAST, output_path / DIR_MODEL_BEST)
+
     # Components that shouldn't be updated during training
     frozen_components = T["frozen_components"]
     # Create iterator, which yields out info after each optimization step.
@@ -87,40 +100,31 @@ def train(
             if is_best_checkpoint is not None and output_path is not None:
                 with nlp.select_pipes(disable=frozen_components):
                     update_meta(T, nlp, info)
-                with nlp.use_params(optimizer.averages):
-                    nlp = before_to_disk(nlp)
-                    nlp.to_disk(output_path / DIR_MODEL_LAST)
-                if is_best_checkpoint:
-                    with nlp.use_params(optimizer.averages):
-                        nlp.to_disk(output_path / DIR_MODEL_BEST)
+                save_checkpoint(is_best_checkpoint)
     except Exception as e:
         if output_path is not None:
+            # We don't want to swallow the traceback if we don't have a
+            # specific error, but we do want to warn that we're trying
+            # to do something here.
             stdout.write(
                 msg.warn(
                     f"Aborting and saving the final best model. "
-                    f"Encountered exception: {str(e)}"
+                    f"Encountered exception: {repr(e)}"
                 )
                 + "\n"
             )
         raise e
     finally:
         finalize_logger()
-        if optimizer.averages:
-            nlp.use_params(optimizer.averages)
-        if output_path is not None:
-            final_model_path = output_path / DIR_MODEL_LAST
-            nlp.to_disk(final_model_path)
-            # This will only run if we don't hit an error
-            stdout.write(
-                msg.good("Saved pipeline to output directory", final_model_path) + "\n"
-            )
-            return (nlp, final_model_path)
-        else:
-            return (nlp, None)
+    save_checkpoint(False)
+    # This will only run if we did't hit an error
+    if optimizer.averages:
+        nlp.use_params(optimizer.averages)
+    if output_path is not None:
+        stdout.write(
+            msg.good("Saved pipeline to output directory", output_path / DIR_MODEL_LAST)
+            + "\n"
+        )
+        return (nlp, output_path / DIR_MODEL_LAST)
+    else:
+        return (nlp, None)
 
 
 def train_while_improving(
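A second, smaller change in the hunk above is logging repr(e) instead of str(e). For exceptions, str() can be empty or drop the exception type, while repr() always includes the class name, which makes the warning more useful. Plain Python, independent of spaCy:

    try:
        {}["missing-key"]
    except Exception as e:
        print(str(e))   # 'missing-key'        -- the exception type is lost
        print(repr(e))  # KeyError('missing-key')

    try:
        raise ValueError()
    except Exception as e:
        print(str(e))   # (empty string)
        print(repr(e))  # ValueError()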