From c04bab6bae214ca31ba5c11ce6e46e4cb23923ac Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 9 Jan 2021 11:25:47 +1100
Subject: [PATCH] Fix train loop to avoid swallowing tracebacks (#6693)

* Avoid swallowing tracebacks in train loop

* Format

* Handle first
---
 spacy/training/loop.py | 50 +++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index 577c80cb3..fe2d4b18f 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -59,6 +59,19 @@ def train(
     batcher = T["batcher"]
     train_logger = T["logger"]
     before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
+
+    # Helper function to save checkpoints. This is a closure for convenience,
+    # to avoid passing in all the args all the time.
+    def save_checkpoint(is_best):
+        with nlp.use_params(optimizer.averages):
+            before_to_disk(nlp).to_disk(output_path / DIR_MODEL_LAST)
+        if is_best:
+            # Avoid saving twice (saving will be more expensive than
+            # the dir copy)
+            if (output_path / DIR_MODEL_BEST).exists():
+                shutil.rmtree(output_path / DIR_MODEL_BEST)
+            shutil.copytree(output_path / DIR_MODEL_LAST, output_path / DIR_MODEL_BEST)
+
     # Components that shouldn't be updated during training
     frozen_components = T["frozen_components"]
     # Create iterator, which yields out info after each optimization step.
@@ -87,40 +100,31 @@ def train(
             if is_best_checkpoint is not None and output_path is not None:
                 with nlp.select_pipes(disable=frozen_components):
                     update_meta(T, nlp, info)
-                with nlp.use_params(optimizer.averages):
-                    nlp = before_to_disk(nlp)
-                    nlp.to_disk(output_path / DIR_MODEL_LAST)
-                if is_best_checkpoint:
-                    with nlp.use_params(optimizer.averages):
-                        nlp.to_disk(output_path / DIR_MODEL_BEST)
-
+                save_checkpoint(is_best_checkpoint)
     except Exception as e:
         if output_path is not None:
-            # We don't want to swallow the traceback if we don't have a
-            # specific error, but we do want to warn that we're trying
-            # to do something here.
             stdout.write(
                 msg.warn(
                     f"Aborting and saving the final best model. "
-                    f"Encountered exception: {str(e)}"
+                    f"Encountered exception: {repr(e)}"
                 )
                 + "\n"
            )
        raise e
    finally:
        finalize_logger()
-        if optimizer.averages:
-            nlp.use_params(optimizer.averages)
-        if output_path is not None:
-            final_model_path = output_path / DIR_MODEL_LAST
-            nlp.to_disk(final_model_path)
-            # This will only run if we don't hit an error
-            stdout.write(
-                msg.good("Saved pipeline to output directory", final_model_path) + "\n"
-            )
-            return (nlp, final_model_path)
-        else:
-            return (nlp, None)
+        save_checkpoint(False)
+    # This will only run if we didn't hit an error
+    if optimizer.averages:
+        nlp.use_params(optimizer.averages)
+    if output_path is not None:
+        stdout.write(
+            msg.good("Saved pipeline to output directory", output_path / DIR_MODEL_LAST)
+            + "\n"
+        )
+        return (nlp, output_path / DIR_MODEL_LAST)
+    else:
+        return (nlp, None)
 
 
 def train_while_improving(
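
Note on the control flow this patch moves to: checkpoint saving is pulled into a save_checkpoint(is_best) closure, the except block only warns and re-raises, the finally block writes a final "last" checkpoint, and the success-path return now sits after the try/finally. Moving the return out of finally matters because a return inside a finally block discards any exception that is in flight, which is how the old code could swallow tracebacks. Below is a minimal standalone sketch of that pattern, assuming made-up names (train_sketch, the model-last/model-best directory names, a dummy checkpoint file); it is not the spaCy implementation itself.

    # Minimal sketch of the save-in-closure / re-raise / return-after-finally pattern.
    # All names and paths here are illustrative assumptions, not spaCy's API.
    import shutil
    from pathlib import Path


    def train_sketch(output_path: Path, n_steps: int = 3) -> Path:
        def save_checkpoint(is_best: bool) -> None:
            # Always write the "last" model; copy the directory if it is the
            # best so far (copying is cheaper than serializing twice).
            last = output_path / "model-last"
            best = output_path / "model-best"
            last.mkdir(parents=True, exist_ok=True)
            (last / "checkpoint.txt").write_text("weights go here")
            if is_best:
                if best.exists():
                    shutil.rmtree(best)
                shutil.copytree(last, best)

        try:
            for step in range(n_steps):
                # ... run one optimization step, evaluate, decide if best ...
                save_checkpoint(is_best=(step == n_steps - 1))
        except Exception as e:
            print(f"Aborting and saving the final best model. Encountered exception: {e!r}")
            raise  # re-raise so the caller still sees the original traceback
        finally:
            save_checkpoint(False)
        # Only reached if no exception was raised.
        return output_path / "model-last"


    if __name__ == "__main__":
        print(train_sketch(Path("sketch-output")))

The directory copy in save_checkpoint mirrors the design choice noted in the diff: the best model is produced by copying the just-saved "last" directory rather than serializing the pipeline a second time.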