diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index b4119abdf..3f368f57d 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -101,13 +101,14 @@ def debug_data( # Create the gold corpus to be able to better analyze data dot_names = [T["train_corpus"], T["dev_corpus"]] train_corpus, dev_corpus = resolve_dot_names(config, dot_names) + + nlp.initialize(lambda: train_corpus(nlp)) + msg.good("Pipeline can be initialized with data") + train_dataset = list(train_corpus(nlp)) dev_dataset = list(dev_corpus(nlp)) msg.good("Corpus is loadable") - nlp.initialize(lambda: train_dataset) - msg.good("Pipeline can be initialized with data") - # Create all gold data here to avoid iterating over the train_dataset constantly gold_train_data = _compile_gold(train_dataset, factory_names, nlp, make_proj=True) gold_train_unpreprocessed_data = _compile_gold(