mirror of https://github.com/explosion/spaCy.git
Reload train corpus in debug data after initialize (#8776)
This commit is contained in:
parent
d48c01a6f7
commit
6bbc2b1956
|
@@ -101,13 +101,14 @@ def debug_data(
|
|||
# Create the gold corpus to be able to better analyze data
|
||||
dot_names = [T["train_corpus"], T["dev_corpus"]]
|
||||
train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
|
||||
|
||||
nlp.initialize(lambda: train_corpus(nlp))
|
||||
msg.good("Pipeline can be initialized with data")
|
||||
|
||||
train_dataset = list(train_corpus(nlp))
|
||||
dev_dataset = list(dev_corpus(nlp))
|
||||
msg.good("Corpus is loadable")
|
||||
|
||||
nlp.initialize(lambda: train_dataset)
|
||||
msg.good("Pipeline can be initialized with data")
|
||||
|
||||
# Create all gold data here to avoid iterating over the train_dataset constantly
|
||||
gold_train_data = _compile_gold(train_dataset, factory_names, nlp, make_proj=True)
|
||||
gold_train_unpreprocessed_data = _compile_gold(
|
||||
|
|
Loading…
Reference in New Issue