mirror of https://github.com/explosion/spaCy.git
Reload train corpus in debug data after initialize (#8776)
This commit is contained in:
parent
d48c01a6f7
commit
6bbc2b1956
|
@@ -101,13 +101,14 @@ def debug_data(
|
||||||
# Create the gold corpus to be able to better analyze data
|
# Create the gold corpus to be able to better analyze data
|
||||||
dot_names = [T["train_corpus"], T["dev_corpus"]]
|
dot_names = [T["train_corpus"], T["dev_corpus"]]
|
||||||
train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
|
train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
|
||||||
|
|
||||||
|
nlp.initialize(lambda: train_corpus(nlp))
|
||||||
|
msg.good("Pipeline can be initialized with data")
|
||||||
|
|
||||||
train_dataset = list(train_corpus(nlp))
|
train_dataset = list(train_corpus(nlp))
|
||||||
dev_dataset = list(dev_corpus(nlp))
|
dev_dataset = list(dev_corpus(nlp))
|
||||||
msg.good("Corpus is loadable")
|
msg.good("Corpus is loadable")
|
||||||
|
|
||||||
nlp.initialize(lambda: train_dataset)
|
|
||||||
msg.good("Pipeline can be initialized with data")
|
|
||||||
|
|
||||||
# Create all gold data here to avoid iterating over the train_dataset constantly
|
# Create all gold data here to avoid iterating over the train_dataset constantly
|
||||||
gold_train_data = _compile_gold(train_dataset, factory_names, nlp, make_proj=True)
|
gold_train_data = _compile_gold(train_dataset, factory_names, nlp, make_proj=True)
|
||||||
gold_train_unpreprocessed_data = _compile_gold(
|
gold_train_unpreprocessed_data = _compile_gold(
|
||||||
|
|
Loading…
Reference in New Issue