Restructure the [pretraining] defaults in spacy/default_config_pretraining.cfg.

Removes min_length, max_length, batch_size, seed, use_pytorch_for_gpu_memory
and tok2vec_model from [pretraining]; adds component and layer there; and
introduces [pretraining.batcher] (size = 3000) and [pretraining.corpus]
(min_length = 5, max_length = 500), which now carry the batch-size and
length-limit values that were previously set directly on [pretraining].

diff --git a/spacy/default_config_pretraining.cfg b/spacy/default_config_pretraining.cfg
index 7032eac03..9120db338 100644
--- a/spacy/default_config_pretraining.cfg
+++ b/spacy/default_config_pretraining.cfg
@@ -1,13 +1,23 @@
 [pretraining]
 max_epochs = 1000
-min_length = 5
-max_length = 500
 dropout = 0.2
 n_save_every = null
-batch_size = 3000
-seed = ${system.seed}
-use_pytorch_for_gpu_memory = ${system.use_pytorch_for_gpu_memory}
-tok2vec_model = "components.tok2vec.model"
+component = "tok2vec"
+layer = ""
+
+[pretraining.batcher]
+@batchers = "spacy.batch_by_words.v1"
+size = 3000
+discard_oversize = false
+tolerance = 0.2
+get_length = null
+
+[pretraining.corpus]
+@readers = "spacy.JsonlReader.v1"
+path = ${paths.raw}
+min_length = 5
+max_length = 500
+limit = 0
 
 [pretraining.objective]
 type = "characters"