diff --git a/examples/experiments/onto-joint/pretrain.cfg b/examples/experiments/onto-joint/pretrain.cfg
index 1637cceae..4f1898d69 100644
--- a/examples/experiments/onto-joint/pretrain.cfg
+++ b/examples/experiments/onto-joint/pretrain.cfg
@@ -53,17 +53,7 @@ n_save_every = null
 batch_size = 3000
 seed = ${training:seed}
 use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
-
-[pretraining.model]
-@architectures = "spacy.HashEmbedCNN.v1"
-pretrained_vectors = ${nlp:vectors}
-width = 256
-depth = 6
-window_size = 1
-embed_size = 2000
-maxout_pieces = 3
-subword_features = true
-dropout = null
+tok2vec_model = "nlp.pipeline.tok2vec.model"

 [pretraining.optimizer]
 @optimizers = "Adam.v1"
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 96564b98b..921eb38ab 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -109,7 +109,10 @@ def pretrain(
     with msg.loading(f"Loading model '{vectors_model}'..."):
         nlp = util.load_model(vectors_model)
     msg.good(f"Loaded model '{vectors_model}'")
-    tok2vec = pretrain_config["model"]
+    tok2vec_path = pretrain_config["tok2vec_model"]
+    tok2vec = config
+    for subpath in tok2vec_path.split("."):
+        tok2vec = tok2vec.get(subpath)
     model = create_pretraining_model(nlp, tok2vec)
     optimizer = pretrain_config["optimizer"]

diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index 970d31899..8000d1aff 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -102,4 +102,4 @@ def _replace_word(word, random_words, mask="[MASK]"):
     elif roll < 0.9:
         return random_words.next()
     else:
-        return word
\ No newline at end of file
+        return word
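
For reference, here is a minimal standalone sketch of what the new "tok2vec_model" setting does: instead of defining a separate [pretraining.model] block, the config now stores a dotted path that is walked through the parsed config to reuse the pipeline's own tok2vec definition. The config dict and the resolve_dotted_path helper below are hypothetical stand-ins for illustration, not part of the diff; the diff's own loop calls .get() at each step without a check, so the explicit missing-key error here is an added assumption.

    # Standalone sketch, assuming a parsed config shaped like pretrain.cfg.
    config = {
        "nlp": {
            "pipeline": {
                "tok2vec": {"model": {"@architectures": "spacy.HashEmbedCNN.v1"}}
            }
        },
        "pretraining": {"tok2vec_model": "nlp.pipeline.tok2vec.model"},
    }

    def resolve_dotted_path(config, path):
        # Walk nested dicts one key per step, following a dotted path
        # such as "nlp.pipeline.tok2vec.model".
        node = config
        for key in path.split("."):
            node = node.get(key)
            if node is None:
                # Added for illustration; the diff itself has no such check.
                raise KeyError(f"Could not resolve '{path}': missing '{key}'")
        return node

    tok2vec = resolve_dotted_path(config, config["pretraining"]["tok2vec_model"])
    print(tok2vec)  # {'@architectures': 'spacy.HashEmbedCNN.v1'}

Resolving the pretraining model from the pipeline's config rather than duplicating it presumably keeps the pretrained tok2vec architecture from drifting out of sync with the one the pipeline actually uses, which would make the pretrained weights unusable.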