From 6b027d76893de1b535f17a9b2848aba93bb2bb41 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Thu, 4 Jun 2020 15:49:23 +0200
Subject: [PATCH] remove duplicate model definition of tok2vec layer

---
 examples/experiments/onto-joint/pretrain.cfg | 12 +-----------
 spacy/cli/pretrain.py                        |  5 ++++-
 spacy/ml/models/multi_task.py                |  2 +-
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/examples/experiments/onto-joint/pretrain.cfg b/examples/experiments/onto-joint/pretrain.cfg
index 1637cceae..4f1898d69 100644
--- a/examples/experiments/onto-joint/pretrain.cfg
+++ b/examples/experiments/onto-joint/pretrain.cfg
@@ -53,17 +53,7 @@ n_save_every = null
 batch_size = 3000
 seed = ${training:seed}
 use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
-
-[pretraining.model]
-@architectures = "spacy.HashEmbedCNN.v1"
-pretrained_vectors = ${nlp:vectors}
-width = 256
-depth = 6
-window_size = 1
-embed_size = 2000
-maxout_pieces = 3
-subword_features = true
-dropout = null
+tok2vec_model = "nlp.pipeline.tok2vec.model"
 
 [pretraining.optimizer]
 @optimizers = "Adam.v1"
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 96564b98b..921eb38ab 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -109,7 +109,10 @@ def pretrain(
     with msg.loading(f"Loading model '{vectors_model}'..."):
         nlp = util.load_model(vectors_model)
     msg.good(f"Loaded model '{vectors_model}'")
-    tok2vec = pretrain_config["model"]
+    tok2vec_path = pretrain_config["tok2vec_model"]
+    tok2vec = config
+    for subpath in tok2vec_path.split("."):
+        tok2vec = tok2vec.get(subpath)
     model = create_pretraining_model(nlp, tok2vec)
     optimizer = pretrain_config["optimizer"]
 
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index 970d31899..8000d1aff 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -102,4 +102,4 @@ def _replace_word(word, random_words, mask="[MASK]"):
     elif roll < 0.9:
         return random_words.next()
     else:
-        return word
\ No newline at end of file
+        return word
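
Below is a minimal sketch, separate from the patch itself, of how the new dotted-path lookup added to spacy/cli/pretrain.py resolves the tok2vec_model setting ("nlp.pipeline.tok2vec.model") against the loaded config. The helper name and the example nested dict are illustrative assumptions; only the loop mirrors the patched code.

def resolve_dotted_path(config, dotted_path):
    # Walk the nested config one key at a time, e.g.
    # "nlp.pipeline.tok2vec.model" -> config["nlp"]["pipeline"]["tok2vec"]["model"]
    node = config
    for subpath in dotted_path.split("."):
        node = node.get(subpath)
    return node

# Illustrative stand-in for the parsed config; the real config defines the
# tok2vec model once, under its nlp.pipeline.tok2vec section.
example_config = {
    "nlp": {
        "pipeline": {
            "tok2vec": {
                "model": {"@architectures": "spacy.HashEmbedCNN.v1", "width": 256},
            },
        },
    },
}

tok2vec_cfg = resolve_dotted_path(example_config, "nlp.pipeline.tok2vec.model")
print(tok2vec_cfg["@architectures"])  # spacy.HashEmbedCNN.v1

With this layout, the [pretraining] block only stores a path to the pipeline's tok2vec model, so the architecture is defined in one place instead of being duplicated in a [pretraining.model] section.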