Remove duplicate model definition of the tok2vec layer

This commit is contained in:
svlandeg 2020-06-04 15:49:23 +02:00
parent 1775f54a26
commit 6b027d7689
3 changed files with 6 additions and 13 deletions

View File

@@ -53,17 +53,7 @@ n_save_every = null
batch_size = 3000
seed = ${training:seed}
use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
[pretraining.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = ${nlp:vectors}
width = 256
depth = 6
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true
dropout = null
tok2vec_model = "nlp.pipeline.tok2vec.model"
[pretraining.optimizer]
@optimizers = "Adam.v1"

View File

@@ -109,7 +109,10 @@ def pretrain(
with msg.loading(f"Loading model '{vectors_model}'..."):
nlp = util.load_model(vectors_model)
msg.good(f"Loaded model '{vectors_model}'")
tok2vec = pretrain_config["model"]
tok2vec_path = pretrain_config["tok2vec_model"]
tok2vec = config
for subpath in tok2vec_path.split("."):
tok2vec = tok2vec.get(subpath)
model = create_pretraining_model(nlp, tok2vec)
optimizer = pretrain_config["optimizer"]

View File

@@ -102,4 +102,4 @@ def _replace_word(word, random_words, mask="[MASK]"):
elif roll < 0.9:
return random_words.next()
else:
return word
return word