From d1fd3438c31a3be94c111cdedd1a3c3a92c66b05 Mon Sep 17 00:00:00 2001 From: Matthw Honnibal Date: Tue, 7 Jul 2020 01:38:15 +0200 Subject: [PATCH] Add dropout to parser hidden layer --- spacy/ml/_precomputable_affine.py | 3 ++- spacy/ml/tb_framework.py | 2 +- spacy/syntax/_parser_model.pyx | 14 ++++++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py index 3b5f09e7b..20d5fb3fb 100644 --- a/spacy/ml/_precomputable_affine.py +++ b/spacy/ml/_precomputable_affine.py @@ -1,13 +1,14 @@ from thinc.api import Model, normal_init -def PrecomputableAffine(nO, nI, nF, nP): +def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1): model = Model( "precomputable_affine", forward, init=init, dims={"nO": nO, "nI": nI, "nF": nF, "nP": nP}, params={"W": None, "b": None, "pad": None}, + attrs={"dropout_rate": dropout} ) return model diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py index 88f27f0bf..39d4b0a14 100644 --- a/spacy/ml/tb_framework.py +++ b/spacy/ml/tb_framework.py @@ -2,7 +2,7 @@ from thinc.api import Model, noop, use_ops, Linear from ..syntax._parser_model import ParserStepModel -def TransitionModel(tok2vec, lower, upper, unseen_classes=set()): +def TransitionModel(tok2vec, lower, upper, dropout=0.2, unseen_classes=set()): """Set up a stepwise transition-based model""" if upper is None: has_upper = False diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx index 853facdc6..42baa737b 100644 --- a/spacy/syntax/_parser_model.pyx +++ b/spacy/syntax/_parser_model.pyx @@ -219,9 +219,11 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) no class ParserStepModel(Model): - def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True): + def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True, + dropout=0.1): Model.__init__(self, name="parser_step_model", forward=step_forward) self.attrs["has_upper"] = has_upper + self.attrs["dropout_rate"] = dropout self.tokvecs, self.bp_tokvecs = layers[0](docs, is_train=train) if layers[1].get_dim("nP") >= 2: activation = "maxout" @@ -289,11 +291,17 @@ class ParserStepModel(Model): self.bp_tokvecs(d_tokvecs[:-1]) return d_tokvecs +NUMPY_OPS = NumpyOps() def step_forward(model: ParserStepModel, states, is_train): token_ids = model.get_token_ids(states) vector, get_d_tokvecs = model.state2vec(token_ids, is_train) + mask = None if model.attrs["has_upper"]: + dropout_rate = model.attrs["dropout_rate"] + if is_train and dropout_rate > 0: + mask = NUMPY_OPS.get_dropout_mask(vector.shape, 0.1) + vector *= mask scores, get_d_vector = model.vec2scores(vector, is_train) else: scores = NumpyOps().asarray(vector) @@ -305,6 +313,8 @@ def step_forward(model: ParserStepModel, states, is_train): # Zero vectors for unseen classes d_scores *= model._class_mask d_vector = get_d_vector(d_scores) + if mask is not None: + d_vector *= mask if isinstance(model.state2vec.ops, CupyOps) \ and not isinstance(token_ids, model.state2vec.ops.xp.ndarray): # Move token_ids and d_vector to GPU, asynchronously @@ -437,7 +447,7 @@ cdef class precompute_hiddens: sum_state_features(state_vector.data, feat_weights, &ids[0,0], token_ids.shape[0], self.nF, self.nO*self.nP) - state_vector = state_vector + self.bias + state_vector += self.bias state_vector, bp_nonlinearity = self._nonlinearity(state_vector) def backward(d_state_vector_ids):