From 40703988bc01ea6c41d5e8a920634d4692231c05 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 8 Mar 2017 01:38:51 +0100 Subject: [PATCH] Use FTRL training in parser --- spacy/syntax/parser.pyx | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index eb094fa97..34ee920c6 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -124,6 +124,8 @@ cdef class Parser: elif 'features' not in cfg: cfg['features'] = self.feature_templates self.model = ParserModel(cfg['features']) + self.model.l1_penalty = 1e-7 + self.cfg = cfg def __reduce__(self): @@ -258,15 +260,20 @@ cdef class Parser: self.model.set_featuresC(&eg.c, stcls.c) self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold) self.model.set_scoresC(eg.c.scores, eg.c.features, eg.c.nr_feat) - self.model.updateC(&eg.c) + self.model.time += 1 guess = VecVec.arg_max_if_true(eg.c.scores, eg.c.is_valid, eg.c.nr_class) - - action = self.moves.c[eg.guess] + if eg.c.costs[guess] > 0: + best = VecVec.arg_max_if_zero(eg.c.scores, eg.c.costs, eg.c.nr_class) + for feat in eg.c.features[:eg.c.nr_feat]: + self.model.update_weight_ftrl(feat.key, best, -feat.value * eg.costs[guess]) + self.model.update_weight_ftrl(feat.key, guess, feat.value * eg.costs[guess]) + + action = self.moves.c[guess] action.do(stcls.c, action.label) - loss += eg.costs[eg.guess] - eg.fill_scores(0, eg.nr_class) - eg.fill_costs(0, eg.nr_class) - eg.fill_is_valid(1, eg.nr_class) + loss += eg.costs[guess] + eg.fill_scores(0, eg.c.nr_class) + eg.fill_costs(0, eg.c.nr_class) + eg.fill_is_valid(1, eg.c.nr_class) return loss def step_through(self, Doc doc): @@ -296,7 +303,7 @@ cdef class Parser: # Doesn't set label into serializer -- subclasses override it to do that. for action in self.moves.action_types: self.moves.add_action(action, label) - + cdef class StepwiseState: cdef readonly StateClass stcls