From f4986d5d3cd5f565d4dc613c5199d6439f343b65 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 28 Jun 2015 22:36:03 +0200 Subject: [PATCH] * Use new Example class --- spacy/_ml.pxd | 1 + spacy/_ml.pyx | 16 ++++++---------- spacy/_theano.pyx | 15 ++++++--------- spacy/syntax/parser.pyx | 29 ++++++++++++++++++----------- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/spacy/_ml.pxd b/spacy/_ml.pxd index 3562b4a32..40281cad2 100644 --- a/spacy/_ml.pxd +++ b/spacy/_ml.pxd @@ -5,6 +5,7 @@ from cymem.cymem cimport Pool from thinc.learner cimport LinearModel from thinc.features cimport Extractor, Feature from thinc.typedefs cimport atom_t, feat_t, weight_t, class_t +from thinc.api cimport ExampleC from preshed.maps cimport PreshMapArray diff --git a/spacy/_ml.pyx b/spacy/_ml.pyx index cabc4318a..f84068778 100644 --- a/spacy/_ml.pyx +++ b/spacy/_ml.pyx @@ -61,18 +61,14 @@ cdef class Model: self._model.load(self.model_loc, freq_thresh=0) def predict(self, Example eg): - self.set_scores(&eg.scores[0], &eg.atoms[0]) - eg.guess = arg_max_if_true(&eg.scores[0], &eg.is_valid[0], - self.n_classes) + self.set_scores(eg.c.scores, eg.c.atoms) + eg.c.guess = arg_max_if_true(eg.c.scores, eg.c.is_valid, self.n_classes) def train(self, Example eg): - self.set_scores(&eg.scores[0], &eg.atoms[0]) - eg.guess = arg_max_if_true(&eg.scores[0], - &eg.is_valid[0], self.n_classes) - eg.best = arg_max_if_zero(&eg.scores[0], &eg.costs[0], - self.n_classes) - eg.cost = eg.costs[eg.guess] - self.update(&eg.atoms[0], eg.guess, eg.best, eg.cost) + self.predict(eg) + eg.c.best = arg_max_if_zero(eg.c.scores, eg.c.costs, self.n_classes) + eg.c.cost = eg.c.costs[eg.c.guess] + self.update(eg.c.atoms, eg.c.guess, eg.c.best, eg.c.cost) cdef const weight_t* score(self, atom_t* context) except NULL: cdef int n_feats diff --git a/spacy/_theano.pyx b/spacy/_theano.pyx index ab6e0b089..69896e72a 100644 --- a/spacy/_theano.pyx +++ b/spacy/_theano.pyx @@ -1,4 +1,4 @@ -from thinc.api cimport Example +from thinc.api cimport Example, ExampleC from thinc.typedefs cimport weight_t from ._ml cimport arg_max_if_true @@ -33,20 +33,17 @@ cdef class TheanoModel(Model): cdef int i for i in range(self.n_classes): eg.scores[i] = theano_scores[i] - eg.guess = arg_max_if_true(&eg.scores[0], eg.is_valid[0], - self.n_classes) + eg.guess = arg_max_if_true(eg.c.scores, eg.c.is_valid, self.n_classes) def train(self, Example eg): self.input_layer.fill(eg.embeddings, eg.atoms, use_avg=False) theano_scores, update, y = self.train_func(eg.embeddings, eg.costs, self.eta) self.input_layer.update(update, eg.atoms, self.t, self.eta, self.mu) for i in range(self.n_classes): - eg.scores[i] = theano_scores[i] - eg.guess = arg_max_if_true(&eg.scores[0], eg.is_valid[0], - self.n_classes) - eg.best = arg_max_if_zero(&eg.scores[0], eg.costs[0], - self.n_classes) - eg.cost = eg.costs[eg.guess] + eg.c.scores[i] = theano_scores[i] + eg.guess = arg_max_if_true(eg.c.scores, eg.c.is_valid, self.n_classes) + eg.best = arg_max_if_zero(eg.c.scores, eg.c.costs, self.n_classes) + eg.cost = eg.c.costs[eg.guess] self.t += 1 def end_training(self): diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index cf9d71736..2ea60b149 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -71,14 +71,17 @@ cdef class Parser: cdef StateClass stcls = StateClass.init(tokens.data, tokens.length) self.moves.initialize_state(stcls) - cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE, self.model.n_feats) + cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE, + self.model.n_feats, self.model.n_feats) while not stcls.is_final(): - eg.wipe() - fill_context(&eg.atoms[0], stcls) - self.moves.set_valid(&eg.is_valid[0], stcls) + memset(eg.c.scores, 0, eg.c.nr_class * sizeof(weight_t)) + + self.moves.set_valid(eg.c.is_valid, stcls) + fill_context(eg.c.atoms, stcls) + self.model.predict(eg) - self.moves.c[eg.guess].do(stcls, self.moves.c[eg.guess].label) + self.moves.c[eg.c.guess].do(stcls, self.moves.c[eg.c.guess].label) self.moves.finalize_state(stcls) tokens.set_parse(stcls._sent) @@ -86,20 +89,24 @@ cdef class Parser: self.moves.preprocess_gold(gold) cdef StateClass stcls = StateClass.init(tokens.data, tokens.length) self.moves.initialize_state(stcls) - cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE, self.model.n_feats) + cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE, + self.model.n_feats, self.model.n_feats) cdef int cost = 0 while not stcls.is_final(): - eg.wipe() - fill_context(&eg.atoms[0], stcls) - self.moves.set_costs(&eg.is_valid[0], &eg.costs[0], stcls, gold) + memset(eg.c.scores, 0, eg.c.nr_class * sizeof(weight_t)) + + self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold) + + fill_context(eg.c.atoms, stcls) self.model.train(eg) - self.moves.c[eg.guess].do(stcls, self.moves.c[eg.guess].label) - cost += eg.cost + self.moves.c[eg.c.guess].do(stcls, self.moves.c[eg.c.guess].label) + cost += eg.c.cost return cost + # These are passed as callbacks to thinc.search.Beam """ cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1: