From ff252dd535010e73ba740ca393c4e4a9e394ac32 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 20 Dec 2014 03:48:51 +1100 Subject: [PATCH] * Clean up 'guess_cache' idea, which didn't work well enough --- spacy/syntax/parser.pxd | 2 -- spacy/syntax/parser.pyx | 30 ++++-------------------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/spacy/syntax/parser.pxd b/spacy/syntax/parser.pxd index 14f439e16..be315059f 100644 --- a/spacy/syntax/parser.pxd +++ b/spacy/syntax/parser.pxd @@ -6,7 +6,6 @@ from .arc_eager cimport TransitionSystem from ..tokens cimport Tokens, TokenC from ._state cimport State -from ..index cimport DecisionMemory cdef class GreedyParser: @@ -14,6 +13,5 @@ cdef class GreedyParser: cdef Extractor extractor cdef readonly LinearModel model cdef TransitionSystem moves - cdef readonly DecisionMemory guess_cache cpdef int parse(self, Tokens tokens) except -1 diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index 0b02bf777..9b23cea1c 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -65,15 +65,8 @@ cdef class GreedyParser: self.extractor = Extractor(get_templates(self.cfg.features)) self.moves = TransitionSystem(self.cfg.left_labels, self.cfg.right_labels) self.model = LinearModel(self.moves.n_moves, self.extractor.n_templ) - # Classes for decision memory - classes = ['S', 'D'] - classes += ['L-%s' % label for label in self.cfg.left_labels] - classes += ['R-%s' % label for label in self.cfg.right_labels] - self.guess_cache = DecisionMemory(classes) if os.path.exists(pjoin(model_dir, 'model')): self.model.load(pjoin(model_dir, 'model')) - if os.path.exists(pjoin(model_dir, 'guess_cache')): - self.guess_cache.load(pjoin(model_dir, 'guess_cache')) cpdef int parse(self, Tokens tokens) except -1: cdef: @@ -86,18 +79,11 @@ cdef class GreedyParser: cdef int n_feats cdef Pool mem = Pool() cdef State* state = init_state(mem, tokens.data, tokens.length) - cdef int guess_clas while not is_final(state): 
- state_key = _approx_hash_state(state) - guess_clas = self.guess_cache.get(state_key) - if guess_clas == -1: - fill_context(context, state) - feats = self.extractor.get_feats(context, &n_feats) - scores = self.model.get_scores(feats, n_feats) - guess = self.moves.best_valid(scores, state) - self.guess_cache.inc(state_key, guess.clas, 1) - else: - guess = self.moves._moves[guess_clas] + fill_context(context, state) + feats = self.extractor.get_feats(context, &n_feats) + scores = self.model.get_scores(feats, n_feats) + guess = self.moves.best_valid(scores, state) self.moves.transition(state, &guess) return 0 @@ -134,14 +120,6 @@ cdef class GreedyParser: return n_corr -cdef inline uint64_t _approx_hash_state(const State* state) nogil: - cdef int[3] context - context[0] = get_s0(state).lex.sic - context[1] = get_n0(state).lex.sic - context[2] = get_n1(state).pos if state.i < (state.sent_len - 1) else 0 - return hash64(context, sizeof(int) * 3, 0) - - cdef dict _get_counts(int guess, int best, const Feature* feats, const int n_feats, int inc): if guess == best: