From 889b7b48b4e5e0f50d4e2b4b721b811ba2a58181 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 30 Oct 2014 13:38:55 +1100 Subject: [PATCH] * Fix POS tagger, so that it loads correctly. Lexemes are being read in. --- spacy/lang.pyx | 3 +++ spacy/pos.pyx | 16 +++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/spacy/lang.pyx b/spacy/lang.pyx index e01727313..a09c28172 100644 --- a/spacy/lang.pyx +++ b/spacy/lang.pyx @@ -300,6 +300,7 @@ cdef class Lexicon: assert fp != NULL cdef size_t st cdef Lexeme* lexeme + i = 0 while True: lexeme = self.mem.alloc(sizeof(Lexeme), 1) st = fread(lexeme, sizeof(Lexeme), 1, fp) @@ -307,6 +308,8 @@ cdef class Lexicon: break self.lexemes.push_back(lexeme) self._dict.set(lexeme.hash, lexeme) + i += 1 + print "Load %d lexemes" % i fclose(fp) diff --git a/spacy/pos.pyx b/spacy/pos.pyx index 263f88edb..8722a1639 100644 --- a/spacy/pos.pyx +++ b/spacy/pos.pyx @@ -24,21 +24,19 @@ cdef class Tagger: tags = {'NULL': NULL_TAG} def __init__(self, model_dir): self.mem = Pool() - self.extractor = Extractor(TEMPLATES, [ConjFeat for _ in TEMPLATES]) + tags_loc = path.join(model_dir, 'postags.json') + if path.exists(tags_loc): + with open(tags_loc) as file_: + Tagger.tags.update(ujson.load(file_)) self.model = LinearModel(len(self.tags), self.extractor.n) + if path.exists(path.join(model_dir, 'model')): + self.model.load(path.join(model_dir, 'model')) + self.extractor = Extractor(TEMPLATES, [ConjFeat for _ in TEMPLATES]) self._atoms = self.mem.alloc(CONTEXT_SIZE, sizeof(atom_t)) self._feats = self.mem.alloc(self.extractor.n+1, sizeof(feat_t)) self._values = self.mem.alloc(self.extractor.n+1, sizeof(weight_t)) self._scores = self.mem.alloc(len(self.tags), sizeof(weight_t)) self._guess = NULL_TAG - if path.exists(path.join(model_dir, 'model')): - self.model.load(path.join(model_dir, 'model')) - tags_loc = path.join(model_dir, 'postags.json') - if path.exists(tags_loc): - with open(tags_loc) as file_: - Tagger.tags.update(ujson.load(file_)) - if path.exists(path.join(model_dir, 'strings')): - EN.lexicon.strings.load(path.join(model_dir, 'strings')) cpdef class_t predict(self, int i, Tokens tokens, class_t prev, class_t prev_prev) except 0: assert i >= 0