From 889b7b48b4e5e0f50d4e2b4b721b811ba2a58181 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Thu, 30 Oct 2014 13:38:55 +1100
Subject: [PATCH] * Fix POS tagger, so that it loads correctly. Lexemes are
 being read in.

---
 spacy/lang.pyx |  3 +++
 spacy/pos.pyx  | 16 +++++++---------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/spacy/lang.pyx b/spacy/lang.pyx
index e01727313..a09c28172 100644
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -300,6 +300,7 @@ cdef class Lexicon:
         assert fp != NULL
         cdef size_t st
         cdef Lexeme* lexeme
+        i = 0
         while True:
             lexeme = <Lexeme*>self.mem.alloc(sizeof(Lexeme), 1)
             st = fread(lexeme, sizeof(Lexeme), 1, fp)
@@ -307,6 +308,8 @@ cdef class Lexicon:
                 break
             self.lexemes.push_back(lexeme)
             self._dict.set(lexeme.hash, lexeme)
+            i += 1
+        print "Load %d lexemes" % i
         fclose(fp)
         
 
diff --git a/spacy/pos.pyx b/spacy/pos.pyx
index 263f88edb..8722a1639 100644
--- a/spacy/pos.pyx
+++ b/spacy/pos.pyx
@@ -24,21 +24,19 @@ cdef class Tagger:
     tags = {'NULL': NULL_TAG}
     def __init__(self, model_dir):
         self.mem = Pool()
-        self.extractor = Extractor(TEMPLATES, [ConjFeat for _ in TEMPLATES])
+        tags_loc = path.join(model_dir, 'postags.json')
+        if path.exists(tags_loc):
+            with open(tags_loc) as file_:
+                Tagger.tags.update(ujson.load(file_))
         self.model = LinearModel(len(self.tags), self.extractor.n)
+        if path.exists(path.join(model_dir, 'model')):
+            self.model.load(path.join(model_dir, 'model'))
+        self.extractor = Extractor(TEMPLATES, [ConjFeat for _ in TEMPLATES])
         self._atoms = <atom_t*>self.mem.alloc(CONTEXT_SIZE, sizeof(atom_t))
         self._feats = <feat_t*>self.mem.alloc(self.extractor.n+1, sizeof(feat_t))
         self._values = <weight_t*>self.mem.alloc(self.extractor.n+1, sizeof(weight_t))
         self._scores = <weight_t*>self.mem.alloc(len(self.tags), sizeof(weight_t))
         self._guess = NULL_TAG
-        if path.exists(path.join(model_dir, 'model')):
-            self.model.load(path.join(model_dir, 'model'))
-        tags_loc = path.join(model_dir, 'postags.json')
-        if path.exists(tags_loc):
-            with open(tags_loc) as file_:
-                Tagger.tags.update(ujson.load(file_))
-        if path.exists(path.join(model_dir, 'strings')):
-            EN.lexicon.strings.load(path.join(model_dir, 'strings'))
             
     cpdef class_t predict(self, int i, Tokens tokens, class_t prev, class_t prev_prev) except 0:
         assert i >= 0