From 4e30195c6d61a2d6ad8aa59e44e0792046807877 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sat, 20 Dec 2014 07:27:28 +1100
Subject: [PATCH] * Refactor morphology.pyx

---
 spacy/morphology.pxd | 35 ++++-------------------------------
 spacy/morphology.pyx | 19 ++++++++++---------
 2 files changed, 14 insertions(+), 40 deletions(-)

diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd
index 9c5d342e9..a6f020159 100644
--- a/spacy/morphology.pxd
+++ b/spacy/morphology.pxd
@@ -1,36 +1,9 @@
-
-from .tokens cimport TokenC
-from .lexeme cimport Lexeme
-from .utf8string cimport StringStore
-from .typedefs cimport id_t, Morphology
-
-from preshed.maps cimport PreshMapArray
 from cymem.cymem cimport Pool
+from preshed.maps cimport PreshMapArray
 
-
-# Google universal tag set
-cpdef enum univ_tag_t:
-    NO_TAG
-    ADJ
-    ADV
-    ADP
-    CONJ
-    DET
-    NOUN
-    NUM
-    PRON
-    PRT
-    VERB
-    X
-    PUNCT
-    EOL
-    N_UNIV_TAGS
-
-
-cdef struct PosTag:
-    Morphology morph
-    int id
-    univ_tag_t pos
+from .structs cimport TokenC, Lexeme, Morphology, PosTag
+from .strings cimport StringStore
+from .typedefs cimport id_t, univ_tag_t
 
 
 cdef class Morphologizer:
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index 4d3600f8b..30e4aef4c 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -4,7 +4,9 @@ from os import path
 import json
 
 from .lemmatizer import Lemmatizer
-from .typedefs cimport id_t
+from .typedefs cimport id_t, univ_tag_t
+from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT
+from .typedefs cimport VERB, X, PUNCT, EOL
 from . import util
 
 
@@ -34,13 +36,12 @@ cdef struct _Cached:
 cdef class Morphologizer:
     """Given a POS tag and a Lexeme, find its lemma and morphological analysis.
     """
-    def __init__(self, StringStore strings, data_dir):
+    def __init__(self, StringStore strings, object lemmatizer, **kwargs):
         self.mem = Pool()
         self.strings = strings
-        cfg = json.load(open(path.join(data_dir, 'config.json')))
-        tag_map = cfg['tag_map']
-        self.tag_names = cfg['tag_names']
-        self.lemmatizer = Lemmatizer(path.join(util.DATA_DIR, 'wordnet'))
+        tag_map = kwargs['tag_map']
+        self.tag_names = kwargs['tag_names']
+        self.lemmatizer = lemmatizer
         self._cache = PreshMapArray(len(self.tag_names))
         self.tags = <PosTag*>self.mem.alloc(len(self.tag_names), sizeof(PosTag))
         for i, tag in enumerate(self.tag_names):
@@ -54,9 +55,9 @@ cdef class Morphologizer:
             self.tags[i].morph.person = props.get('person', 0)
             self.tags[i].morph.case = props.get('case', 0)
             self.tags[i].morph.misc = props.get('misc', 0)
-        if path.exists(path.join(data_dir, 'morphs.json')):
-            with open(path.join(data_dir, 'morphs.json')) as file_:
-                self.load_exceptions(json.load(file_))
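+        # TODO: loading of 'morphs.json' exceptions is disabled: data_dir is no longer an __init__ argument. Was:
+        #if path.exists(path.join(data_dir, 'morphs.json')):
+        #    with open(path.join(data_dir, 'morphs.json')) as file_: self.load_exceptions(json.load(file_))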
 
     cdef int lemmatize(self, const univ_tag_t pos, const Lexeme* lex) except -1:
         if self.lemmatizer is None: