From 12b034e3efeaa3d21919d1642eb7ce35844ee08f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 25 Jan 2015 16:31:07 +1100
Subject: [PATCH] * Move POS tag definitions to parts_of_speech.pxd

---
 spacy/en/pos.pxd   |  4 ++--
 spacy/en/pos.pyx   |  8 ++++----
 spacy/structs.pxd  |  7 ++++---
 spacy/tokens.pxd   |  5 +++--
 spacy/tokens.pyx   |  4 ++--
 spacy/typedefs.pxd | 19 -------------------
 spacy/typedefs.pyx | 18 ------------------
 7 files changed, 15 insertions(+), 50 deletions(-)

diff --git a/spacy/en/pos.pxd b/spacy/en/pos.pxd
index d3697b97e..7ec88a7d5 100644
--- a/spacy/en/pos.pxd
+++ b/spacy/en/pos.pxd
@@ -4,7 +4,7 @@ from cymem.cymem cimport Pool
 from .._ml cimport Model
 from ..strings cimport StringStore
 from ..structs cimport TokenC, LexemeC, Morphology, PosTag
-from ..typedefs cimport univ_tag_t
+from ..parts_of_speech cimport univ_pos_t
 from .lemmatizer import Lemmatizer
 
 
@@ -21,5 +21,5 @@ cdef class EnPosTagger:
     cdef readonly int n_tags
 
     cdef int set_morph(self, const int i, TokenC* tokens) except -1
-    cdef int lemmatize(self, const univ_tag_t pos, const LexemeC* lex) except -1
+    cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1
 
diff --git a/spacy/en/pos.pyx b/spacy/en/pos.pyx
index f71b82f20..1e19b9b82 100644
--- a/spacy/en/pos.pyx
+++ b/spacy/en/pos.pyx
@@ -8,9 +8,9 @@ from libc.string cimport memset
 from cymem.cymem cimport Address
 from thinc.typedefs cimport atom_t, weight_t
 
-from ..typedefs cimport univ_tag_t
-from ..typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT, VERB
-from ..typedefs cimport X, PUNCT, EOL
+from ..parts_of_speech cimport univ_pos_t
+from ..parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT, VERB
+from ..parts_of_speech cimport X, PUNCT, EOL
 from ..typedefs cimport id_t
 from ..structs cimport TokenC, Morphology, LexemeC
 from ..tokens cimport Tokens
@@ -282,7 +282,7 @@ cdef class EnPosTagger:
         tokens[i].lemma = cached.lemma
         tokens[i].morph = cached.morph
 
-    cdef int lemmatize(self, const univ_tag_t pos, const LexemeC* lex) except -1:
+    cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1:
         if self.lemmatizer is None:
             return lex.orth
         cdef unicode py_string = self.strings[lex.orth]
diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index 1d6de506c..f7f8b23d3 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -1,6 +1,7 @@
 from libc.stdint cimport uint8_t, uint32_t
 
-from .typedefs cimport flags_t, attr_t, id_t, hash_t, univ_tag_t
+from .typedefs cimport flags_t, attr_t, id_t, hash_t
+from .parts_of_speech cimport univ_pos_t
 
 
 cdef struct LexemeC:
@@ -37,13 +38,13 @@ cdef struct Morphology:
 cdef struct PosTag:
     Morphology morph
     int id
-    univ_tag_t pos
+    univ_pos_t pos
 
 
 cdef struct TokenC:
     const LexemeC* lex
     Morphology morph
-    univ_tag_t pos
+    univ_pos_t pos
     int tag
     int idx
     int lemma
diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd
index 772f3e10c..9202b7c64 100644
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -6,7 +6,8 @@ cimport numpy
 from cymem.cymem cimport Pool
 from thinc.typedefs cimport atom_t
 
-from .typedefs cimport flags_t, attr_id_t, attr_t, univ_tag_t
+from .typedefs cimport flags_t, attr_id_t, attr_t
+from .parts_of_speech cimport univ_pos_t
 from .structs cimport Morphology, TokenC, LexemeC
 from .vocab cimport Vocab
 from .strings cimport StringStore
@@ -66,7 +67,7 @@ cdef class Token:
     cdef readonly float sentiment
     cdef readonly attr_t flags
     cdef readonly attr_t lemma
-    cdef readonly univ_tag_t pos
+    cdef readonly univ_pos_t pos
     cdef readonly attr_t tag
     cdef readonly attr_t dep
     cdef readonly ndarray repvec
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 41b409989..0042dd608 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -8,7 +8,7 @@ from .typedefs cimport attr_id_t, attr_t
 from .typedefs cimport LEMMA
 from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .typedefs cimport POS, LEMMA
-from .typedefs import UNIV_TAG_NAMES
+from .parts_of_speech import UNIV_POS_NAMES
 
 from unidecode import unidecode
 
@@ -325,7 +325,7 @@ cdef class Token:
 
     property pos_:
         def __get__(self):
-            id_to_string = {id_: string for string, id_ in UNIV_TAG_NAMES.items()}
+            id_to_string = {id_: string for string, id_ in UNIV_POS_NAMES.items()}
             return id_to_string[self.pos]
 
     property tag_:
diff --git a/spacy/typedefs.pxd b/spacy/typedefs.pxd
index 9d086827a..3c2ee234e 100644
--- a/spacy/typedefs.pxd
+++ b/spacy/typedefs.pxd
@@ -2,25 +2,6 @@ from libc.stdint cimport uint16_t, uint32_t, uint64_t, uintptr_t
 from libc.stdint cimport uint8_t
 
 
-# Google universal tag set
-cpdef enum univ_tag_t:
-    NO_TAG
-    ADJ
-    ADV
-    ADP
-    CONJ
-    DET
-    NOUN
-    NUM
-    PRON
-    PRT
-    VERB
-    X
-    PUNCT
-    EOL
-    N_UNIV_TAGS
-
-
 # Reserve 64 values for flag features
 cpdef enum attr_id_t:
     FLAG0
diff --git a/spacy/typedefs.pyx b/spacy/typedefs.pyx
index 020660f0c..8b1378917 100644
--- a/spacy/typedefs.pyx
+++ b/spacy/typedefs.pyx
@@ -1,19 +1 @@
-from __future__ import unicode_literals
 
-
-UNIV_TAG_NAMES = {
-    "NO_TAG": NO_TAG,
-    "ADJ": ADJ,
-    "ADV": ADV,
-    "ADP": ADP,
-    "CONJ": CONJ,
-    "DET": DET,
-    "NOUN": NOUN,
-    "NUM": NUM,
-    "PRON": PRON,
-    "PRT": PRT,
-    "VERB": VERB,
-    "X": X,
-    "PUNCT": PUNCT,
-    "EOL": EOL
-}