* Rename NORM1 and NORM2 attrs to lower and norm

2015-01-24 06:17:03 +11:00 · 2015-01-24 06:17:03 +11:00 · fda94271af
parent 75feb52c5d
commit fda94271af
9 changed files with 42 additions and 47 deletions
--- a/spacy/en/init.py
+++ b/spacy/en/init.py
@ -20,8 +20,8 @@ def get_lex_props(string):
        'flags': get_flags(string),
        'length': len(string),
        'orth': string,
-        'norm1': string.lower(),
+        'lower': string.lower(),
-        'norm2': string,
+        'norm': string,
        'shape': orth.word_shape(string),
        'prefix': string[0],
        'suffix': string[-3:],
--- a/spacy/en/attrs.pxd
+++ b/spacy/en/attrs.pxd
@ -2,13 +2,14 @@ from ..attrs cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
 from ..attrs cimport FLAG8, FLAG9, FLAG10
 from ..attrs cimport ORTH as _ORTH
 from ..attrs cimport SHAPE as _SHAPE
-from ..attrs cimport NORM1 as _NORM1
+from ..attrs cimport LOWER as _LOWER
-from ..attrs cimport NORM2 as _NORM2
+from ..attrs cimport NORM as _NORM
 from ..attrs cimport CLUSTER as _CLUSTER
 from ..attrs cimport PREFIX as _PREFIX
 from ..attrs cimport SUFFIX as _SUFFIX
 from ..attrs cimport LEMMA as _LEMMA
 from ..attrs cimport POS as _POS
 from ..attrs cimport TAG as _TAG
 cpdef enum:
@ -26,10 +27,11 @@ cpdef enum:
    ORTH = _ORTH
    SHAPE = _SHAPE
-    LOWER = _NORM1
+    LOWER = _LOWER
-    NORM2 = _NORM2
+    NORM = _NORM
    PREFIX = _PREFIX
    SUFFIX = _SUFFIX
    CLUSTER = _CLUSTER
    LEMMA = _LEMMA
    POS = _POS
    TAG = _TAG
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@ -1,5 +1,5 @@
 from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t, attr_id_t
-from .typedefs cimport ID, ORTH, NORM1, NORM2, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
+from .typedefs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .structs cimport LexemeC
 from .strings cimport StringStore
@ -21,15 +21,15 @@ cdef class Lexeme:
    cdef readonly attr_t length
    cdef readonly attr_t orth
-    cdef readonly attr_t norm1
+    cdef readonly attr_t lower
-    cdef readonly attr_t norm2
+    cdef readonly attr_t norm
    cdef readonly attr_t shape
    cdef readonly attr_t prefix
    cdef readonly attr_t suffix
    cdef readonly unicode orth_
-    cdef readonly unicode norm1_
+    cdef readonly unicode lower_
-    cdef readonly unicode norm2_
+    cdef readonly unicode norm_
    cdef readonly unicode shape_
    cdef readonly unicode prefix_
    cdef readonly unicode suffix_
@ -50,15 +50,15 @@ cdef class Lexeme:
        py.length = ptr.length
        py.orth = ptr.orth
-        py.norm1 = ptr.norm1
+        py.lower = ptr.lower
-        py.norm2 = ptr.norm2
+        py.norm = ptr.norm
        py.shape = ptr.shape
        py.prefix = ptr.prefix
        py.suffix = ptr.suffix
        py.orth_ = strings[ptr.orth]
-        py.norm1_ = strings[ptr.norm1]
+        py.lower_ = strings[ptr.lower]
-        py.norm2_ = strings[ptr.norm2]
+        py.norm_ = strings[ptr.norm]
        py.shape_ = strings[ptr.shape]
        py.prefix_ = strings[ptr.prefix]
        py.suffix_ = strings[ptr.suffix]
@ -80,10 +80,10 @@ cdef inline attr_t get_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
        return lex.id
    elif feat_name == ORTH:
        return lex.orth
-    elif feat_name == NORM1:
+    elif feat_name == LOWER:
-        return lex.norm1
+        return lex.lower
-    elif feat_name == NORM2:
+    elif feat_name == NORM:
-        return lex.norm2
+        return lex.norm
    elif feat_name == SHAPE:
        return lex.shape
    elif feat_name == PREFIX:
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@ -17,8 +17,8 @@ cdef int set_lex_struct_props(LexemeC* lex, dict props, StringStore string_store
                              const float* empty_vec) except -1:
    lex.length = props['length']
    lex.orth = string_store[props['orth']]
-    lex.norm1 = string_store[props['norm1']] 
+    lex.lower = string_store[props['lower']] 
-    lex.norm2 = string_store[props['norm2']] 
+    lex.norm = string_store[props['norm']] 
    lex.shape = string_store[props['shape']] 
    lex.prefix = string_store[props['prefix']]
    lex.suffix = string_store[props['suffix']]
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@ -12,8 +12,8 @@ cdef struct LexemeC:
    attr_t length
    attr_t orth
-    attr_t norm1
+    attr_t lower
-    attr_t norm2
+    attr_t norm
    attr_t shape
    attr_t prefix
    attr_t suffix
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@ -51,8 +51,8 @@ cdef class Token:
    cdef readonly attr_t cluster
    cdef readonly attr_t length
    cdef readonly attr_t orth
-    cdef readonly attr_t norm1
+    cdef readonly attr_t lower
-    cdef readonly attr_t norm2
+    cdef readonly attr_t norm
    cdef readonly attr_t shape
    cdef readonly attr_t prefix
    cdef readonly attr_t suffix
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@ -7,7 +7,7 @@ from preshed.counter cimport PreshCounter
 from .vocab cimport EMPTY_LEXEME
 from .typedefs cimport attr_id_t, attr_t
 from .typedefs cimport LEMMA
-from .typedefs cimport ID, ORTH, NORM1, NORM2, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
+from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .typedefs cimport POS, LEMMA
 from unidecode import unidecode
@ -44,10 +44,10 @@ cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
        return lex.id
    elif feat_name == ORTH:
        return lex.orth
-    elif feat_name == NORM1:
+    elif feat_name == LOWER:
-        return lex.norm1
+        return lex.lower
-    elif feat_name == NORM2:
+    elif feat_name == NORM:
-        return lex.norm2
+        return lex.norm
    elif feat_name == SHAPE:
        return lex.shape
    elif feat_name == PREFIX:
@ -223,8 +223,8 @@ cdef class Token:
        self.cluster = t.lex.cluster
        self.length = t.lex.length
        self.orth = t.lex.orth
-        self.norm1 = t.lex.norm1
+        self.lower = t.lex.lower
-        self.norm2 = t.lex.norm2
+        self.norm = t.lex.norm
        self.shape = t.lex.shape
        self.prefix = t.lex.prefix
        self.suffix = t.lex.suffix
@ -254,12 +254,6 @@ cdef class Token:
        """
        return self._seq.data[self.i].lex.length
    def check_flag(self, attr_id_t flag):
        return self.flags & (1 << flag)
    def is_pos(self, univ_tag_t pos):
        return self.tag == pos
    property head:
        """The token predicted by the parser to be the head of the current token."""
        def __get__(self):
@ -267,7 +261,6 @@ cdef class Token:
            return Token(self._seq, self.i + t.head)
    property string:
        """The unicode string of the word, with no whitespace padding."""
        def __get__(self):
            cdef const TokenC* t = &self._seq.data[self.i]
            if t.lex.orth == 0:
@ -279,13 +272,13 @@ cdef class Token:
        def __get__(self):
            return self._seq.vocab.strings[self.orth]
-    property norm1_:
+    property lower_:
        def __get__(self):
-            return self._seq.vocab.strings[self.norm1]
+            return self._seq.vocab.strings[self.lower]
-    property norm2_:
+    property norm_:
        def __get__(self):
-            return self._seq.vocab.strings[self.norm2]
+            return self._seq.vocab.strings[self.norm]
    property shape_:
        def __get__(self):
--- a/spacy/typedefs.pxd
+++ b/spacy/typedefs.pxd
@ -90,8 +90,8 @@ cpdef enum attr_id_t:
    ID
    ORTH
-    NORM1
+    LOWER
-    NORM2
+    NORM
    SHAPE
    PREFIX
    SUFFIX
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -195,8 +195,8 @@ cdef class Vocab:
        for i in range(self.lexemes.size()):
            # Cast away the const, cos we can modify our lexemes
            lex = <LexemeC*>self.lexemes[i]
-            if lex.norm1 < vectors.size():
+            if lex.lower < vectors.size():
-                lex.repvec = vectors[lex.norm1]
+                lex.repvec = vectors[lex.lower]
            else:
                lex.repvec = EMPTY_VEC