* Tweak documentation for Tokens, and hide constructor as __cinit__

2015-01-27 18:57:52 +11:00 · 2015-01-27 18:57:52 +11:00 · e6c3d3471f
parent e10b712d29
commit e6c3d3471f
1 changed file with 6 additions and 10 deletions
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -63,9 +63,11 @@ cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:


 cdef class Tokens:
-    """Access and set annotations onto some text.
    """
-    def __init__(self, Vocab vocab, unicode string):
+    Container class for annotated text.  Constructed via English.__call__ or
+    Tokenizer.__call__.
+    """
+    def __cinit__(self, Vocab vocab, unicode string):
        self.vocab = vocab
        self._string = string
        string_length = len(string)
@@ -222,7 +224,7 @@ cdef class Tokens:
 @cython.freelist(64)
 cdef class Token:
    """An individual token."""
-    def __init__(self, Tokens tokens, int i):
+    def __cinit__(self, Tokens tokens, int i):
        self._seq = tokens
        self.i = i
        cdef const TokenC* t = &tokens.data[i]
@@ -249,11 +251,6 @@ cdef class Token:
        self.string = tokens._string[self.idx:next_idx]

    def __len__(self):
-        """The number of unicode code-points in the original string.
-
-        Returns:
-            length (int):
-        """
        return self._seq.data[self.i].lex.length

    def nbor(self, int i=1):
@@ -287,7 +284,7 @@ cdef class Token:
            cdef const TokenC* t = &self._seq.data[self.i]
            return Token(self._seq, self.i + t.head)

-    property whitespace:
+    property whitespace_:
        def __get__(self):
            return self.string[self.length:]

@@ -337,7 +334,6 @@ cdef class Token:
            return self._seq._dep_strings[self.dep]


-
 cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
    cdef int i
    for i in range(32):