* Rearrange code in tokens.pyx

2015-04-13 05:41:25 +02:00 · 2015-04-13 05:41:25 +02:00 · fbd48c571d
parent 5ce51ce8d6
commit fbd48c571d
1 changed file with 54 additions and 36 deletions
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@ -136,8 +136,9 @@ cdef class Tokens:
        cdef const TokenC* last = &self.data[self.length - 1]
        return self._string[:last.idx + last.lex.length]
-    property ents:
+    @property
-        def __get__(self):
+    def ents(self):
        """Yields named-entity Span objects."""
        cdef int i
        cdef const TokenC* token
        cdef int start = -1
@ -158,6 +159,23 @@ cdef class Tokens:
        if start != -1:
            yield Span(self, start, self.length, label=label)
    @property
    def sents(self):
        """Yield sentence Span objects one at a time, in document order.

        Sentence boundaries are read from each token's ``sent_end`` flag
        (per the original note, these are calculated from the dependency
        parse).  A sentence runs from the token after the previous boundary
        up to and including the token whose ``sent_end`` is set.  Any tokens
        left after the last flagged boundary are yielded as a final span, so
        every token falls in exactly one sentence.  An empty document yields
        nothing.
        """
        cdef int i
        # Index of the first token of the sentence currently being scanned.
        cdef int start = 0
        for i in range(self.length):
            if self.data[i].sent_end:
                yield Span(self, start, i + 1)
                start = i + 1
        # Trailing tokens with no closing sent_end flag form the last sentence.
        if start < self.length:
            yield Span(self, start, self.length)
    cdef int push_back(self, int idx, LexemeOrToken lex_or_tok) except -1:
        if self.length == self.max_length:
            self._realloc(self.length * 2)
@ -238,21 +256,6 @@ cdef class Tokens:
        for i in range(self.length, self.max_length + PADDING):
            self.data[i].lex = &EMPTY_LEXEME
    @property
    def sents(self):
        """Yield sentence Span objects, split on each token's ``sent_end`` flag.

        This is really only a place-holder for a proper solution.  Each span
        covers the tokens from the one after the previous boundary through
        the token whose ``sent_end`` is set; tokens following the last
        boundary are yielded as a final span.
        """
        cdef int i
        # None means "no sentence currently open".
        start = None
        for i in range(self.length):
            if start is None:
                start = i
            if self.data[i].sent_end:
                yield Span(self, start, i + 1)
                start = None
        # Flush the still-open sentence, if any.
        if start is not None:
            yield Span(self, start, self.length)
    cdef int set_parse(self, const TokenC* parsed) except -1:
        # TODO: This method is fairly misleading atm. It's used by GreedyParser
        # to actually apply the parse calculated. Need to rethink this.
@ -263,6 +266,8 @@ cdef class Tokens:
    def merge(self, int start_idx, int end_idx, unicode tag, unicode lemma,
              unicode ent_type):
        """Merge a multi-word expression into a single token.  Currently
        experimental; API is likely to change."""
        cdef int i
        cdef int start = -1
        cdef int end = -1
@ -526,10 +531,23 @@ cdef class Token:
                               self.c + self.c.head, self.i + self.c.head, self.array_len,
                               self._seq)
    @property
    def ent_type(self):
        """Entity type of this token, as the raw value stored on the token struct.

        ``ent_type_`` resolves the same value through ``vocab.strings``.
        """
        return self.c.ent_type
    @property
    def ent_iob(self):
        """Entity IOB code of this token, as the raw value stored on the token struct.

        ``ent_iob_`` maps the codes 0, 1, 2, 3 to '', 'I', 'O', 'B'.
        """
        return self.c.ent_iob
    @property
    def ent_type_(self):
        """Entity type of this token, looked up in ``self.vocab.strings``."""
        type_id = self.c.ent_type
        return self.vocab.strings[type_id]
    @property
    def ent_iob_(self):
        """Entity IOB tag of this token as a string.

        The stored code indexes into ('', 'I', 'O', 'B'): 0 gives the empty
        string, 1 gives 'I', 2 gives 'O' and 3 gives 'B'.
        """
        return ('', 'I', 'O', 'B')[self.c.ent_iob]
    @property
    def whitespace_(self):
        """Return the part of ``self.string`` after the first ``lex.length`` characters.

        NOTE(review): presumably ``self.string`` is the token text followed by
        its trailing whitespace, making this the trailing whitespace; confirm
        against the ``string`` property.
        """
        lexeme_length = self.c.lex.length
        return self.string[lexeme_length:]