mirror of https://github.com/explosion/spaCy.git
* Rearrange code in tokens.pyx
This commit is contained in:
parent
5ce51ce8d6
commit
fbd48c571d
|
@ -136,27 +136,45 @@ cdef class Tokens:
|
||||||
cdef const TokenC* last = &self.data[self.length - 1]
|
cdef const TokenC* last = &self.data[self.length - 1]
|
||||||
return self._string[:last.idx + last.lex.length]
|
return self._string[:last.idx + last.lex.length]
|
||||||
|
|
||||||
property ents:
|
@property
|
||||||
def __get__(self):
|
def ents(self):
|
||||||
cdef int i
|
"""Yields named-entity Span objects."""
|
||||||
cdef const TokenC* token
|
cdef int i
|
||||||
cdef int start = -1
|
cdef const TokenC* token
|
||||||
cdef int label = 0
|
cdef int start = -1
|
||||||
for i in range(self.length):
|
cdef int label = 0
|
||||||
token = &self.data[i]
|
for i in range(self.length):
|
||||||
if token.ent_iob == 1:
|
token = &self.data[i]
|
||||||
assert start != -1
|
if token.ent_iob == 1:
|
||||||
pass
|
assert start != -1
|
||||||
elif token.ent_iob == 2:
|
pass
|
||||||
if start != -1:
|
elif token.ent_iob == 2:
|
||||||
yield Span(self, start, i, label=label)
|
if start != -1:
|
||||||
start = -1
|
yield Span(self, start, i, label=label)
|
||||||
label = 0
|
start = -1
|
||||||
elif token.ent_iob == 3:
|
label = 0
|
||||||
start = i
|
elif token.ent_iob == 3:
|
||||||
label = token.ent_type
|
start = i
|
||||||
if start != -1:
|
label = token.ent_type
|
||||||
yield Span(self, start, self.length, label=label)
|
if start != -1:
|
||||||
|
yield Span(self, start, self.length, label=label)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def sents(self):
|
||||||
|
"""Yield a list of sentence Span objects, calculated from the dependency
|
||||||
|
parse.
|
||||||
|
"""
|
||||||
|
cdef int i
|
||||||
|
cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:])
|
||||||
|
start = None
|
||||||
|
for i in range(self.length):
|
||||||
|
if start is None:
|
||||||
|
start = i
|
||||||
|
if self.data[i].sent_end:
|
||||||
|
yield Span(self, start, i+1)
|
||||||
|
start = None
|
||||||
|
if start is not None:
|
||||||
|
yield Span(self, start, self.length)
|
||||||
|
|
||||||
cdef int push_back(self, int idx, LexemeOrToken lex_or_tok) except -1:
|
cdef int push_back(self, int idx, LexemeOrToken lex_or_tok) except -1:
|
||||||
if self.length == self.max_length:
|
if self.length == self.max_length:
|
||||||
|
@ -238,21 +256,6 @@ cdef class Tokens:
|
||||||
for i in range(self.length, self.max_length + PADDING):
|
for i in range(self.length, self.max_length + PADDING):
|
||||||
self.data[i].lex = &EMPTY_LEXEME
|
self.data[i].lex = &EMPTY_LEXEME
|
||||||
|
|
||||||
@property
|
|
||||||
def sents(self):
|
|
||||||
"""This is really only a place-holder for a proper solution."""
|
|
||||||
cdef int i
|
|
||||||
cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:])
|
|
||||||
start = None
|
|
||||||
for i in range(self.length):
|
|
||||||
if start is None:
|
|
||||||
start = i
|
|
||||||
if self.data[i].sent_end:
|
|
||||||
yield Span(self, start, i+1)
|
|
||||||
start = None
|
|
||||||
if start is not None:
|
|
||||||
yield Span(self, start, self.length)
|
|
||||||
|
|
||||||
cdef int set_parse(self, const TokenC* parsed) except -1:
|
cdef int set_parse(self, const TokenC* parsed) except -1:
|
||||||
# TODO: This method is fairly misleading atm. It's used by GreedyParser
|
# TODO: This method is fairly misleading atm. It's used by GreedyParser
|
||||||
# to actually apply the parse calculated. Need to rethink this.
|
# to actually apply the parse calculated. Need to rethink this.
|
||||||
|
@ -263,6 +266,8 @@ cdef class Tokens:
|
||||||
|
|
||||||
def merge(self, int start_idx, int end_idx, unicode tag, unicode lemma,
|
def merge(self, int start_idx, int end_idx, unicode tag, unicode lemma,
|
||||||
unicode ent_type):
|
unicode ent_type):
|
||||||
|
"""Merge a multi-word expression into a single token. Currently
|
||||||
|
experimental; API is likely to change."""
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef int start = -1
|
cdef int start = -1
|
||||||
cdef int end = -1
|
cdef int end = -1
|
||||||
|
@ -526,10 +531,23 @@ cdef class Token:
|
||||||
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
self.c + self.c.head, self.i + self.c.head, self.array_len,
|
||||||
self._seq)
|
self._seq)
|
||||||
|
|
||||||
|
property ent_type:
|
||||||
|
def __get__(self):
|
||||||
|
return self.c.ent_type
|
||||||
|
|
||||||
|
property ent_iob:
|
||||||
|
def __get__(self):
|
||||||
|
return self.c.ent_iob
|
||||||
|
|
||||||
property ent_type_:
|
property ent_type_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.vocab.strings[self.c.ent_type]
|
return self.vocab.strings[self.c.ent_type]
|
||||||
|
|
||||||
|
property ent_iob_:
|
||||||
|
def __get__(self):
|
||||||
|
iob_strings = ('', 'I', 'O', 'B')
|
||||||
|
return iob_strings[self.c.ent_iob]
|
||||||
|
|
||||||
property whitespace_:
|
property whitespace_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.string[self.c.lex.length:]
|
return self.string[self.c.lex.length:]
|
||||||
|
|
Loading…
Reference in New Issue