mirror of https://github.com/explosion/spaCy.git
* Improve array features in tokens
This commit is contained in:
parent
43d5964e13
commit
7018b53d3a
|
@ -1,5 +1,6 @@
|
||||||
from spacy.lexeme cimport LexemeC
|
from spacy.lexeme cimport LexemeC
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
from thinc.typedefs cimport atom_t
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
|
@ -9,6 +10,12 @@ cdef class Tokens:
|
||||||
|
|
||||||
cdef int extend(self, int i, LexemeC** lexemes, int n) except -1
|
cdef int extend(self, int i, LexemeC** lexemes, int n) except -1
|
||||||
cdef int push_back(self, int i, LexemeC* lexeme) except -1
|
cdef int push_back(self, int i, LexemeC* lexeme) except -1
|
||||||
|
cdef int int_array(self, atom_t* atoms, int i, int* indices, int n_idx,
|
||||||
|
int* features, int n_feat) except -1
|
||||||
|
cdef int string_array(self, atom_t* atoms, int i, int* indices, int n_idx,
|
||||||
|
int* features, int n_feat) except -1
|
||||||
|
cdef int bool_array(self, atom_t* atoms, int i, int* indices, int n_idx,
|
||||||
|
int* features, int n_feat) except -1
|
||||||
|
|
||||||
cpdef int id(self, size_t i) except -1
|
cpdef int id(self, size_t i) except -1
|
||||||
cpdef float prob(self, size_t i) except 1
|
cpdef float prob(self, size_t i) except 1
|
||||||
|
|
|
@ -2,6 +2,9 @@
|
||||||
from .word cimport Lexeme
|
from .word cimport Lexeme
|
||||||
|
|
||||||
from .lexeme cimport *
|
from .lexeme cimport *
|
||||||
|
cimport numpy
|
||||||
|
cimport cython
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokens:
|
cdef class Tokens:
|
||||||
|
@ -38,6 +41,8 @@ cdef class Tokens:
|
||||||
del self.pos
|
del self.pos
|
||||||
|
|
||||||
def __getitem__(self, i):
    """Return a Lexeme wrapping the entry stored at position i.

    Negative positions count back from the end of the sequence, as with
    built-in Python sequences.

    Raises:
        IndexError: if i is outside the stored entries after normalization.
    """
    cdef size_t length = self.lex.size()
    if i < 0:
        # Translate a from-the-end index before it is cast to size_t;
        # a negative value cannot be converted to an unsigned index.
        i += length
    if i < 0 or i >= length:
        raise IndexError
    return Lexeme(<size_t>self.lex.at(i))
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
|
@ -48,6 +53,45 @@ cdef class Tokens:
|
||||||
self.idx.push_back(idx)
|
self.idx.push_back(idx)
|
||||||
return idx + lexeme.ints[<int>LexInt_length]
|
return idx + lexeme.ints[<int>LexInt_length]
|
||||||
|
|
||||||
|
cdef int int_array(self, atom_t* output, int i, int* indices, int n_idx,
                   int* features, int n_feat) except -1:
    """Write integer lexeme features into output, starting at slot i.

    For every feature id in features[:n_feat], and for every position in
    indices[:n_idx], the next slot of output receives that entry's
    ints[feat_id] value. A position that is negative or past the end of
    the stored entries yields 0 instead.

    The caller must ensure output has room for n_feat * n_idx values
    beginning at slot i.

    Returns:
        The index one past the last slot written.
    """
    cdef int feat_id, idx
    cdef int length = self.lex.size()
    for feat_id in features[:n_feat]:
        for idx in indices[:n_idx]:
            if idx < 0 or idx >= length:
                # Out-of-range positions are filled with 0.
                output[i] = 0
            else:
                output[i] = self.lex[0][idx].ints[<int>feat_id]
            i += 1
    return i
|
||||||
|
|
||||||
|
cdef int string_array(self, atom_t* output, int i, int* indices, int n_idx,
                      int* features, int n_feat) except -1:
    """Write string-slot lexeme features into output, starting at slot i.

    For every feature id in features[:n_feat], and for every position in
    indices[:n_idx], the next slot of output receives that entry's
    strings[feat_id] value cast to atom_t. A position that is negative or
    past the end of the stored entries yields 0 instead.

    The caller must ensure output has room for n_feat * n_idx values
    beginning at slot i.

    Returns:
        The index one past the last slot written.
    """
    cdef int feat_id, idx
    cdef int length = self.lex.size()
    for feat_id in features[:n_feat]:
        for idx in indices[:n_idx]:
            if idx < 0 or idx >= length:
                # Out-of-range positions are filled with 0.
                output[i] = 0
            else:
                output[i] = <atom_t>self.lex[0][idx].strings[<int>feat_id]
            i += 1
    return i
|
||||||
|
|
||||||
|
cdef int bool_array(self, atom_t* output, int i, int* indices, int n_idx,
                    int* features, int n_feat) except -1:
    """Write flag-style lexeme features into output, starting at slot i.

    For every feature id in features[:n_feat], and for every position in
    indices[:n_idx], the next slot of output receives the result of
    lexeme_check_dist_flag() for that entry and feature id. A position
    that is negative or past the end of the stored entries yields 0
    instead.

    The caller must ensure output has room for n_feat * n_idx values
    beginning at slot i.

    Returns:
        The index one past the last slot written.
    """
    cdef int feat_id, idx
    cdef int length = self.lex.size()
    for feat_id in features[:n_feat]:
        for idx in indices[:n_idx]:
            if idx < 0 or idx >= length:
                # Out-of-range positions are filled with 0.
                output[i] = 0
            else:
                output[i] = lexeme_check_dist_flag(self.lex[0][idx], feat_id)
            i += 1
    return i
|
||||||
|
|
||||||
cdef int extend(self, int idx, LexemeC** lexemes, int n) except -1:
|
cdef int extend(self, int idx, LexemeC** lexemes, int n) except -1:
|
||||||
cdef int i
|
cdef int i
|
||||||
if lexemes == NULL:
|
if lexemes == NULL:
|
||||||
|
@ -89,6 +133,8 @@ cdef class Tokens:
|
||||||
# methods, which requires them to know the IDs.
|
# methods, which requires them to know the IDs.
|
||||||
|
|
||||||
cpdef unicode string(self, size_t i):
    """Return self.orig(i) for position i.

    Raises:
        IndexError: if i is not below the number of stored entries.
    """
    # Guard first so orig() is only ever reached with a valid position.
    if not (i < self.lex.size()):
        raise IndexError
    return self.orig(i)
|
||||||
|
|
||||||
cpdef unicode orig(self, size_t i):
|
cpdef unicode orig(self, size_t i):
|
||||||
|
|
Loading…
Reference in New Issue