mirror of https://github.com/explosion/spaCy.git
* Work on docstrings
This commit is contained in:
parent
6352e3e2a2
commit
fe2a5e0370
|
@ -115,6 +115,17 @@ cdef class Tokens:
|
||||||
|
|
||||||
@cython.boundscheck(False)
|
@cython.boundscheck(False)
|
||||||
cpdef np.ndarray[long, ndim=2] to_array(self, object attr_ids):
|
cpdef np.ndarray[long, ndim=2] to_array(self, object attr_ids):
|
||||||
|
"""Given a list of M attribute IDs, export the tokens to a numpy ndarray
|
||||||
|
of shape (N, M), where N is the number of tokens in the sentence.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
attr_ids (list[int]): A list of attribute ID ints.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
feat_array (numpy.ndarray[long, ndim=2]): A feature matrix, with one
|
||||||
|
row per word, and one column per attribute indicated in the input
|
||||||
|
attr_ids.
|
||||||
|
"""
|
||||||
cdef int i, j
|
cdef int i, j
|
||||||
cdef attr_id_t feature
|
cdef attr_id_t feature
|
||||||
cdef np.ndarray[long, ndim=2] output
|
cdef np.ndarray[long, ndim=2] output
|
||||||
|
@ -125,6 +136,20 @@ cdef class Tokens:
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def count_by(self, attr_id_t attr_id):
|
def count_by(self, attr_id_t attr_id):
|
||||||
|
"""Produce a dict of {attribute (int): count (ints)} frequencies, keyed
|
||||||
|
by the values of the given attribute ID.
|
||||||
|
|
||||||
|
>>> from spacy.en import English, attrs
|
||||||
|
>>> nlp = English()
|
||||||
|
>>> tokens = nlp(u'apple apple orange banana')
|
||||||
|
>>> tokens.count_by(attrs.SIC)
|
||||||
|
{12800L: 1, 11880L: 2, 7561L: 1}
|
||||||
|
>>> tokens.to_array([attrs.SIC])
|
||||||
|
array([[11880],
|
||||||
|
[11880],
|
||||||
|
[ 7561],
|
||||||
|
[12800]])
|
||||||
|
"""
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef attr_t attr
|
cdef attr_t attr
|
||||||
cdef size_t count
|
cdef size_t count
|
||||||
|
|
Loading…
Reference in New Issue