* Work on docstrings

2014-12-27 21:46:04 +11:00 · 2014-12-27 21:46:04 +11:00 · fe2a5e0370
parent 6352e3e2a2
commit fe2a5e0370
1 changed files with 25 additions and 0 deletions
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@ -115,6 +115,17 @@ cdef class Tokens:
    @cython.boundscheck(False)
    cpdef np.ndarray[long, ndim=2] to_array(self, object attr_ids):
        """Given a list of M attribute IDs, export the tokens to a numpy ndarray
        of shape (N, M), where N is the number of tokens in the sentence.
        Arguments:
            attr_ids (list[int]): A list of attribute ID ints.
        Returns:
            feat_array (numpy.ndarray[long, ndim=2]): A feature matrix, with one
                row per word, and one column per attribute indicated in the input
                attr_ids.
        """
        cdef int i, j
        cdef attr_id_t feature
        cdef np.ndarray[long, ndim=2] output
@ -125,6 +136,20 @@ cdef class Tokens:
        return output
    def count_by(self, attr_id_t attr_id):
        """Produce a dict of {attribute (int): count (int)} frequencies, keyed
        by the values of the given attribute ID.
          >>> from spacy.en import English, attrs
          >>> nlp = English()
          >>> tokens = nlp(u'apple apple orange banana')
          >>> tokens.count_by(attrs.SIC)
          {12800L: 1, 11880L: 2, 7561L: 1}
          >>> tokens.to_array([attrs.SIC])
          array([[11880],
                 [11880],
                 [ 7561],
                 [12800]])
        """
        cdef int i
        cdef attr_t attr
        cdef size_t count