From 62ceec4fc6e9e3f89fe208d66b38d397e67bbbc4 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 19 May 2017 18:47:46 +0200 Subject: [PATCH] Update docstrings and API docs for Span --- spacy/tokens/span.pyx | 31 ++++++++-- website/docs/api/span.jade | 114 ++++++++++++++++++++++--------------- 2 files changed, 93 insertions(+), 52 deletions(-) diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index f7b10572e..4357df500 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -121,7 +121,7 @@ cdef class Span: return self.doc.merge(self.start_char, self.end_char, *args, **attributes) def similarity(self, other): - """ Make a semantic similarity estimate. The default estimate is cosine + """Make a semantic similarity estimate. The default estimate is cosine similarity using an average of word vectors. other (object): The object to compare with. By default, accepts `Doc`, @@ -168,14 +168,23 @@ cdef class Span: return self.doc[root.l_edge : root.r_edge + 1] property has_vector: - # TODO: docstring + """A boolean value indicating whether a word vector is associated with + the object. + + RETURNS (bool): Whether a word vector is associated with the object. + """ def __get__(self): if 'has_vector' in self.doc.user_span_hooks: return self.doc.user_span_hooks['has_vector'](self) return any(token.has_vector for token in self) property vector: - # TODO: docstring + """A real-valued meaning representation. Defaults to an average of the + token vectors. + + RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array + representing the span's semantics. + """ def __get__(self): if 'vector' in self.doc.user_span_hooks: return self.doc.user_span_hooks['vector'](self) @@ -184,7 +193,10 @@ cdef class Span: return self._vector property vector_norm: - # TODO: docstring + """The L2 norm of the span's vector representation. + + RETURNS (float): The L2 norm of the vector representation. 
+ """ def __get__(self): if 'vector_norm' in self.doc.user_span_hooks: return self.doc.user_span_hooks['vector'](self) @@ -206,7 +218,10 @@ cdef class Span: return sum([token.sentiment for token in self]) / len(self) property text: - # TODO: docstring + """A unicode representation of the span text. + + RETURNS (unicode): The original verbatim text of the span. + """ def __get__(self): text = self.text_with_ws if self[-1].whitespace_: @@ -214,7 +229,11 @@ cdef class Span: return text property text_with_ws: - # TODO: docstring + """The text content of the span with a trailing whitespace character if + the last token has one. + + RETURNS (unicode): The text content of the span (with trailing whitespace). + """ def __get__(self): return u''.join([t.text_with_ws for t in self]) diff --git a/website/docs/api/span.jade b/website/docs/api/span.jade index 9fa322f3e..3b6a4857b 100644 --- a/website/docs/api/span.jade +++ b/website/docs/api/span.jade @@ -127,9 +127,7 @@ p | similarity using an average of word vectors. +aside-code("Example"). - doc = nlp(u'apples and oranges') - apples = doc[0] - oranges = doc[1] + apples, _, oranges = nlp(u'apples and oranges') apples_oranges = apples.similarity(oranges) oranges_apples = oranges.similarity(apples) assert apples_oranges == oranges_apples @@ -165,49 +163,6 @@ p Retokenize the document, such that the span is merged into a single token. +cell #[code Token] +cell The newly merged token. -+h(2, "text") Span.text - +tag property - -+aside-code("Example"). - doc = nlp('Give it back! He pleaded.') - assert doc[1:4].text == 'it back!' - -p A unicode representation of the span text. - -+table(["Name", "Type", "Description"]) - +footrow - +cell returns - +cell unicode - +cell The original verbatim text of the span. - -+h(2, "text_with_ws") Span.text_with_ws - +tag property - -+aside-code("Example"). - doc = nlp('Give it back! He pleaded.') - assert doc[1:4].text_with_ws == 'it back! 
' - -p - | The text content of the span with a trailing whitespace character if the - | last token has one. - -+table(["Name", "Type", "Description"]) - +footrow - +cell returns - +cell unicode - +cell The text content of the span (with trailing whitespace). - -+h(2, "sent") Span.sent - +tag property - -p The sentence span that this span is a part of. - -+table(["Name", "Type", "Description"]) - +footrow - +cell returns - +cell #[code Span] - +cell The sentence this is part of. - +h(2, "root") Span.root +tag property @@ -262,6 +217,56 @@ p Tokens that descend from tokens in the span, but fall outside it. +cell #[code Token] +cell A descendant of a token within the span. ++h(2, "has_vector") Span.has_vector + +tag property + +tag requires model + +p + | A boolean value indicating whether a word vector is associated with the + | object. + ++aside-code("Example"). + apple = nlp(u'apple') + assert apple.has_vector + ++table(["Name", "Type", "Description"]) + +footrow + +cell returns + +cell bool + +cell Whether the span has vector data attached. + ++h(2, "vector") Span.vector + +tag property + +tag requires model + +p + | A real-valued meaning representation. Defaults to an average of the + | token vectors. + ++aside-code("Example"). + apple = nlp(u'apple') + (apple.vector.dtype, apple.vector.shape) + # (dtype('float32'), (300,)) + ++table(["Name", "Type", "Description"]) + +footrow + +cell returns + +cell #[code numpy.ndarray[ndim=1, dtype='float32']] + +cell A 1D numpy array representing the span's semantics. + ++h(2, "vector_norm") Span.vector_norm + +tag property + +tag requires model + +p + | The L2 norm of the span's vector representation. + ++table(["Name", "Type", "Description"]) + +footrow + +cell returns + +cell float + +cell The L2 norm of the vector representation. + +h(2, "attributes") Attributes +table(["Name", "Type", "Description"]) @@ -270,6 +275,11 @@ p Tokens that descend from tokens in the span, but fall outside it. 
+cell #[code Doc] +cell The parent document. + +row + +cell #[code sent] + +cell #[code Span] + +cell The sentence span that this span is a part of. + +row +cell #[code start] +cell int @@ -290,6 +300,18 @@ p Tokens that descend from tokens in the span, but fall outside it. +cell int +cell The character offset for the end of the span. + +row + +cell #[code text] + +cell unicode + +cell A unicode representation of the span text. + + +row + +cell #[code text_with_ws] + +cell unicode + +cell + | The text content of the span with a trailing whitespace character + | if the last token has one. + +row +cell #[code label] +cell int