From 62ceec4fc6e9e3f89fe208d66b38d397e67bbbc4 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Fri, 19 May 2017 18:47:46 +0200
Subject: [PATCH] Update docstrings and API docs for Span

---
 spacy/tokens/span.pyx      |  31 ++++++++--
 website/docs/api/span.jade | 114 ++++++++++++++++++++++---------------
 2 files changed, 93 insertions(+), 52 deletions(-)

diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index f7b10572e..4357df500 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -121,7 +121,7 @@ cdef class Span:
         return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
 
     def similarity(self, other):
-        """ Make a semantic similarity estimate. The default estimate is cosine
+        """Make a semantic similarity estimate. The default estimate is cosine
         similarity using an average of word vectors.
 
         other (object): The object to compare with. By default, accepts `Doc`,
@@ -168,14 +168,23 @@ cdef class Span:
             return self.doc[root.l_edge : root.r_edge + 1]
 
     property has_vector:
-        # TODO: docstring
+        """A boolean value indicating whether a word vector is associated with
+        the object.
+
+        RETURNS (bool): Whether a word vector is associated with the object.
+        """
         def __get__(self):
             if 'has_vector' in self.doc.user_span_hooks:
                 return self.doc.user_span_hooks['has_vector'](self)
             return any(token.has_vector for token in self)
 
     property vector:
-        # TODO: docstring
+        """A real-valued meaning representation. Defaults to an average of the
+        token vectors.
+
+        RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
+            representing the span's semantics.
+        """
         def __get__(self):
             if 'vector' in self.doc.user_span_hooks:
                 return self.doc.user_span_hooks['vector'](self)
@@ -184,7 +193,10 @@ cdef class Span:
             return self._vector
 
     property vector_norm:
-        # TODO: docstring
+        """The L2 norm of the span's vector representation.
+
+        RETURNS (float): The L2 norm of the vector representation.
+        """
         def __get__(self):
             if 'vector_norm' in self.doc.user_span_hooks:
                 return self.doc.user_span_hooks['vector'](self)
@@ -206,7 +218,10 @@ cdef class Span:
                 return sum([token.sentiment for token in self]) / len(self)
 
     property text:
-        # TODO: docstring
+        """A unicode representation of the span text.
+
+        RETURNS (unicode): The original verbatim text of the span.
+        """
         def __get__(self):
             text = self.text_with_ws
             if self[-1].whitespace_:
@@ -214,7 +229,11 @@ cdef class Span:
             return text
 
     property text_with_ws:
-        # TODO: docstring
+        """The text content of the span with a trailing whitespace character if
+        the last token has one.
+
+        RETURNS (unicode): The text content of the span (with trailing whitespace).
+        """
         def __get__(self):
             return u''.join([t.text_with_ws for t in self])
 
diff --git a/website/docs/api/span.jade b/website/docs/api/span.jade
index 9fa322f3e..3b6a4857b 100644
--- a/website/docs/api/span.jade
+++ b/website/docs/api/span.jade
@@ -127,9 +127,7 @@ p
     |  similarity using an average of word vectors.
 
 +aside-code("Example").
-    doc = nlp(u'apples and oranges')
-    apples = doc[0]
-    oranges = doc[1]
+    apples, _, oranges = nlp(u'apples and oranges')
     apples_oranges = apples.similarity(oranges)
     oranges_apples = oranges.similarity(apples)
     assert apples_oranges == oranges_apples
@@ -165,49 +163,6 @@ p Retokenize the document, such that the span is merged into a single token.
         +cell #[code Token]
         +cell The newly merged token.
 
-+h(2, "text") Span.text
-    +tag property
-
-+aside-code("Example").
-    doc = nlp('Give it back! He pleaded.')
-    assert doc[1:4].text == 'it back!'
-
-p A unicode representation of the span text.
-
-+table(["Name", "Type", "Description"])
-    +footrow
-        +cell returns
-        +cell unicode
-        +cell The original verbatim text of the span.
-
-+h(2, "text_with_ws") Span.text_with_ws
-    +tag property
-
-+aside-code("Example").
-    doc = nlp('Give it back! He pleaded.')
-    assert doc[1:4].text_with_ws == 'it back! '
-
-p
-    |  The text content of the span with a trailing whitespace character if the
-    |  last token has one.
-
-+table(["Name", "Type", "Description"])
-    +footrow
-        +cell returns
-        +cell unicode
-        +cell The text content of the span (with trailing whitespace).
-
-+h(2, "sent") Span.sent
-    +tag property
-
-p The sentence span that this span is a part of.
-
-+table(["Name", "Type", "Description"])
-    +footrow
-        +cell returns
-        +cell #[code Span]
-        +cell The sentence this is part of.
-
 +h(2, "root") Span.root
     +tag property
 
@@ -262,6 +217,56 @@ p Tokens that descend from tokens in the span, but fall outside it.
         +cell #[code Token]
         +cell A descendant of a token within the span.
 
++h(2, "has_vector") Span.has_vector
+    +tag property
+    +tag requires model
+
+p
+    |  A boolean value indicating whether a word vector is associated with the
+    |  object.
+
++aside-code("Example").
+    apple = nlp(u'apple')
+    assert apple.has_vector
+
++table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell bool
+        +cell Whether the span has vector data attached.
+
++h(2, "vector") Span.vector
+    +tag property
+    +tag requires model
+
+p
+    |  A real-valued meaning representation. Defaults to an average of the
+    |  token vectors.
+
++aside-code("Example").
+    apple = nlp(u'apple')
+    (apple.vector.dtype, apple.vector.shape)
+    # (dtype('float32'), (300,))
+
++table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell A 1D numpy array representing the span's semantics.
+
++h(2, "vector_norm") Span.vector_norm
+    +tag property
+    +tag requires model
+
+p
+    |  The L2 norm of the span's vector representation.
+
++table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell float
+        +cell The L2 norm of the vector representation.
+
 +h(2, "attributes") Attributes
 
 +table(["Name", "Type", "Description"])
@@ -270,6 +275,11 @@ p Tokens that descend from tokens in the span, but fall outside it.
         +cell #[code Doc]
         +cell The parent document.
 
+    +row
+        +cell #[code sent]
+        +cell #[code Span]
+        +cell The sentence span that this span is a part of.
+
     +row
         +cell #[code start]
         +cell int
@@ -290,6 +300,18 @@ p Tokens that descend from tokens in the span, but fall outside it.
         +cell int
         +cell The character offset for the end of the span.
 
+    +row
+        +cell #[code text]
+        +cell unicode
+        +cell A unicode representation of the span text.
+
+    +row
+        +cell #[code text_with_ws]
+        +cell unicode
+        +cell
+            |  The text content of the span with a trailing whitespace character
+            |  if the last token has one.
+
     +row
         +cell #[code label]
         +cell int