Update docstrings for Span class

ines 2017-05-18 22:17:24 +02:00
parent b87066ff10
commit 593361ee3c
1 changed file with 79 additions and 84 deletions


@@ -20,22 +20,17 @@ from .. import about
cdef class Span:
"""
A slice from a Doc object.
"""
"""A slice from a Doc object."""
def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
vector_norm=None):
"""
Create a Span object from the slice doc[start : end]
"""Create a `Span` object from the slice `doc[start : end]`.
Arguments:
doc (Doc): The parent document.
start (int): The index of the first token of the span.
end (int): The index of the first token after the span.
label (int): A label to attach to the Span, e.g. for named entities.
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
Returns:
Span The newly constructed object.
doc (Doc): The parent document.
start (int): The index of the first token of the span.
end (int): The index of the first token after the span.
label (int): A label to attach to the Span, e.g. for named entities.
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
RETURNS (Span): The newly constructed object.
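EXAMPLE: An illustrative usage sketch (assumes an `nlp` pipeline has been
    loaded, e.g. via `spacy.load('en')`); spans are usually created by
    slicing a `Doc` rather than by calling the constructor directly:
    >>> doc = nlp(u'Give it back! He pleaded.')
    >>> span = doc[1:4]
    >>> span.text    # 'it back!'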
"""
if not (0 <= start <= end <= len(doc)):
raise IndexError
@@ -70,7 +65,6 @@ cdef class Span:
def __hash__(self):
return hash((self.doc, self.label, self.start_char, self.end_char))
def __len__(self):
self._recalculate_indices()
if self.end < self.start:
@@ -99,30 +93,21 @@ cdef class Span:
yield self.doc[i]
def merge(self, *args, **attributes):
"""
Retokenize the document, such that the span is merged into a single token.
"""Retokenize the document, such that the span is merged into a single token.
Arguments:
**attributes:
Attributes to assign to the merged token. By default, attributes
are inherited from the syntactic root token of the span.
Returns:
token (Token):
The newly merged token.
**attributes: Attributes to assign to the merged token. By default,
attributes are inherited from the syntactic root token of the span.
RETURNS (Token): The newly merged token.
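EXAMPLE: An illustrative sketch (assumes a loaded `nlp` pipeline with a
    parser, so attributes can be inherited from the syntactic root):
    >>> doc = nlp(u'I like New York in Autumn.')
    >>> new_york = doc[2:4].merge()
    >>> new_york.text    # 'New York', now a single token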
"""
return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
def similarity(self, other):
"""
Make a semantic similarity estimate. The default estimate is cosine
""" Make a semantic similarity estimate. The default estimate is cosine
similarity using an average of word vectors.
Arguments:
other (object): The object to compare with. By default, accepts Doc,
Span, Token and Lexeme objects.
Return:
score (float): A scalar similarity score. Higher is more similar.
other (object): The object to compare with. By default, accepts `Doc`,
`Span`, `Token` and `Lexeme` objects.
RETURNS (float): A scalar similarity score. Higher is more similar.
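EXAMPLE: An illustrative sketch (assumes a loaded `nlp` pipeline with word
    vectors):
    >>> doc = nlp(u'green apples and red oranges')
    >>> apples, oranges = doc[0:2], doc[3:5]
    >>> apples.similarity(oranges)    # a scalar; higher means more similar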
"""
if 'similarity' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['similarity'](self, other)
@@ -145,11 +130,9 @@ cdef class Span:
self.end = end + 1
property sent:
"""
The sentence span that this span is a part of.
"""The sentence span that this span is a part of.
Returns:
Span The sentence this is part of.
RETURNS (Span): The sentence span that the span is a part of.
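EXAMPLE: An illustrative sketch (assumes a loaded `nlp` pipeline with a
    parser or other sentence boundary detection):
    >>> doc = nlp(u'Give it back! He pleaded.')
    >>> doc[1:3].sent.text    # 'Give it back!'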
"""
def __get__(self):
if 'sent' in self.doc.user_span_hooks:
@@ -166,12 +149,14 @@ cdef class Span:
return self.doc[root.l_edge : root.r_edge + 1]
property has_vector:
# TODO: docstring
def __get__(self):
if 'has_vector' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['has_vector'](self)
return any(token.has_vector for token in self)
property vector:
# TODO: docstring
def __get__(self):
if 'vector' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['vector'](self)
@@ -180,6 +165,7 @@ cdef class Span:
return self._vector
property vector_norm:
# TODO: docstring
def __get__(self):
if 'vector_norm' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['vector_norm'](self)
@@ -193,6 +179,7 @@ cdef class Span:
return self._vector_norm
property sentiment:
# TODO: docstring
def __get__(self):
if 'sentiment' in self.doc.user_span_hooks:
return self.doc.user_span_hooks['sentiment'](self)
@@ -200,6 +187,7 @@ cdef class Span:
return sum([token.sentiment for token in self]) / len(self)
property text:
# TODO: docstring
def __get__(self):
text = self.text_with_ws
if self[-1].whitespace_:
@@ -207,16 +195,17 @@ cdef class Span:
return text
property text_with_ws:
# TODO: docstring
def __get__(self):
return u''.join([t.text_with_ws for t in self])
property noun_chunks:
"""
Yields base noun-phrase #[code Span] objects, if the document
has been syntactically parsed. A base noun phrase, or
'NP chunk', is a noun phrase that does not permit other NPs to
be nested within it so no NP-level coordination, no prepositional
phrases, and no relative clauses. For example:
"""Yields base noun-phrase `Span` objects, if the document has been
syntactically parsed. A base noun phrase, or "NP chunk", is a noun
phrase that does not permit other NPs to be nested within it so no
NP-level coordination, no prepositional phrases, and no relative clauses.
YIELDS (Span): Base noun-phrase `Span` objects
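EXAMPLE: An illustrative sketch (assumes a loaded `nlp` pipeline with a
    parser):
    >>> doc = nlp(u'The quick brown fox jumps over the lazy dog.')
    >>> span = doc[0:9]
    >>> [np.text for np in span.noun_chunks]    # e.g. ['The quick brown fox', 'the lazy dog']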
"""
def __get__(self):
if not self.doc.is_parsed:
@@ -235,49 +224,47 @@ cdef class Span:
yield span
property root:
"""
The token within the span that's highest in the parse tree. If there's a
tie, the earlist is prefered.
"""The token within the span that's highest in the parse tree.
If there's a tie, the earliest is prefered.
Returns:
Token: The root token.
RETURNS (Token): The root token.
i.e. has the shortest path to the root of the sentence (or is the root
itself). If multiple words are equally high in the tree, the first word
is taken. For example:
EXAMPLE: The root token has the shortest path to the root of the sentence
(or is the root itself). If multiple words are equally high in the
tree, the first word is taken. For example:
>>> toks = nlp(u'I like New York in Autumn.')
>>> toks = nlp(u'I like New York in Autumn.')
Let's name the indices --- easier than writing "toks[4]" etc.
Let's name the indices easier than writing `toks[4]` etc.
>>> i, like, new, york, in_, autumn, dot = range(len(toks))
>>> i, like, new, york, in_, autumn, dot = range(len(toks))
The head of 'new' is 'York', and the head of 'York' is 'like'
The head of 'new' is 'York', and the head of "York" is "like"
>>> toks[new].head.orth_
'York'
>>> toks[york].head.orth_
'like'
>>> toks[new].head.orth_
'York'
>>> toks[york].head.orth_
'like'
Create a span for "New York". Its root is "York".
Create a span for "New York". Its root is "York".
>>> new_york = toks[new:york+1]
>>> new_york.root.orth_
'York'
>>> new_york = toks[new:york+1]
>>> new_york.root.orth_
'York'
Here's a more complicated case, raise by Issue #214
Here's a more complicated case, raised by issue #214:
>>> toks = nlp(u'to, north and south carolina')
>>> to, north, and_, south, carolina = toks
>>> south.head.text, carolina.head.text
('north', 'to')
>>> toks = nlp(u'to, north and south carolina')
>>> to, north, and_, south, carolina = toks
>>> south.head.text, carolina.head.text
('north', 'to')
Here 'south' is a child of 'north', which is a child of 'carolina'.
Carolina is the root of the span:
Here "south" is a child of "north", which is a child of "carolina".
Carolina is the root of the span:
>>> south_carolina = toks[-2:]
>>> south_carolina.root.text
'carolina'
>>> south_carolina = toks[-2:]
>>> south_carolina.root.text
'carolina'
"""
def __get__(self):
self._recalculate_indices()
@@ -314,10 +301,10 @@ cdef class Span:
return self.doc[root]
property lefts:
"""
Tokens that are to the left of the span, whose head is within the Span.
""" Tokens that are to the left of the span, whose head is within the
`Span`.
Yields: Token A left-child of a token of the span.
YIELDS (Token):A left-child of a token of the span.
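EXAMPLE: An illustrative sketch (assumes a loaded `nlp` pipeline with a
    parser; the exact tokens depend on the parse):
    >>> doc = nlp(u'I like New York in Autumn.')
    >>> [t.text for t in doc[3:7].lefts]    # e.g. ['New'], whose head 'York' lies inside the span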
"""
def __get__(self):
for token in reversed(self): # Reverse, so we get the tokens in order
@@ -326,10 +313,10 @@ cdef class Span:
yield left
property rights:
"""
Tokens that are to the right of the Span, whose head is within the Span.
"""Tokens that are to the right of the Span, whose head is within the
`Span`.
Yields: Token A right-child of a token of the span.
YIELDS (Token): A right-child of a token of the span.
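EXAMPLE: An illustrative sketch (assumes a loaded `nlp` pipeline with a
    parser; the exact tokens depend on the parse):
    >>> doc = nlp(u'I like New York in Autumn.')
    >>> [t.text for t in doc[0:2].rights]    # right-children of the span's tokens, e.g. ['York', 'in', '.']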
"""
def __get__(self):
for token in self:
@@ -338,10 +325,9 @@ cdef class Span:
yield right
property subtree:
"""
Tokens that descend from tokens in the span, but fall outside it.
"""Tokens that descend from tokens in the span, but fall outside it.
Yields: Token A descendant of a token within the span.
YIELDS (Token): A descendant of a token within the span.
"""
def __get__(self):
for word in self.lefts:
@@ -351,8 +337,9 @@ cdef class Span:
yield from word.subtree
property ent_id:
"""
An (integer) entity ID. Usually assigned by patterns in the Matcher.
"""An (integer) entity ID. Usually assigned by patterns in the `Matcher`.
RETURNS (int): The entity ID.
"""
def __get__(self):
return self.root.ent_id
@@ -362,9 +349,11 @@ cdef class Span:
raise NotImplementedError(
"Can't yet set ent_id from Span. Vote for this feature on the issue "
"tracker: http://github.com/explosion/spaCy/issues")
property ent_id_:
"""
A (string) entity ID. Usually assigned by patterns in the Matcher.
"""A (string) entity ID. Usually assigned by patterns in the `Matcher`.
RETURNS (unicode): The entity ID.
"""
def __get__(self):
return self.root.ent_id_
@@ -376,26 +365,32 @@ cdef class Span:
"tracker: http://github.com/explosion/spaCy/issues")
property orth_:
# TODO: docstring
def __get__(self):
return ''.join([t.string for t in self]).strip()
property lemma_:
# TODO: docstring
def __get__(self):
return ' '.join([t.lemma_ for t in self]).strip()
property upper_:
# TODO: docstring
def __get__(self):
return ''.join([t.string.upper() for t in self]).strip()
property lower_:
# TODO: docstring
def __get__(self):
return ''.join([t.string.lower() for t in self]).strip()
property string:
# TODO: docstring
def __get__(self):
return ''.join([t.string for t in self])
property label_:
# TODO: docstring
def __get__(self):
return self.doc.vocab.strings[self.label]