mirror of https://github.com/explosion/spaCy.git
Update docstrings and API docs for Token
This commit is contained in:
parent
62ceec4fc6
commit
e9e62b01b0
|
@ -23,10 +23,14 @@ from .. import about
|
|||
|
||||
|
||||
cdef class Token:
|
||||
"""
|
||||
An individual token --- i.e. a word, punctuation symbol, whitespace, etc.
|
||||
"""
|
||||
"""An individual token – i.e. a word, punctuation symbol, whitespace, etc."""
|
||||
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
||||
"""Construct a `Token` object.
|
||||
|
||||
vocab (Vocab): A storage container for lexical types.
|
||||
doc (Doc): The parent document.
|
||||
offset (int): The index of the token within the document.
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.doc = doc
|
||||
self.c = &self.doc.c[offset]
|
||||
|
@ -36,8 +40,9 @@ cdef class Token:
|
|||
return hash((self.doc, self.i))
|
||||
|
||||
def __len__(self):
|
||||
"""
|
||||
Number of unicode characters in token.text.
|
||||
"""The number of unicode characters in the token, i.e. `token.text`.
|
||||
|
||||
RETURNS (int): The number of unicode characters in the token.
|
||||
"""
|
||||
return self.c.lex.length
|
||||
|
||||
|
@ -75,37 +80,35 @@ cdef class Token:
|
|||
raise ValueError(op)
|
||||
|
||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
||||
"""
|
||||
Check the value of a boolean flag.
|
||||
"""Check the value of a boolean flag.
|
||||
|
||||
Arguments:
|
||||
flag_id (int): The ID of the flag attribute.
|
||||
Returns:
|
||||
is_set (bool): Whether the flag is set.
|
||||
flag_id (int): The ID of the flag attribute.
|
||||
RETURNS (bool): Whether the flag is set.
|
||||
|
||||
EXAMPLE:
|
||||
>>> from spacy.attrs import IS_TITLE
|
||||
>>> doc = nlp(u'Give it back! He pleaded.')
|
||||
>>> token = doc[0]
|
||||
>>> token.check_flag(IS_TITLE)
|
||||
True
|
||||
"""
|
||||
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
||||
|
||||
def nbor(self, int i=1):
|
||||
"""
|
||||
Get a neighboring token.
|
||||
"""Get a neighboring token.
|
||||
|
||||
Arguments:
|
||||
i (int): The relative position of the token to get. Defaults to 1.
|
||||
Returns:
|
||||
neighbor (Token): The token at position self.doc[self.i+i]
|
||||
i (int): The relative position of the token to get. Defaults to 1.
|
||||
RETURNS (Token): The token at position `self.doc[self.i+i]`.
|
||||
"""
|
||||
return self.doc[self.i+i]
|
||||
|
||||
def similarity(self, other):
|
||||
"""
|
||||
Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
"""Make a semantic similarity estimate. The default estimate is cosine
|
||||
similarity using an average of word vectors.
|
||||
|
||||
Arguments:
|
||||
other:
|
||||
The object to compare with. By default, accepts Doc, Span,
|
||||
Token and Lexeme objects.
|
||||
Returns:
|
||||
score (float): A scalar similarity score. Higher is more similar.
|
||||
other (object): The object to compare with. By default, accepts `Doc`,
|
||||
`Span`, `Token` and `Lexeme` objects.
|
||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||
"""
|
||||
if 'similarity' in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks['similarity'](self)
|
||||
|
@ -114,10 +117,14 @@ cdef class Token:
|
|||
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
|
||||
property lex_id:
|
||||
"""ID of the token's lexical type.
|
||||
|
||||
RETURNS (int): ID of the token's lexical type."""
|
||||
def __get__(self):
|
||||
return self.c.lex.id
|
||||
|
||||
property rank:
|
||||
# TODO: add docstring
|
||||
def __get__(self):
|
||||
return self.c.lex.id
|
||||
|
||||
|
@ -126,10 +133,19 @@ cdef class Token:
|
|||
return self.text_with_ws
|
||||
|
||||
property text:
|
||||
"""A unicode representation of the token text.
|
||||
|
||||
RETURNS (unicode): The original verbatim text of the token.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.orth_
|
||||
|
||||
property text_with_ws:
|
||||
"""The text content of the token with a trailing whitespace character if
|
||||
it has one.
|
||||
|
||||
RETURNS (unicode): The text content of the token (with trailing whitespace).
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef unicode orth = self.vocab.strings[self.c.lex.orth]
|
||||
if self.c.spacy:
|
||||
|
@ -184,6 +200,10 @@ cdef class Token:
|
|||
return self.c.lex.suffix
|
||||
|
||||
property lemma:
|
||||
"""Base form of the word, with no inflectional suffixes.
|
||||
|
||||
RETURNS (int): Token lemma.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.c.lemma
|
||||
def __set__(self, int lemma):
|
||||
|
@ -206,8 +226,10 @@ cdef class Token:
|
|||
self.c.dep = label
|
||||
|
||||
property has_vector:
|
||||
"""
|
||||
A boolean value indicating whether a word vector is associated with the object.
|
||||
"""A boolean value indicating whether a word vector is associated with
|
||||
the object.
|
||||
|
||||
RETURNS (bool): Whether a word vector is associated with the object.
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'has_vector' in self.doc.user_token_hooks:
|
||||
|
@ -220,10 +242,10 @@ cdef class Token:
|
|||
return False
|
||||
|
||||
property vector:
|
||||
"""
|
||||
A real-valued meaning representation.
|
||||
"""A real-valued meaning representation.
|
||||
|
||||
Type: numpy.ndarray[ndim=1, dtype='float32']
|
||||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||
representing the token's semantics.
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'vector' in self.doc.user_token_hooks:
|
||||
|
@ -239,15 +261,11 @@ cdef class Token:
|
|||
vector_view = <float[:length,]>self.c.lex.vector
|
||||
return numpy.asarray(vector_view)
|
||||
|
||||
property repvec:
|
||||
def __get__(self):
|
||||
raise AttributeError("repvec was renamed to vector in v0.100")
|
||||
|
||||
property has_repvec:
|
||||
def __get__(self):
|
||||
raise AttributeError("has_repvec was renamed to has_vector in v0.100")
|
||||
|
||||
property vector_norm:
|
||||
"""The L2 norm of the token's vector representation.
|
||||
|
||||
RETURNS (float): The L2 norm of the vector representation.
|
||||
"""
|
||||
def __get__(self):
|
||||
if 'vector_norm' in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks['vector_norm'](self)
|
||||
|
@ -324,28 +342,26 @@ cdef class Token:
|
|||
yield from word.subtree
|
||||
|
||||
property left_edge:
|
||||
"""
|
||||
The leftmost token of this token's syntactic descendents.
|
||||
"""The leftmost token of this token's syntactic descendents.
|
||||
|
||||
Returns: Token The first token such that self.is_ancestor(token)
|
||||
RETURNS (Token): The first token such that `self.is_ancestor(token)`.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.doc[self.c.l_edge]
|
||||
|
||||
property right_edge:
|
||||
"""
|
||||
The rightmost token of this token's syntactic descendents.
|
||||
"""The rightmost token of this token's syntactic descendents.
|
||||
|
||||
Returns: Token The last token such that self.is_ancestor(token)
|
||||
RETURNS (Token): The last token such that `self.is_ancestor(token)`.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.doc[self.c.r_edge]
|
||||
|
||||
property ancestors:
|
||||
"""
|
||||
A sequence of this token's syntactic ancestors.
|
||||
"""A sequence of this token's syntactic ancestors.
|
||||
|
||||
Yields: Token A sequence of ancestor tokens such that ancestor.is_ancestor(self)
|
||||
YIELDS (Token): A sequence of ancestor tokens such that
|
||||
`ancestor.is_ancestor(self)`.
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef const TokenC* head_ptr = self.c
|
||||
|
@ -357,33 +373,25 @@ cdef class Token:
|
|||
yield self.doc[head_ptr - (self.c - self.i)]
|
||||
i += 1
|
||||
|
||||
def is_ancestor_of(self, descendant):
|
||||
# TODO: Remove after backward compatibility check.
|
||||
return self.is_ancestor(descendant)
|
||||
|
||||
def is_ancestor(self, descendant):
|
||||
"""
|
||||
Check whether this token is a parent, grandparent, etc. of another
|
||||
"""Check whether this token is a parent, grandparent, etc. of another
|
||||
in the dependency tree.
|
||||
|
||||
Arguments:
|
||||
descendant (Token): Another token.
|
||||
Returns:
|
||||
is_ancestor (bool): Whether this token is the ancestor of the descendant.
|
||||
descendant (Token): Another token.
|
||||
RETURNS (bool): Whether this token is the ancestor of the descendant.
|
||||
"""
|
||||
if self.doc is not descendant.doc:
|
||||
return False
|
||||
return any( ancestor.i == self.i for ancestor in descendant.ancestors )
|
||||
|
||||
property head:
|
||||
"""
|
||||
The syntactic parent, or "governor", of this token.
|
||||
"""The syntactic parent, or "governor", of this token.
|
||||
|
||||
Returns: Token
|
||||
RETURNS (Token): The token head.
|
||||
"""
|
||||
def __get__(self):
|
||||
"""
|
||||
The token predicted by the parser to be the head of the current token.
|
||||
"""The token predicted by the parser to be the head of the current
|
||||
token.
|
||||
"""
|
||||
return self.doc[self.i + self.c.head]
|
||||
def __set__(self, Token new_head):
|
||||
|
@ -477,10 +485,9 @@ cdef class Token:
|
|||
self.c.head = rel_newhead_i
|
||||
|
||||
property conjuncts:
|
||||
"""
|
||||
A sequence of coordinated tokens, including the token itself.
|
||||
"""A sequence of coordinated tokens, including the token itself.
|
||||
|
||||
Yields: Token A coordinated token
|
||||
YIELDS (Token): A coordinated token.
|
||||
"""
|
||||
def __get__(self):
|
||||
"""Get a list of conjoined words."""
|
||||
|
@ -495,25 +502,46 @@ cdef class Token:
|
|||
yield from word.conjuncts
|
||||
|
||||
property ent_type:
|
||||
"""Named entity type.
|
||||
|
||||
RETURNS (int): Named entity type.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.c.ent_type
|
||||
|
||||
property ent_iob:
|
||||
"""IOB code of named entity tag. `1="I", 2="O", 3="B"`. 0 means no tag
|
||||
is assigned.
|
||||
|
||||
RETURNS (int): IOB code of named entity tag.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.c.ent_iob
|
||||
|
||||
property ent_type_:
|
||||
"""Named entity type.
|
||||
|
||||
RETURNS (unicode): Named entity type.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.vocab.strings[self.c.ent_type]
|
||||
|
||||
property ent_iob_:
|
||||
"""IOB code of named entity tag. "B" means the token begins an entity,
|
||||
"I" means it is inside an entity, "O" means it is outside an entity, and
|
||||
"" means no entity tag is set.
|
||||
|
||||
RETURNS (unicode): IOB code of named entity tag.
|
||||
"""
|
||||
def __get__(self):
|
||||
iob_strings = ('', 'I', 'O', 'B')
|
||||
return iob_strings[self.c.ent_iob]
|
||||
|
||||
property ent_id:
|
||||
"""
|
||||
An (integer) entity ID. Usually assigned by patterns in the Matcher.
|
||||
"""ID of the entity the token is an instance of, if any. Usually
|
||||
assigned by patterns in the Matcher.
|
||||
|
||||
RETURNS (int): ID of the entity.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.c.ent_id
|
||||
|
@ -522,8 +550,10 @@ cdef class Token:
|
|||
self.c.ent_id = key
|
||||
|
||||
property ent_id_:
|
||||
"""
|
||||
A (string) entity ID. Usually assigned by patterns in the Matcher.
|
||||
"""ID of the entity the token is an instance of, if any. Usually
|
||||
assigned by patterns in the Matcher.
|
||||
|
||||
RETURNS (unicode): ID of the entity.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.vocab.strings[self.c.ent_id]
|
||||
|
@ -564,6 +594,10 @@ cdef class Token:
|
|||
return self.vocab.strings[self.c.lex.lang]
|
||||
|
||||
property lemma_:
|
||||
"""Base form of the word, with no inflectional suffixes.
|
||||
|
||||
RETURNS (unicode): Token lemma.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.vocab.strings[self.c.lemma]
|
||||
def __set__(self, unicode lemma_):
|
||||
|
|
|
@ -4,9 +4,255 @@ include ../../_includes/_mixins
|
|||
|
||||
p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
||||
|
||||
+h(2, "init") Token.__init__
|
||||
+tag method
|
||||
|
||||
p Construct a #[code Token] object.
|
||||
|
||||
+aside-code("Example").
|
||||
doc = nlp(u'Give it back! He pleaded.')
|
||||
token = doc[0]
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code vocab]
|
||||
+cell #[code Vocab]
|
||||
+cell A storage container for lexical types.
|
||||
|
||||
+row
|
||||
+cell #[code doc]
|
||||
+cell #[code Doc]
|
||||
+cell The parent document.
|
||||
|
||||
+row
|
||||
+cell #[code offset]
|
||||
+cell int
|
||||
+cell The index of the token within the document.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The newly constructed object.
|
||||
|
||||
+h(2, "len") Token.__len__
|
||||
+tag method
|
||||
|
||||
p The number of unicode characters in the token, i.e. #[code token.text].
|
||||
|
||||
+aside-code("Example").
|
||||
doc = nlp(u'Give it back! He pleaded.')
|
||||
token = doc[0]
|
||||
assert len(token) == 4
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell int
|
||||
+cell The number of unicode characters in the token.
|
||||
|
||||
+h(2, "check_flag") Token.check_flag
|
||||
+tag method
|
||||
|
||||
p Check the value of a boolean flag.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.attrs import IS_TITLE
|
||||
doc = nlp(u'Give it back! He pleaded.')
|
||||
token = doc[0]
|
||||
token.check_flag(IS_TITLE)
|
||||
# True
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code flag_id]
|
||||
+cell int
|
||||
+cell The attribute ID of the flag to check.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether the flag is set.
|
||||
|
||||
+h(2, "nbor") Token.nbor
|
||||
+tag method
|
||||
|
||||
p Get a neighboring token.
|
||||
|
||||
+aside-code("Example").
|
||||
doc = nlp(u'Give it back! He pleaded.')
|
||||
token = doc[0]
|
||||
token.nbor()
|
||||
# it
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code i]
|
||||
+cell int
|
||||
+cell The relative position of the token to get. Defaults to #[code 1].
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The token at position #[code self.doc[self.i+i]].
|
||||
|
||||
+h(2, "similarity") Token.similarity
|
||||
+tag method
|
||||
|
||||
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
|
||||
+aside-code("Example").
|
||||
apples, _, oranges = nlp(u'apples and oranges')
|
||||
apples_oranges = apples.similarity(oranges)
|
||||
oranges_apples = oranges.similarity(apples)
|
||||
assert apples_oranges == oranges_apples
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell other
|
||||
+cell -
|
||||
+cell
|
||||
| The object to compare with. By default, accepts #[code Doc],
|
||||
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell float
|
||||
+cell A scalar similarity score. Higher is more similar.
|
||||
|
||||
+h(2, "is_ancestor") Token.is_ancestor
|
||||
+tag method
|
||||
|
||||
p
|
||||
| Check whether this token is a parent, grandparent, etc. of another
|
||||
| in the dependency tree.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell descendant
|
||||
+cell #[code Token]
|
||||
+cell Another token.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether this token is the ancestor of the descendant.
|
||||
|
||||
+h(2, "has_vector") Token.has_vector
|
||||
+tag property
|
||||
+tag requires model
|
||||
|
||||
p
|
||||
| A boolean value indicating whether a word vector is associated with the
|
||||
| token.
|
||||
|
||||
+aside-code("Example").
|
||||
apple = nlp(u'apple')[0]
|
||||
assert apple.has_vector
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether the token has vector data attached.
|
||||
|
||||
+h(2, "vector") Token.vector
|
||||
+tag property
|
||||
+tag requires model
|
||||
|
||||
p
|
||||
| A real-valued meaning representation.
|
||||
|
||||
+aside-code("Example").
|
||||
apple = nlp(u'apple')[0]
|
||||
(apple.vector.dtype, apple.vector.shape)
|
||||
# (dtype('float32'), (300,))
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the token's semantics.
|
||||
|
||||
+h(2, "vector_norm") Token.vector_norm
|
||||
+tag property
|
||||
+tag requires model
|
||||
|
||||
p
|
||||
| The L2 norm of the token's vector representation.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell float
|
||||
+cell The L2 norm of the vector representation.
|
||||
|
||||
+h(2, "conjuncts") Token.conjuncts
|
||||
+tag property
|
||||
|
||||
p A sequence of coordinated tokens, including the token itself.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell A coordinated token.
|
||||
|
||||
+h(2, "children") Token.children
|
||||
+tag property
|
||||
|
||||
p A sequence of the token's immediate syntactic children.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell A child token such that #[code child.head==self].
|
||||
|
||||
+h(2, "subtree") Token.subtree
|
||||
+tag property
|
||||
|
||||
p A sequence of all the token's syntactic descendants.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell A descendant token such that #[code self.is_ancestor(descendant)].
|
||||
|
||||
+h(2, "ancestors") Token.ancestors
|
||||
+tag property
|
||||
|
||||
p A sequence of this token's syntactic ancestors.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell
|
||||
| A sequence of ancestor tokens such that
|
||||
| #[code ancestor.is_ancestor(self)].
|
||||
|
||||
+h(2, "attributes") Attributes
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code text]
|
||||
+cell unicode
|
||||
+cell Verbatim text content.
|
||||
+row
|
||||
+cell #[code text_with_ws]
|
||||
+cell unicode
|
||||
+cell Text content, with trailing space character if present.
|
||||
|
||||
+row
|
||||
+cell #[code whitespace]
|
||||
+cell int
|
||||
+cell Trailing space character if present.
|
||||
+row
|
||||
+cell #[code whitespace_]
|
||||
+cell unicode
|
||||
+cell Trailing space character if present.
|
||||
|
||||
+row
|
||||
+cell #[code vocab]
|
||||
+cell #[code Vocab]
|
||||
|
@ -17,14 +263,31 @@ p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
|||
+cell #[code Doc]
|
||||
+cell The parent document.
|
||||
|
||||
+row
|
||||
+cell #[code head]
|
||||
+cell #[code Token]
|
||||
+cell The syntactic parent, or "governor", of this token.
|
||||
|
||||
+row
|
||||
+cell #[code left_edge]
|
||||
+cell #[code Token]
|
||||
+cell The leftmost token of this token's syntactic descendants.
|
||||
|
||||
+row
|
||||
+cell #[code right_edge]
|
||||
+cell #[code Token]
|
||||
+cell The rightmost token of this token's syntactic descendants.
|
||||
|
||||
+row
|
||||
+cell #[code i]
|
||||
+cell int
|
||||
+cell The index of the token within the parent document.
|
||||
|
||||
+row
|
||||
+cell #[code ent_type]
|
||||
+cell int
|
||||
+cell Named entity type.
|
||||
|
||||
+row
|
||||
+cell #[code ent_type_]
|
||||
+cell unicode
|
||||
|
@ -42,19 +305,23 @@ p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
|||
+cell unicode
|
||||
+cell
|
||||
| IOB code of named entity tag. #[code "B"]
|
||||
| means the token begins an entity, #[code "I"] means it inside an
|
||||
| entity, #[code "O"] means it is outside an entity, and
|
||||
| means the token begins an entity, #[code "I"] means it is inside
|
||||
| an entity, #[code "O"] means it is outside an entity, and
|
||||
| #[code ""] means no entity tag is set.
|
||||
|
||||
+row
|
||||
+cell #[code ent_id]
|
||||
+cell int
|
||||
+cell ID of the entity the token is an instance of, if any.
|
||||
+cell
|
||||
| ID of the entity the token is an instance of, if any. Usually
|
||||
| assigned by patterns in the Matcher.
|
||||
|
||||
+row
|
||||
+cell #[code ent_id_]
|
||||
+cell unicode
|
||||
+cell ID of the entity the token is an instance of, if any.
|
||||
+cell
|
||||
| ID of the entity the token is an instance of, if any. Usually
|
||||
| assigned by patterns in the Matcher.
|
||||
|
||||
+row
|
||||
+cell #[code lemma]
|
||||
|
@ -229,232 +496,3 @@ p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
|||
+cell #[code lex_id]
|
||||
+cell int
|
||||
+cell ID of the token's lexical type.
|
||||
|
||||
+row
|
||||
+cell #[code text]
|
||||
+cell unicode
|
||||
+cell Verbatim text content.
|
||||
+row
|
||||
+cell #[code text_with_ws]
|
||||
+cell unicode
|
||||
+cell Text content, with trailing space character if present.
|
||||
|
||||
+row
|
||||
+cell #[code whitespace]
|
||||
+cell int
|
||||
+cell Trailing space character if present.
|
||||
+row
|
||||
+cell #[code whitespace_]
|
||||
+cell unicode
|
||||
+cell Trailing space character if present.
|
||||
|
||||
|
||||
+h(2, "init") Token.__init__
|
||||
+tag method
|
||||
|
||||
p Construct a #[code Token] object.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code vocab]
|
||||
+cell #[code Vocab]
|
||||
+cell A storage container for lexical types.
|
||||
|
||||
+row
|
||||
+cell #[code doc]
|
||||
+cell #[code Doc]
|
||||
+cell The parent document.
|
||||
|
||||
+row
|
||||
+cell #[code offset]
|
||||
+cell int
|
||||
+cell The index of the token within the document.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The newly constructed object.
|
||||
|
||||
+h(2, "len") Token.__len__
|
||||
+tag method
|
||||
|
||||
p Get the number of unicode characters in the token.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell int
|
||||
+cell The number of unicode characters in the token.
|
||||
|
||||
|
||||
+h(2, "check_flag") Token.check_flag
|
||||
+tag method
|
||||
|
||||
p Check the value of a boolean flag.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code flag_id]
|
||||
+cell int
|
||||
+cell The attribute ID of the flag to check.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether the flag is set.
|
||||
|
||||
+h(2, "nbor") Token.nbor
|
||||
+tag method
|
||||
|
||||
p Get a neighboring token.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code i]
|
||||
+cell int
|
||||
+cell The relative position of the token to get. Defaults to #[code 1].
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The token at position #[code self.doc[self.i+i]]
|
||||
|
||||
+h(2, "similarity") Token.similarity
|
||||
+tag method
|
||||
|
||||
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell other
|
||||
+cell -
|
||||
+cell
|
||||
| The object to compare with. By default, accepts #[code Doc],
|
||||
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell float
|
||||
+cell A scalar similarity score. Higher is more similar.
|
||||
|
||||
+h(2, "is_ancestor") Token.is_ancestor
|
||||
+tag method
|
||||
|
||||
p
|
||||
| Check whether this token is a parent, grandparent, etc. of another
|
||||
| in the dependency tree.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell descendant
|
||||
+cell #[code Token]
|
||||
+cell Another token.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether this token is the ancestor of the descendant.
|
||||
|
||||
|
||||
+h(2, "vector") Token.vector
|
||||
+tag property
|
||||
|
||||
p A real-valued meaning representation.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the token's semantics.
|
||||
|
||||
+h(2, "has_vector") Token.has_vector
|
||||
+tag property
|
||||
|
||||
p
|
||||
| A boolean value indicating whether a word vector is associated with the
|
||||
| object.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether the token has a vector data attached.
|
||||
|
||||
+h(2, "head") Token.head
|
||||
+tag property
|
||||
|
||||
p The syntactic parent, or "governor", of this token.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The head.
|
||||
|
||||
+h(2, "conjuncts") Token.conjuncts
|
||||
+tag property
|
||||
|
||||
p A sequence of coordinated tokens, including the token itself.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell A coordinated token.
|
||||
|
||||
+h(2, "children") Token.children
|
||||
+tag property
|
||||
|
||||
p A sequence of the token's immediate syntactic children.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell A child token such that #[code child.head==self].
|
||||
|
||||
+h(2, "subtree") Token.subtree
|
||||
+tag property
|
||||
|
||||
p A sequence of all the token's syntactic descendents.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell A descendant token such that #[code self.is_ancestor(descendant)].
|
||||
|
||||
+h(2, "left_edge") Token.left_edge
|
||||
+tag property
|
||||
|
||||
p The leftmost token of this token's syntactic descendants.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The first token such that #[code self.is_ancestor(token)].
|
||||
|
||||
+h(2, "right_edge") Token.right_edge
|
||||
+tag property
|
||||
|
||||
p The rightmost token of this token's syntactic descendents.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Token]
|
||||
+cell The last token such that #[code self.is_ancestor(token)].
|
||||
|
||||
+h(2, "ancestors") Token.ancestors
|
||||
+tag property
|
||||
|
||||
p The rightmost token of this token's syntactic descendants.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell yields
|
||||
+cell #[code Token]
|
||||
+cell
|
||||
| A sequence of ancestor tokens such that
|
||||
| #[code ancestor.is_ancestor(self)].
|
||||
|
|
Loading…
Reference in New Issue