mirror of https://github.com/explosion/spaCy.git
Convert properties to decorator syntax (#13390)
This commit is contained in:
parent
f5e85fa05a
commit
2e96797696
425
spacy/lexeme.pyx
425
spacy/lexeme.pyx
|
@ -164,45 +164,48 @@ cdef class Lexeme:
|
||||||
vector = self.vector
|
vector = self.vector
|
||||||
return numpy.sqrt((vector**2).sum())
|
return numpy.sqrt((vector**2).sum())
|
||||||
|
|
||||||
property vector:
|
@property
|
||||||
|
def vector(self):
|
||||||
"""A real-valued meaning representation.
|
"""A real-valued meaning representation.
|
||||||
|
|
||||||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
representing the lexeme's semantics.
|
representing the lexeme's semantics.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
cdef int length = self.vocab.vectors_length
|
||||||
cdef int length = self.vocab.vectors_length
|
if length == 0:
|
||||||
if length == 0:
|
raise ValueError(Errors.E010)
|
||||||
raise ValueError(Errors.E010)
|
return self.vocab.get_vector(self.c.orth)
|
||||||
return self.vocab.get_vector(self.c.orth)
|
|
||||||
|
|
||||||
def __set__(self, vector):
|
@vector.setter
|
||||||
if len(vector) != self.vocab.vectors_length:
|
def vector(self, vector):
|
||||||
raise ValueError(Errors.E073.format(new_length=len(vector),
|
if len(vector) != self.vocab.vectors_length:
|
||||||
length=self.vocab.vectors_length))
|
raise ValueError(Errors.E073.format(new_length=len(vector),
|
||||||
self.vocab.set_vector(self.c.orth, vector)
|
length=self.vocab.vectors_length))
|
||||||
|
self.vocab.set_vector(self.c.orth, vector)
|
||||||
|
|
||||||
property rank:
|
@property
|
||||||
|
def rank(self):
|
||||||
"""RETURNS (int): Sequential ID of the lexeme's lexical type, used
|
"""RETURNS (int): Sequential ID of the lexeme's lexical type, used
|
||||||
to index into tables, e.g. for word vectors."""
|
to index into tables, e.g. for word vectors."""
|
||||||
def __get__(self):
|
return self.c.id
|
||||||
return self.c.id
|
|
||||||
|
|
||||||
def __set__(self, value):
|
@rank.setter
|
||||||
self.c.id = value
|
def rank(self, value):
|
||||||
|
self.c.id = value
|
||||||
|
|
||||||
property sentiment:
|
@property
|
||||||
|
def sentiment(self):
|
||||||
"""RETURNS (float): A scalar value indicating the positivity or
|
"""RETURNS (float): A scalar value indicating the positivity or
|
||||||
negativity of the lexeme."""
|
negativity of the lexeme."""
|
||||||
def __get__(self):
|
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
|
||||||
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
|
return sentiment_table.get(self.c.orth, 0.0)
|
||||||
return sentiment_table.get(self.c.orth, 0.0)
|
|
||||||
|
|
||||||
def __set__(self, float x):
|
@sentiment.setter
|
||||||
if "lexeme_sentiment" not in self.vocab.lookups:
|
def sentiment(self, float x):
|
||||||
self.vocab.lookups.add_table("lexeme_sentiment")
|
if "lexeme_sentiment" not in self.vocab.lookups:
|
||||||
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
|
self.vocab.lookups.add_table("lexeme_sentiment")
|
||||||
sentiment_table[self.c.orth] = x
|
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
|
||||||
|
sentiment_table[self.c.orth] = x
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def orth_(self):
|
def orth_(self):
|
||||||
|
@ -216,306 +219,338 @@ cdef class Lexeme:
|
||||||
"""RETURNS (str): The original verbatim text of the lexeme."""
|
"""RETURNS (str): The original verbatim text of the lexeme."""
|
||||||
return self.orth_
|
return self.orth_
|
||||||
|
|
||||||
property lower:
|
@property
|
||||||
|
def lower(self):
|
||||||
"""RETURNS (uint64): Lowercase form of the lexeme."""
|
"""RETURNS (uint64): Lowercase form of the lexeme."""
|
||||||
def __get__(self):
|
return self.c.lower
|
||||||
return self.c.lower
|
|
||||||
|
|
||||||
def __set__(self, attr_t x):
|
@lower.setter
|
||||||
self.c.lower = x
|
def lower(self, attr_t x):
|
||||||
|
self.c.lower = x
|
||||||
|
|
||||||
property norm:
|
@property
|
||||||
|
def norm(self):
|
||||||
"""RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the
|
"""RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the
|
||||||
lexeme text.
|
lexeme text.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.c.norm
|
||||||
return self.c.norm
|
|
||||||
|
|
||||||
def __set__(self, attr_t x):
|
@norm.setter
|
||||||
if "lexeme_norm" not in self.vocab.lookups:
|
def norm(self, attr_t x):
|
||||||
self.vocab.lookups.add_table("lexeme_norm")
|
if "lexeme_norm" not in self.vocab.lookups:
|
||||||
norm_table = self.vocab.lookups.get_table("lexeme_norm")
|
self.vocab.lookups.add_table("lexeme_norm")
|
||||||
norm_table[self.c.orth] = self.vocab.strings[x]
|
norm_table = self.vocab.lookups.get_table("lexeme_norm")
|
||||||
self.c.norm = x
|
norm_table[self.c.orth] = self.vocab.strings[x]
|
||||||
|
self.c.norm = x
|
||||||
|
|
||||||
property shape:
|
@property
|
||||||
|
def shape(self):
|
||||||
"""RETURNS (uint64): Transform of the word's string, to show
|
"""RETURNS (uint64): Transform of the word's string, to show
|
||||||
orthographic features.
|
orthographic features.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.c.shape
|
||||||
return self.c.shape
|
|
||||||
|
|
||||||
def __set__(self, attr_t x):
|
@shape.setter
|
||||||
self.c.shape = x
|
def shape(self, attr_t x):
|
||||||
|
self.c.shape = x
|
||||||
|
|
||||||
property prefix:
|
@property
|
||||||
|
def prefix(self):
|
||||||
"""RETURNS (uint64): Length-N substring from the start of the word.
|
"""RETURNS (uint64): Length-N substring from the start of the word.
|
||||||
Defaults to `N=1`.
|
Defaults to `N=1`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.c.prefix
|
||||||
return self.c.prefix
|
|
||||||
|
|
||||||
def __set__(self, attr_t x):
|
@prefix.setter
|
||||||
self.c.prefix = x
|
def prefix(self, attr_t x):
|
||||||
|
self.c.prefix = x
|
||||||
|
|
||||||
property suffix:
|
@property
|
||||||
|
def suffix(self):
|
||||||
"""RETURNS (uint64): Length-N substring from the end of the word.
|
"""RETURNS (uint64): Length-N substring from the end of the word.
|
||||||
Defaults to `N=3`.
|
Defaults to `N=3`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.c.suffix
|
||||||
return self.c.suffix
|
|
||||||
|
|
||||||
def __set__(self, attr_t x):
|
@suffix.setter
|
||||||
self.c.suffix = x
|
def suffix(self, attr_t x):
|
||||||
|
self.c.suffix = x
|
||||||
|
|
||||||
property cluster:
|
@property
|
||||||
|
def cluster(self):
|
||||||
"""RETURNS (int): Brown cluster ID."""
|
"""RETURNS (int): Brown cluster ID."""
|
||||||
def __get__(self):
|
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
|
||||||
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
|
return cluster_table.get(self.c.orth, 0)
|
||||||
return cluster_table.get(self.c.orth, 0)
|
|
||||||
|
|
||||||
def __set__(self, int x):
|
@cluster.setter
|
||||||
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
|
def cluster(self, int x):
|
||||||
cluster_table[self.c.orth] = x
|
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
|
||||||
|
cluster_table[self.c.orth] = x
|
||||||
|
|
||||||
property lang:
|
@property
|
||||||
|
def lang(self):
|
||||||
"""RETURNS (uint64): Language of the parent vocabulary."""
|
"""RETURNS (uint64): Language of the parent vocabulary."""
|
||||||
def __get__(self):
|
return self.c.lang
|
||||||
return self.c.lang
|
|
||||||
|
|
||||||
def __set__(self, attr_t x):
|
@lang.setter
|
||||||
self.c.lang = x
|
def lang(self, attr_t x):
|
||||||
|
self.c.lang = x
|
||||||
|
|
||||||
property prob:
|
@property
|
||||||
|
def prob(self):
|
||||||
"""RETURNS (float): Smoothed log probability estimate of the lexeme's
|
"""RETURNS (float): Smoothed log probability estimate of the lexeme's
|
||||||
type."""
|
type."""
|
||||||
def __get__(self):
|
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
|
||||||
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
|
settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
|
||||||
settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
|
default_oov_prob = settings_table.get("oov_prob", -20.0)
|
||||||
default_oov_prob = settings_table.get("oov_prob", -20.0)
|
return prob_table.get(self.c.orth, default_oov_prob)
|
||||||
return prob_table.get(self.c.orth, default_oov_prob)
|
|
||||||
|
|
||||||
def __set__(self, float x):
|
@prob.setter
|
||||||
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
|
def prob(self, float x):
|
||||||
prob_table[self.c.orth] = x
|
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
|
||||||
|
prob_table[self.c.orth] = x
|
||||||
|
|
||||||
property lower_:
|
@property
|
||||||
|
def lower_(self):
|
||||||
"""RETURNS (str): Lowercase form of the word."""
|
"""RETURNS (str): Lowercase form of the word."""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.lower]
|
||||||
return self.vocab.strings[self.c.lower]
|
|
||||||
|
|
||||||
def __set__(self, str x):
|
@lower_.setter
|
||||||
self.c.lower = self.vocab.strings.add(x)
|
def lower_(self, str x):
|
||||||
|
self.c.lower = self.vocab.strings.add(x)
|
||||||
|
|
||||||
property norm_:
|
@property
|
||||||
|
def norm_(self):
|
||||||
"""RETURNS (str): The lexeme's norm, i.e. a normalised form of the
|
"""RETURNS (str): The lexeme's norm, i.e. a normalised form of the
|
||||||
lexeme text.
|
lexeme text.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.norm]
|
||||||
return self.vocab.strings[self.c.norm]
|
|
||||||
|
|
||||||
def __set__(self, str x):
|
@norm_.setter
|
||||||
self.norm = self.vocab.strings.add(x)
|
def norm_(self, str x):
|
||||||
|
self.norm = self.vocab.strings.add(x)
|
||||||
|
|
||||||
property shape_:
|
@property
|
||||||
|
def shape_(self):
|
||||||
"""RETURNS (str): Transform of the word's string, to show
|
"""RETURNS (str): Transform of the word's string, to show
|
||||||
orthographic features.
|
orthographic features.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.shape]
|
||||||
return self.vocab.strings[self.c.shape]
|
|
||||||
|
|
||||||
def __set__(self, str x):
|
@shape_.setter
|
||||||
self.c.shape = self.vocab.strings.add(x)
|
def shape_(self, str x):
|
||||||
|
self.c.shape = self.vocab.strings.add(x)
|
||||||
|
|
||||||
property prefix_:
|
@property
|
||||||
|
def prefix_(self):
|
||||||
"""RETURNS (str): Length-N substring from the start of the word.
|
"""RETURNS (str): Length-N substring from the start of the word.
|
||||||
Defaults to `N=1`.
|
Defaults to `N=1`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.prefix]
|
||||||
return self.vocab.strings[self.c.prefix]
|
|
||||||
|
|
||||||
def __set__(self, str x):
|
@prefix_.setter
|
||||||
self.c.prefix = self.vocab.strings.add(x)
|
def prefix_(self, str x):
|
||||||
|
self.c.prefix = self.vocab.strings.add(x)
|
||||||
|
|
||||||
property suffix_:
|
@property
|
||||||
|
def suffix_(self):
|
||||||
"""RETURNS (str): Length-N substring from the end of the word.
|
"""RETURNS (str): Length-N substring from the end of the word.
|
||||||
Defaults to `N=3`.
|
Defaults to `N=3`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.suffix]
|
||||||
return self.vocab.strings[self.c.suffix]
|
|
||||||
|
|
||||||
def __set__(self, str x):
|
@suffix_.setter
|
||||||
self.c.suffix = self.vocab.strings.add(x)
|
def suffix_(self, str x):
|
||||||
|
self.c.suffix = self.vocab.strings.add(x)
|
||||||
|
|
||||||
property lang_:
|
@property
|
||||||
|
def lang_(self):
|
||||||
"""RETURNS (str): Language of the parent vocabulary."""
|
"""RETURNS (str): Language of the parent vocabulary."""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.lang]
|
||||||
return self.vocab.strings[self.c.lang]
|
|
||||||
|
|
||||||
def __set__(self, str x):
|
@lang_.setter
|
||||||
self.c.lang = self.vocab.strings.add(x)
|
def lang_(self, str x):
|
||||||
|
self.c.lang = self.vocab.strings.add(x)
|
||||||
|
|
||||||
property flags:
|
@property
|
||||||
|
def flags(self):
|
||||||
"""RETURNS (uint64): Container of the lexeme's binary flags."""
|
"""RETURNS (uint64): Container of the lexeme's binary flags."""
|
||||||
def __get__(self):
|
return self.c.flags
|
||||||
return self.c.flags
|
|
||||||
|
|
||||||
def __set__(self, flags_t x):
|
@flags.setter
|
||||||
self.c.flags = x
|
def flags(self, flags_t x):
|
||||||
|
self.c.flags = x
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_oov(self):
|
def is_oov(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
|
"""RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
|
||||||
return self.orth not in self.vocab.vectors
|
return self.orth not in self.vocab.vectors
|
||||||
|
|
||||||
property is_stop:
|
@property
|
||||||
|
def is_stop(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is a stop word."""
|
"""RETURNS (bool): Whether the lexeme is a stop word."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_STOP)
|
||||||
return Lexeme.c_check_flag(self.c, IS_STOP)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_stop.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_STOP, x)
|
def is_stop(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_STOP, x)
|
||||||
|
|
||||||
property is_alpha:
|
@property
|
||||||
|
def is_alpha(self):
|
||||||
"""RETURNS (bool): Whether the lexeme consists of alphabetic
|
"""RETURNS (bool): Whether the lexeme consists of alphabetic
|
||||||
characters. Equivalent to `lexeme.text.isalpha()`.
|
characters. Equivalent to `lexeme.text.isalpha()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_ALPHA)
|
||||||
return Lexeme.c_check_flag(self.c, IS_ALPHA)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_alpha.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_ALPHA, x)
|
def is_alpha(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_ALPHA, x)
|
||||||
|
|
||||||
property is_ascii:
|
@property
|
||||||
|
def is_ascii(self):
|
||||||
"""RETURNS (bool): Whether the lexeme consists of ASCII characters.
|
"""RETURNS (bool): Whether the lexeme consists of ASCII characters.
|
||||||
Equivalent to `not any(ord(c) >= 128 for c in lexeme.text)`.
|
Equivalent to `not any(ord(c) >= 128 for c in lexeme.text)`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_ASCII)
|
||||||
return Lexeme.c_check_flag(self.c, IS_ASCII)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_ascii.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_ASCII, x)
|
def is_ascii(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_ASCII, x)
|
||||||
|
|
||||||
property is_digit:
|
@property
|
||||||
|
def is_digit(self):
|
||||||
"""RETURNS (bool): Whether the lexeme consists of digits. Equivalent
|
"""RETURNS (bool): Whether the lexeme consists of digits. Equivalent
|
||||||
to `lexeme.text.isdigit()`.
|
to `lexeme.text.isdigit()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_DIGIT)
|
||||||
return Lexeme.c_check_flag(self.c, IS_DIGIT)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_digit.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_DIGIT, x)
|
def is_digit(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_DIGIT, x)
|
||||||
|
|
||||||
property is_lower:
|
@property
|
||||||
|
def is_lower(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to
|
"""RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to
|
||||||
`lexeme.text.islower()`.
|
`lexeme.text.islower()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_LOWER)
|
||||||
return Lexeme.c_check_flag(self.c, IS_LOWER)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_lower.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_LOWER, x)
|
def is_lower(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_LOWER, x)
|
||||||
|
|
||||||
property is_upper:
|
@property
|
||||||
|
def is_upper(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to
|
"""RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to
|
||||||
`lexeme.text.isupper()`.
|
`lexeme.text.isupper()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_UPPER)
|
||||||
return Lexeme.c_check_flag(self.c, IS_UPPER)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_upper.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_UPPER, x)
|
def is_upper(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_UPPER, x)
|
||||||
|
|
||||||
property is_title:
|
@property
|
||||||
|
def is_title(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to
|
"""RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to
|
||||||
`lexeme.text.istitle()`.
|
`lexeme.text.istitle()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_TITLE)
|
||||||
return Lexeme.c_check_flag(self.c, IS_TITLE)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_title.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_TITLE, x)
|
def is_title(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_TITLE, x)
|
||||||
|
|
||||||
property is_punct:
|
@property
|
||||||
|
def is_punct(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is punctuation."""
|
"""RETURNS (bool): Whether the lexeme is punctuation."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_PUNCT)
|
||||||
return Lexeme.c_check_flag(self.c, IS_PUNCT)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_punct.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_PUNCT, x)
|
def is_punct(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_PUNCT, x)
|
||||||
|
|
||||||
property is_space:
|
@property
|
||||||
|
def is_space(self):
|
||||||
"""RETURNS (bool): Whether the lexeme consists of whitespace characters.
|
"""RETURNS (bool): Whether the lexeme consists of whitespace characters.
|
||||||
Equivalent to `lexeme.text.isspace()`.
|
Equivalent to `lexeme.text.isspace()`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_SPACE)
|
||||||
return Lexeme.c_check_flag(self.c, IS_SPACE)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_space.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_SPACE, x)
|
def is_space(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_SPACE, x)
|
||||||
|
|
||||||
property is_bracket:
|
@property
|
||||||
|
def is_bracket(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is a bracket."""
|
"""RETURNS (bool): Whether the lexeme is a bracket."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_BRACKET)
|
||||||
return Lexeme.c_check_flag(self.c, IS_BRACKET)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_bracket.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_BRACKET, x)
|
def is_bracket(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_BRACKET, x)
|
||||||
|
|
||||||
property is_quote:
|
@property
|
||||||
|
def is_quote(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is a quotation mark."""
|
"""RETURNS (bool): Whether the lexeme is a quotation mark."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_QUOTE)
|
||||||
return Lexeme.c_check_flag(self.c, IS_QUOTE)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_quote.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_QUOTE, x)
|
def is_quote(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_QUOTE, x)
|
||||||
|
|
||||||
property is_left_punct:
|
@property
|
||||||
|
def is_left_punct(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is left punctuation, e.g. (."""
|
"""RETURNS (bool): Whether the lexeme is left punctuation, e.g. (."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
|
||||||
return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_left_punct.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
|
def is_left_punct(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
|
||||||
|
|
||||||
property is_right_punct:
|
@property
|
||||||
|
def is_right_punct(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is right punctuation, e.g. )."""
|
"""RETURNS (bool): Whether the lexeme is right punctuation, e.g. )."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
|
||||||
return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_right_punct.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
|
def is_right_punct(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
|
||||||
|
|
||||||
property is_currency:
|
@property
|
||||||
|
def is_currency(self):
|
||||||
"""RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €."""
|
"""RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, IS_CURRENCY)
|
||||||
return Lexeme.c_check_flag(self.c, IS_CURRENCY)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@is_currency.setter
|
||||||
Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
|
def is_currency(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
|
||||||
|
|
||||||
property like_url:
|
@property
|
||||||
|
def like_url(self):
|
||||||
"""RETURNS (bool): Whether the lexeme resembles a URL."""
|
"""RETURNS (bool): Whether the lexeme resembles a URL."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, LIKE_URL)
|
||||||
return Lexeme.c_check_flag(self.c, LIKE_URL)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@like_url.setter
|
||||||
Lexeme.c_set_flag(self.c, LIKE_URL, x)
|
def like_url(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, LIKE_URL, x)
|
||||||
|
|
||||||
property like_num:
|
@property
|
||||||
|
def like_num(self):
|
||||||
"""RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9",
|
"""RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9",
|
||||||
"10", "ten", etc.
|
"10", "ten", etc.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, LIKE_NUM)
|
||||||
return Lexeme.c_check_flag(self.c, LIKE_NUM)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@like_num.setter
|
||||||
Lexeme.c_set_flag(self.c, LIKE_NUM, x)
|
def like_num(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, LIKE_NUM, x)
|
||||||
|
|
||||||
property like_email:
|
@property
|
||||||
|
def like_email(self):
|
||||||
"""RETURNS (bool): Whether the lexeme resembles an email address."""
|
"""RETURNS (bool): Whether the lexeme resembles an email address."""
|
||||||
def __get__(self):
|
return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
|
||||||
return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
|
|
||||||
|
|
||||||
def __set__(self, bint x):
|
@like_email.setter
|
||||||
Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)
|
def like_email(self, bint x):
|
||||||
|
Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)
|
||||||
|
|
|
@ -70,65 +70,72 @@ cdef class Tokenizer:
|
||||||
self._special_matcher = PhraseMatcher(self.vocab)
|
self._special_matcher = PhraseMatcher(self.vocab)
|
||||||
self._load_special_cases(rules)
|
self._load_special_cases(rules)
|
||||||
|
|
||||||
property token_match:
|
@property
|
||||||
def __get__(self):
|
def token_match(self):
|
||||||
return self._token_match
|
return self._token_match
|
||||||
|
|
||||||
def __set__(self, token_match):
|
@token_match.setter
|
||||||
self._token_match = token_match
|
def token_match(self, token_match):
|
||||||
self._reload_special_cases()
|
self._token_match = token_match
|
||||||
|
self._reload_special_cases()
|
||||||
|
|
||||||
property url_match:
|
@property
|
||||||
def __get__(self):
|
def url_match(self):
|
||||||
return self._url_match
|
return self._url_match
|
||||||
|
|
||||||
def __set__(self, url_match):
|
@url_match.setter
|
||||||
self._url_match = url_match
|
def url_match(self, url_match):
|
||||||
self._reload_special_cases()
|
self._url_match = url_match
|
||||||
|
self._reload_special_cases()
|
||||||
|
|
||||||
property prefix_search:
|
@property
|
||||||
def __get__(self):
|
def prefix_search(self):
|
||||||
return self._prefix_search
|
return self._prefix_search
|
||||||
|
|
||||||
def __set__(self, prefix_search):
|
@prefix_search.setter
|
||||||
self._prefix_search = prefix_search
|
def prefix_search(self, prefix_search):
|
||||||
self._reload_special_cases()
|
self._prefix_search = prefix_search
|
||||||
|
self._reload_special_cases()
|
||||||
|
|
||||||
property suffix_search:
|
@property
|
||||||
def __get__(self):
|
def suffix_search(self):
|
||||||
return self._suffix_search
|
return self._suffix_search
|
||||||
|
|
||||||
def __set__(self, suffix_search):
|
@suffix_search.setter
|
||||||
self._suffix_search = suffix_search
|
def suffix_search(self, suffix_search):
|
||||||
self._reload_special_cases()
|
self._suffix_search = suffix_search
|
||||||
|
self._reload_special_cases()
|
||||||
|
|
||||||
property infix_finditer:
|
@property
|
||||||
def __get__(self):
|
def infix_finditer(self):
|
||||||
return self._infix_finditer
|
return self._infix_finditer
|
||||||
|
|
||||||
def __set__(self, infix_finditer):
|
@infix_finditer.setter
|
||||||
self._infix_finditer = infix_finditer
|
def infix_finditer(self, infix_finditer):
|
||||||
self._reload_special_cases()
|
self._infix_finditer = infix_finditer
|
||||||
|
self._reload_special_cases()
|
||||||
|
|
||||||
property rules:
|
@property
|
||||||
def __get__(self):
|
def rules(self):
|
||||||
return self._rules
|
return self._rules
|
||||||
|
|
||||||
def __set__(self, rules):
|
@rules.setter
|
||||||
self._rules = {}
|
def rules(self, rules):
|
||||||
self._flush_cache()
|
self._rules = {}
|
||||||
self._flush_specials()
|
self._flush_cache()
|
||||||
self._cache = PreshMap()
|
self._flush_specials()
|
||||||
self._specials = PreshMap()
|
self._cache = PreshMap()
|
||||||
self._load_special_cases(rules)
|
self._specials = PreshMap()
|
||||||
|
self._load_special_cases(rules)
|
||||||
|
|
||||||
property faster_heuristics:
|
@property
|
||||||
def __get__(self):
|
def faster_heuristics(self):
|
||||||
return bool(self._faster_heuristics)
|
return bool(self._faster_heuristics)
|
||||||
|
|
||||||
def __set__(self, faster_heuristics):
|
@faster_heuristics.setter
|
||||||
self._faster_heuristics = bool(faster_heuristics)
|
def faster_heuristics(self, faster_heuristics):
|
||||||
self._reload_special_cases()
|
self._faster_heuristics = bool(faster_heuristics)
|
||||||
|
self._reload_special_cases()
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
args = (self.vocab,
|
args = (self.vocab,
|
||||||
|
|
|
@ -667,7 +667,8 @@ cdef class Doc:
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
property vector:
|
@property
|
||||||
|
def vector(self):
|
||||||
"""A real-valued meaning representation. Defaults to an average of the
|
"""A real-valued meaning representation. Defaults to an average of the
|
||||||
token vectors.
|
token vectors.
|
||||||
|
|
||||||
|
@ -676,48 +677,49 @@ cdef class Doc:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#vector
|
DOCS: https://spacy.io/api/doc#vector
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
if "vector" in self.user_hooks:
|
||||||
if "vector" in self.user_hooks:
|
return self.user_hooks["vector"](self)
|
||||||
return self.user_hooks["vector"](self)
|
if self._vector is not None:
|
||||||
if self._vector is not None:
|
return self._vector
|
||||||
return self._vector
|
xp = get_array_module(self.vocab.vectors.data)
|
||||||
xp = get_array_module(self.vocab.vectors.data)
|
if not len(self):
|
||||||
if not len(self):
|
self._vector = xp.zeros((self.vocab.vectors_length,), dtype="f")
|
||||||
self._vector = xp.zeros((self.vocab.vectors_length,), dtype="f")
|
return self._vector
|
||||||
return self._vector
|
elif self.vocab.vectors.size > 0:
|
||||||
elif self.vocab.vectors.size > 0:
|
self._vector = sum(t.vector for t in self) / len(self)
|
||||||
self._vector = sum(t.vector for t in self) / len(self)
|
return self._vector
|
||||||
return self._vector
|
elif self.tensor.size > 0:
|
||||||
elif self.tensor.size > 0:
|
self._vector = self.tensor.mean(axis=0)
|
||||||
self._vector = self.tensor.mean(axis=0)
|
return self._vector
|
||||||
return self._vector
|
else:
|
||||||
else:
|
return xp.zeros((self.vocab.vectors_length,), dtype="float32")
|
||||||
return xp.zeros((self.vocab.vectors_length,), dtype="float32")
|
|
||||||
|
|
||||||
def __set__(self, value):
|
@vector.setter
|
||||||
self._vector = value
|
def vector(self, value):
|
||||||
|
self._vector = value
|
||||||
|
|
||||||
property vector_norm:
|
@property
|
||||||
|
def vector_norm(self):
|
||||||
"""The L2 norm of the document's vector representation.
|
"""The L2 norm of the document's vector representation.
|
||||||
|
|
||||||
RETURNS (float): The L2 norm of the vector representation.
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#vector_norm
|
DOCS: https://spacy.io/api/doc#vector_norm
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
if "vector_norm" in self.user_hooks:
|
||||||
if "vector_norm" in self.user_hooks:
|
return self.user_hooks["vector_norm"](self)
|
||||||
return self.user_hooks["vector_norm"](self)
|
cdef float value
|
||||||
cdef float value
|
cdef double norm = 0
|
||||||
cdef double norm = 0
|
if self._vector_norm is None:
|
||||||
if self._vector_norm is None:
|
norm = 0.0
|
||||||
norm = 0.0
|
for value in self.vector:
|
||||||
for value in self.vector:
|
norm += value * value
|
||||||
norm += value * value
|
self._vector_norm = sqrt(norm) if norm != 0 else 0
|
||||||
self._vector_norm = sqrt(norm) if norm != 0 else 0
|
return self._vector_norm
|
||||||
return self._vector_norm
|
|
||||||
|
|
||||||
def __set__(self, value):
|
@vector_norm.setter
|
||||||
self._vector_norm = value
|
def vector_norm(self, value):
|
||||||
|
self._vector_norm = value
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def text(self):
|
def text(self):
|
||||||
|
@ -736,7 +738,8 @@ cdef class Doc:
|
||||||
"""
|
"""
|
||||||
return self.text
|
return self.text
|
||||||
|
|
||||||
property ents:
|
@property
|
||||||
|
def ents(self):
|
||||||
"""The named entities in the document. Returns a tuple of named entity
|
"""The named entities in the document. Returns a tuple of named entity
|
||||||
`Span` objects, if the entity recognizer has been applied.
|
`Span` objects, if the entity recognizer has been applied.
|
||||||
|
|
||||||
|
@ -744,55 +747,55 @@ cdef class Doc:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#ents
|
DOCS: https://spacy.io/api/doc#ents
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
cdef int i
|
||||||
cdef int i
|
cdef const TokenC* token
|
||||||
cdef const TokenC* token
|
cdef int start = -1
|
||||||
cdef int start = -1
|
cdef attr_t label = 0
|
||||||
cdef attr_t label = 0
|
cdef attr_t kb_id = 0
|
||||||
cdef attr_t kb_id = 0
|
cdef attr_t ent_id = 0
|
||||||
cdef attr_t ent_id = 0
|
output = []
|
||||||
output = []
|
for i in range(self.length):
|
||||||
for i in range(self.length):
|
token = &self.c[i]
|
||||||
token = &self.c[i]
|
if token.ent_iob == 1:
|
||||||
if token.ent_iob == 1:
|
if start == -1:
|
||||||
if start == -1:
|
seq = [f"{t.text}|{t.ent_iob_}" for t in self[i-5:i+5]]
|
||||||
seq = [f"{t.text}|{t.ent_iob_}" for t in self[i-5:i+5]]
|
raise ValueError(Errors.E093.format(seq=" ".join(seq)))
|
||||||
raise ValueError(Errors.E093.format(seq=" ".join(seq)))
|
elif token.ent_iob == 2 or token.ent_iob == 0 or \
|
||||||
elif token.ent_iob == 2 or token.ent_iob == 0 or \
|
(token.ent_iob == 3 and token.ent_type == 0):
|
||||||
(token.ent_iob == 3 and token.ent_type == 0):
|
if start != -1:
|
||||||
if start != -1:
|
output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
|
||||||
output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
|
start = -1
|
||||||
start = -1
|
label = 0
|
||||||
label = 0
|
kb_id = 0
|
||||||
kb_id = 0
|
ent_id = 0
|
||||||
ent_id = 0
|
elif token.ent_iob == 3:
|
||||||
elif token.ent_iob == 3:
|
if start != -1:
|
||||||
if start != -1:
|
output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
|
||||||
output.append(Span(self, start, i, label=label, kb_id=kb_id, span_id=ent_id))
|
start = i
|
||||||
start = i
|
label = token.ent_type
|
||||||
label = token.ent_type
|
kb_id = token.ent_kb_id
|
||||||
kb_id = token.ent_kb_id
|
ent_id = token.ent_id
|
||||||
ent_id = token.ent_id
|
if start != -1:
|
||||||
if start != -1:
|
output.append(Span(self, start, self.length, label=label, kb_id=kb_id, span_id=ent_id))
|
||||||
output.append(Span(self, start, self.length, label=label, kb_id=kb_id, span_id=ent_id))
|
# remove empty-label spans
|
||||||
# remove empty-label spans
|
output = [o for o in output if o.label_ != ""]
|
||||||
output = [o for o in output if o.label_ != ""]
|
return tuple(output)
|
||||||
return tuple(output)
|
|
||||||
|
|
||||||
def __set__(self, ents):
|
@ents.setter
|
||||||
# TODO:
|
def ents(self, ents):
|
||||||
# 1. Test basic data-driven ORTH gazetteer
|
# TODO:
|
||||||
# 2. Test more nuanced date and currency regex
|
# 1. Test basic data-driven ORTH gazetteer
|
||||||
cdef attr_t kb_id, ent_id
|
# 2. Test more nuanced date and currency regex
|
||||||
cdef int ent_start, ent_end
|
cdef attr_t kb_id, ent_id
|
||||||
ent_spans = []
|
cdef int ent_start, ent_end
|
||||||
for ent_info in ents:
|
ent_spans = []
|
||||||
entity_type_, kb_id, ent_start, ent_end, ent_id = get_entity_info(ent_info)
|
for ent_info in ents:
|
||||||
if isinstance(entity_type_, str):
|
entity_type_, kb_id, ent_start, ent_end, ent_id = get_entity_info(ent_info)
|
||||||
self.vocab.strings.add(entity_type_)
|
if isinstance(entity_type_, str):
|
||||||
span = Span(self, ent_start, ent_end, label=entity_type_, kb_id=kb_id, span_id=ent_id)
|
self.vocab.strings.add(entity_type_)
|
||||||
ent_spans.append(span)
|
span = Span(self, ent_start, ent_end, label=entity_type_, kb_id=kb_id, span_id=ent_id)
|
||||||
self.set_ents(ent_spans, default=SetEntsDefault.outside)
|
ent_spans.append(span)
|
||||||
|
self.set_ents(ent_spans, default=SetEntsDefault.outside)
|
||||||
|
|
||||||
def set_ents(self, entities, *, blocked=None, missing=None, outside=None, default=SetEntsDefault.outside):
|
def set_ents(self, entities, *, blocked=None, missing=None, outside=None, default=SetEntsDefault.outside):
|
||||||
"""Set entity annotation.
|
"""Set entity annotation.
|
||||||
|
|
|
@ -757,78 +757,87 @@ cdef class Span:
|
||||||
for word in self.rights:
|
for word in self.rights:
|
||||||
yield from word.subtree
|
yield from word.subtree
|
||||||
|
|
||||||
property start:
|
@property
|
||||||
def __get__(self):
|
def start(self):
|
||||||
return self.c.start
|
return self.c.start
|
||||||
|
|
||||||
def __set__(self, int start):
|
@start.setter
|
||||||
if start < 0:
|
def start(self, int start):
|
||||||
raise IndexError(Errors.E1032.format(var="start", forbidden="< 0", value=start))
|
if start < 0:
|
||||||
self.c.start = start
|
raise IndexError(Errors.E1032.format(var="start", forbidden="< 0", value=start))
|
||||||
|
self.c.start = start
|
||||||
|
|
||||||
property end:
|
@property
|
||||||
def __get__(self):
|
def end(self):
|
||||||
return self.c.end
|
return self.c.end
|
||||||
|
|
||||||
def __set__(self, int end):
|
@end.setter
|
||||||
if end < 0:
|
def end(self, int end):
|
||||||
raise IndexError(Errors.E1032.format(var="end", forbidden="< 0", value=end))
|
if end < 0:
|
||||||
self.c.end = end
|
raise IndexError(Errors.E1032.format(var="end", forbidden="< 0", value=end))
|
||||||
|
self.c.end = end
|
||||||
|
|
||||||
property start_char:
|
@property
|
||||||
def __get__(self):
|
def start_char(self):
|
||||||
return self.c.start_char
|
return self.c.start_char
|
||||||
|
|
||||||
def __set__(self, int start_char):
|
@start_char.setter
|
||||||
if start_char < 0:
|
def start_char(self, int start_char):
|
||||||
raise IndexError(Errors.E1032.format(var="start_char", forbidden="< 0", value=start_char))
|
if start_char < 0:
|
||||||
self.c.start_char = start_char
|
raise IndexError(Errors.E1032.format(var="start_char", forbidden="< 0", value=start_char))
|
||||||
|
self.c.start_char = start_char
|
||||||
|
|
||||||
property end_char:
|
@property
|
||||||
def __get__(self):
|
def end_char(self):
|
||||||
return self.c.end_char
|
return self.c.end_char
|
||||||
|
|
||||||
def __set__(self, int end_char):
|
@end_char.setter
|
||||||
if end_char < 0:
|
def end_char(self, int end_char):
|
||||||
raise IndexError(Errors.E1032.format(var="end_char", forbidden="< 0", value=end_char))
|
if end_char < 0:
|
||||||
self.c.end_char = end_char
|
raise IndexError(Errors.E1032.format(var="end_char", forbidden="< 0", value=end_char))
|
||||||
|
self.c.end_char = end_char
|
||||||
|
|
||||||
property label:
|
@property
|
||||||
def __get__(self):
|
def label(self):
|
||||||
return self.c.label
|
return self.c.label
|
||||||
|
|
||||||
def __set__(self, attr_t label):
|
@label.setter
|
||||||
self.c.label = label
|
def label(self, attr_t label):
|
||||||
|
self.c.label = label
|
||||||
|
|
||||||
property kb_id:
|
@property
|
||||||
def __get__(self):
|
def kb_id(self):
|
||||||
return self.c.kb_id
|
return self.c.kb_id
|
||||||
|
|
||||||
def __set__(self, attr_t kb_id):
|
@kb_id.setter
|
||||||
self.c.kb_id = kb_id
|
def kb_id(self, attr_t kb_id):
|
||||||
|
self.c.kb_id = kb_id
|
||||||
|
|
||||||
property id:
|
@property
|
||||||
def __get__(self):
|
def id(self):
|
||||||
return self.c.id
|
return self.c.id
|
||||||
|
|
||||||
def __set__(self, attr_t id):
|
@id.setter
|
||||||
self.c.id = id
|
def id(self, attr_t id):
|
||||||
|
self.c.id = id
|
||||||
|
|
||||||
property ent_id:
|
@property
|
||||||
|
def ent_id(self):
|
||||||
"""RETURNS (uint64): The entity ID."""
|
"""RETURNS (uint64): The entity ID."""
|
||||||
def __get__(self):
|
return self.root.ent_id
|
||||||
return self.root.ent_id
|
|
||||||
|
|
||||||
def __set__(self, hash_t key):
|
@ent_id.setter
|
||||||
raise NotImplementedError(Errors.E200.format(attr="ent_id"))
|
def ent_id(self, hash_t key):
|
||||||
|
raise NotImplementedError(Errors.E200.format(attr="ent_id"))
|
||||||
|
|
||||||
property ent_id_:
|
@property
|
||||||
|
def ent_id_(self):
|
||||||
"""RETURNS (str): The (string) entity ID."""
|
"""RETURNS (str): The (string) entity ID."""
|
||||||
def __get__(self):
|
return self.root.ent_id_
|
||||||
return self.root.ent_id_
|
|
||||||
|
|
||||||
def __set__(self, str key):
|
@ent_id_.setter
|
||||||
raise NotImplementedError(Errors.E200.format(attr="ent_id_"))
|
def ent_id_(self, str key):
|
||||||
|
raise NotImplementedError(Errors.E200.format(attr="ent_id_"))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def orth_(self):
|
def orth_(self):
|
||||||
|
@ -843,29 +852,32 @@ cdef class Span:
|
||||||
"""RETURNS (str): The span's lemma."""
|
"""RETURNS (str): The span's lemma."""
|
||||||
return "".join([t.lemma_ + t.whitespace_ for t in self]).strip()
|
return "".join([t.lemma_ + t.whitespace_ for t in self]).strip()
|
||||||
|
|
||||||
property label_:
|
@property
|
||||||
|
def label_(self):
|
||||||
"""RETURNS (str): The span's label."""
|
"""RETURNS (str): The span's label."""
|
||||||
def __get__(self):
|
return self.doc.vocab.strings[self.label]
|
||||||
return self.doc.vocab.strings[self.label]
|
|
||||||
|
|
||||||
def __set__(self, str label_):
|
@label_.setter
|
||||||
self.label = self.doc.vocab.strings.add(label_)
|
def label_(self, str label_):
|
||||||
|
self.label = self.doc.vocab.strings.add(label_)
|
||||||
|
|
||||||
property kb_id_:
|
@property
|
||||||
|
def kb_id_(self):
|
||||||
"""RETURNS (str): The span's KB ID."""
|
"""RETURNS (str): The span's KB ID."""
|
||||||
def __get__(self):
|
return self.doc.vocab.strings[self.kb_id]
|
||||||
return self.doc.vocab.strings[self.kb_id]
|
|
||||||
|
|
||||||
def __set__(self, str kb_id_):
|
@kb_id_.setter
|
||||||
self.kb_id = self.doc.vocab.strings.add(kb_id_)
|
def kb_id_(self, str kb_id_):
|
||||||
|
self.kb_id = self.doc.vocab.strings.add(kb_id_)
|
||||||
|
|
||||||
property id_:
|
@property
|
||||||
|
def id_(self):
|
||||||
"""RETURNS (str): The span's ID."""
|
"""RETURNS (str): The span's ID."""
|
||||||
def __get__(self):
|
return self.doc.vocab.strings[self.id]
|
||||||
return self.doc.vocab.strings[self.id]
|
|
||||||
|
|
||||||
def __set__(self, str id_):
|
@id_.setter
|
||||||
self.id = self.doc.vocab.strings.add(id_)
|
def id_(self, str id_):
|
||||||
|
self.id = self.doc.vocab.strings.add(id_)
|
||||||
|
|
||||||
|
|
||||||
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
||||||
|
|
|
@ -249,15 +249,16 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return not self.c.morph == 0
|
return not self.c.morph == 0
|
||||||
|
|
||||||
property morph:
|
@property
|
||||||
def __get__(self):
|
def morph(self):
|
||||||
return MorphAnalysis.from_id(self.vocab, self.c.morph)
|
return MorphAnalysis.from_id(self.vocab, self.c.morph)
|
||||||
|
|
||||||
def __set__(self, MorphAnalysis morph):
|
@morph.setter
|
||||||
# Check that the morph has the same vocab
|
def morph(self, MorphAnalysis morph):
|
||||||
if self.vocab != morph.vocab:
|
# Check that the morph has the same vocab
|
||||||
raise ValueError(Errors.E1013)
|
if self.vocab != morph.vocab:
|
||||||
self.c.morph = morph.c.key
|
raise ValueError(Errors.E1013)
|
||||||
|
self.c.morph = morph.c.key
|
||||||
|
|
||||||
def set_morph(self, features):
|
def set_morph(self, features):
|
||||||
cdef hash_t key
|
cdef hash_t key
|
||||||
|
@ -377,39 +378,43 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return self.c.lex.suffix
|
return self.c.lex.suffix
|
||||||
|
|
||||||
property lemma:
|
@property
|
||||||
|
def lemma(self):
|
||||||
"""RETURNS (uint64): ID of the base form of the word, with no
|
"""RETURNS (uint64): ID of the base form of the word, with no
|
||||||
inflectional suffixes.
|
inflectional suffixes.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.c.lemma
|
||||||
return self.c.lemma
|
|
||||||
|
|
||||||
def __set__(self, attr_t lemma):
|
@lemma.setter
|
||||||
self.c.lemma = lemma
|
def lemma(self, attr_t lemma):
|
||||||
|
self.c.lemma = lemma
|
||||||
|
|
||||||
property pos:
|
@property
|
||||||
|
def pos(self):
|
||||||
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
|
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
|
||||||
def __get__(self):
|
return self.c.pos
|
||||||
return self.c.pos
|
|
||||||
|
|
||||||
def __set__(self, pos):
|
@pos.setter
|
||||||
self.c.pos = pos
|
def pos(self, pos):
|
||||||
|
self.c.pos = pos
|
||||||
|
|
||||||
property tag:
|
@property
|
||||||
|
def tag(self):
|
||||||
"""RETURNS (uint64): ID of fine-grained part-of-speech tag."""
|
"""RETURNS (uint64): ID of fine-grained part-of-speech tag."""
|
||||||
def __get__(self):
|
return self.c.tag
|
||||||
return self.c.tag
|
|
||||||
|
|
||||||
def __set__(self, attr_t tag):
|
@tag.setter
|
||||||
self.c.tag = tag
|
def tag(self, attr_t tag):
|
||||||
|
self.c.tag = tag
|
||||||
|
|
||||||
property dep:
|
@property
|
||||||
|
def dep(self):
|
||||||
"""RETURNS (uint64): ID of syntactic dependency label."""
|
"""RETURNS (uint64): ID of syntactic dependency label."""
|
||||||
def __get__(self):
|
return self.c.dep
|
||||||
return self.c.dep
|
|
||||||
|
|
||||||
def __set__(self, attr_t label):
|
@dep.setter
|
||||||
self.c.dep = label
|
def dep(self, attr_t label):
|
||||||
|
self.c.dep = label
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_vector(self):
|
def has_vector(self):
|
||||||
|
@ -494,48 +499,51 @@ cdef class Token:
|
||||||
return self.doc.user_token_hooks["sent"](self)
|
return self.doc.user_token_hooks["sent"](self)
|
||||||
return self.doc[self.i : self.i+1].sent
|
return self.doc[self.i : self.i+1].sent
|
||||||
|
|
||||||
property sent_start:
|
@property
|
||||||
def __get__(self):
|
def sent_start(self):
|
||||||
"""Deprecated: use Token.is_sent_start instead."""
|
"""Deprecated: use Token.is_sent_start instead."""
|
||||||
# Raising a deprecation warning here causes errors for autocomplete
|
# Raising a deprecation warning here causes errors for autocomplete
|
||||||
# Handle broken backwards compatibility case: doc[0].sent_start
|
# Handle broken backwards compatibility case: doc[0].sent_start
|
||||||
# was False.
|
# was False.
|
||||||
if self.i == 0:
|
if self.i == 0:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return self.c.sent_start
|
return self.c.sent_start
|
||||||
|
|
||||||
def __set__(self, value):
|
@sent_start.setter
|
||||||
self.is_sent_start = value
|
def sent_start(self, value):
|
||||||
|
self.is_sent_start = value
|
||||||
|
|
||||||
property is_sent_start:
|
@property
|
||||||
|
def is_sent_start(self):
|
||||||
"""A boolean value indicating whether the token starts a sentence.
|
"""A boolean value indicating whether the token starts a sentence.
|
||||||
`None` if unknown. Defaults to `True` for the first token in the `Doc`.
|
`None` if unknown. Defaults to `True` for the first token in the `Doc`.
|
||||||
|
|
||||||
RETURNS (bool / None): Whether the token starts a sentence.
|
RETURNS (bool / None): Whether the token starts a sentence.
|
||||||
None if unknown.
|
None if unknown.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
if self.c.sent_start == 0:
|
||||||
if self.c.sent_start == 0:
|
return None
|
||||||
return None
|
elif self.c.sent_start < 0:
|
||||||
elif self.c.sent_start < 0:
|
return False
|
||||||
return False
|
else:
|
||||||
else:
|
return True
|
||||||
return True
|
|
||||||
|
|
||||||
def __set__(self, value):
|
@is_sent_start.setter
|
||||||
if self.doc.has_annotation("DEP"):
|
def is_sent_start(self, value):
|
||||||
raise ValueError(Errors.E043)
|
if self.doc.has_annotation("DEP"):
|
||||||
if value is None:
|
raise ValueError(Errors.E043)
|
||||||
self.c.sent_start = 0
|
if value is None:
|
||||||
elif value is True:
|
self.c.sent_start = 0
|
||||||
self.c.sent_start = 1
|
elif value is True:
|
||||||
elif value is False:
|
self.c.sent_start = 1
|
||||||
self.c.sent_start = -1
|
elif value is False:
|
||||||
else:
|
self.c.sent_start = -1
|
||||||
raise ValueError(Errors.E044.format(value=value))
|
else:
|
||||||
|
raise ValueError(Errors.E044.format(value=value))
|
||||||
|
|
||||||
property is_sent_end:
|
@property
|
||||||
|
def is_sent_end(self):
|
||||||
"""A boolean value indicating whether the token ends a sentence.
|
"""A boolean value indicating whether the token ends a sentence.
|
||||||
`None` if unknown. Defaults to `True` for the last token in the `Doc`.
|
`None` if unknown. Defaults to `True` for the last token in the `Doc`.
|
||||||
|
|
||||||
|
@ -544,18 +552,18 @@ cdef class Token:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/token#is_sent_end
|
DOCS: https://spacy.io/api/token#is_sent_end
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
if self.i + 1 == len(self.doc):
|
||||||
if self.i + 1 == len(self.doc):
|
return True
|
||||||
return True
|
elif self.doc[self.i+1].is_sent_start is None:
|
||||||
elif self.doc[self.i+1].is_sent_start is None:
|
return None
|
||||||
return None
|
elif self.doc[self.i+1].is_sent_start is True:
|
||||||
elif self.doc[self.i+1].is_sent_start is True:
|
return True
|
||||||
return True
|
else:
|
||||||
else:
|
return False
|
||||||
return False
|
|
||||||
|
|
||||||
def __set__(self, value):
|
@is_sent_end.setter
|
||||||
raise ValueError(Errors.E196)
|
def is_sent_end(self, value):
|
||||||
|
raise ValueError(Errors.E196)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def lefts(self):
|
def lefts(self):
|
||||||
|
@ -682,41 +690,42 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return not Token.missing_head(self.c)
|
return not Token.missing_head(self.c)
|
||||||
|
|
||||||
property head:
|
@property
|
||||||
|
def head(self):
|
||||||
"""The syntactic parent, or "governor", of this token.
|
"""The syntactic parent, or "governor", of this token.
|
||||||
If token.has_head() is `False`, this method will return itself.
|
If token.has_head() is `False`, this method will return itself.
|
||||||
|
|
||||||
RETURNS (Token): The token predicted by the parser to be the head of
|
RETURNS (Token): The token predicted by the parser to be the head of
|
||||||
the current token.
|
the current token.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
if not self.has_head():
|
||||||
if not self.has_head():
|
return self
|
||||||
return self
|
else:
|
||||||
else:
|
return self.doc[self.i + self.c.head]
|
||||||
return self.doc[self.i + self.c.head]
|
|
||||||
|
|
||||||
def __set__(self, Token new_head):
|
@head.setter
|
||||||
# This function sets the head of self to new_head and updates the
|
def head(self, Token new_head):
|
||||||
# counters for left/right dependents and left/right corner for the
|
# This function sets the head of self to new_head and updates the
|
||||||
# new and the old head
|
# counters for left/right dependents and left/right corner for the
|
||||||
# Check that token is from the same document
|
# new and the old head
|
||||||
if self.doc != new_head.doc:
|
# Check that token is from the same document
|
||||||
raise ValueError(Errors.E191)
|
if self.doc != new_head.doc:
|
||||||
# Do nothing if old head is new head
|
raise ValueError(Errors.E191)
|
||||||
if self.i + self.c.head == new_head.i:
|
# Do nothing if old head is new head
|
||||||
return
|
if self.i + self.c.head == new_head.i:
|
||||||
# Find the widest l/r_edges of the roots of the two tokens involved
|
return
|
||||||
# to limit the number of tokens for set_children_from_heads
|
# Find the widest l/r_edges of the roots of the two tokens involved
|
||||||
cdef Token self_root, new_head_root
|
# to limit the number of tokens for set_children_from_heads
|
||||||
self_root = ([self] + list(self.ancestors))[-1]
|
cdef Token self_root, new_head_root
|
||||||
new_head_ancestors = list(new_head.ancestors)
|
self_root = ([self] + list(self.ancestors))[-1]
|
||||||
new_head_root = new_head_ancestors[-1] if new_head_ancestors else new_head
|
new_head_ancestors = list(new_head.ancestors)
|
||||||
start = self_root.c.l_edge if self_root.c.l_edge < new_head_root.c.l_edge else new_head_root.c.l_edge
|
new_head_root = new_head_ancestors[-1] if new_head_ancestors else new_head
|
||||||
end = self_root.c.r_edge if self_root.c.r_edge > new_head_root.c.r_edge else new_head_root.c.r_edge
|
start = self_root.c.l_edge if self_root.c.l_edge < new_head_root.c.l_edge else new_head_root.c.l_edge
|
||||||
# Set new head
|
end = self_root.c.r_edge if self_root.c.r_edge > new_head_root.c.r_edge else new_head_root.c.r_edge
|
||||||
self.c.head = new_head.i - self.i
|
# Set new head
|
||||||
# Adjust parse properties and sentence starts
|
self.c.head = new_head.i - self.i
|
||||||
set_children_from_heads(self.doc.c, start, end + 1)
|
# Adjust parse properties and sentence starts
|
||||||
|
set_children_from_heads(self.doc.c, start, end + 1)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def conjuncts(self):
|
def conjuncts(self):
|
||||||
|
@ -744,21 +753,23 @@ cdef class Token:
|
||||||
queue.append(child)
|
queue.append(child)
|
||||||
return tuple([w for w in output if w.i != self.i])
|
return tuple([w for w in output if w.i != self.i])
|
||||||
|
|
||||||
property ent_type:
|
@property
|
||||||
|
def ent_type(self):
|
||||||
"""RETURNS (uint64): Named entity type."""
|
"""RETURNS (uint64): Named entity type."""
|
||||||
def __get__(self):
|
return self.c.ent_type
|
||||||
return self.c.ent_type
|
|
||||||
|
|
||||||
def __set__(self, ent_type):
|
@ent_type.setter
|
||||||
self.c.ent_type = ent_type
|
def ent_type(self, ent_type):
|
||||||
|
self.c.ent_type = ent_type
|
||||||
|
|
||||||
property ent_type_:
|
@property
|
||||||
|
def ent_type_(self):
|
||||||
"""RETURNS (str): Named entity type."""
|
"""RETURNS (str): Named entity type."""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.ent_type]
|
||||||
return self.vocab.strings[self.c.ent_type]
|
|
||||||
|
|
||||||
def __set__(self, ent_type):
|
@ent_type_.setter
|
||||||
self.c.ent_type = self.vocab.strings.add(ent_type)
|
def ent_type_(self, ent_type):
|
||||||
|
self.c.ent_type = self.vocab.strings.add(ent_type)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ent_iob(self):
|
def ent_iob(self):
|
||||||
|
@ -784,41 +795,45 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return self.iob_strings()[self.c.ent_iob]
|
return self.iob_strings()[self.c.ent_iob]
|
||||||
|
|
||||||
property ent_id:
|
@property
|
||||||
|
def ent_id(self):
|
||||||
"""RETURNS (uint64): ID of the entity the token is an instance of,
|
"""RETURNS (uint64): ID of the entity the token is an instance of,
|
||||||
if any.
|
if any.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.c.ent_id
|
||||||
return self.c.ent_id
|
|
||||||
|
|
||||||
def __set__(self, hash_t key):
|
@ent_id.setter
|
||||||
self.c.ent_id = key
|
def ent_id(self, hash_t key):
|
||||||
|
self.c.ent_id = key
|
||||||
|
|
||||||
property ent_id_:
|
@property
|
||||||
|
def ent_id_(self):
|
||||||
"""RETURNS (str): ID of the entity the token is an instance of,
|
"""RETURNS (str): ID of the entity the token is an instance of,
|
||||||
if any.
|
if any.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.ent_id]
|
||||||
return self.vocab.strings[self.c.ent_id]
|
|
||||||
|
|
||||||
def __set__(self, name):
|
@ent_id_.setter
|
||||||
self.c.ent_id = self.vocab.strings.add(name)
|
def ent_id_(self, name):
|
||||||
|
self.c.ent_id = self.vocab.strings.add(name)
|
||||||
|
|
||||||
property ent_kb_id:
|
@property
|
||||||
|
def ent_kb_id(self):
|
||||||
"""RETURNS (uint64): Named entity KB ID."""
|
"""RETURNS (uint64): Named entity KB ID."""
|
||||||
def __get__(self):
|
return self.c.ent_kb_id
|
||||||
return self.c.ent_kb_id
|
|
||||||
|
|
||||||
def __set__(self, attr_t ent_kb_id):
|
@ent_kb_id.setter
|
||||||
self.c.ent_kb_id = ent_kb_id
|
def ent_kb_id(self, attr_t ent_kb_id):
|
||||||
|
self.c.ent_kb_id = ent_kb_id
|
||||||
|
|
||||||
property ent_kb_id_:
|
@property
|
||||||
|
def ent_kb_id_(self):
|
||||||
"""RETURNS (str): Named entity KB ID."""
|
"""RETURNS (str): Named entity KB ID."""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.ent_kb_id]
|
||||||
return self.vocab.strings[self.c.ent_kb_id]
|
|
||||||
|
|
||||||
def __set__(self, ent_kb_id):
|
@ent_kb_id_.setter
|
||||||
self.c.ent_kb_id = self.vocab.strings.add(ent_kb_id)
|
def ent_kb_id_(self, ent_kb_id):
|
||||||
|
self.c.ent_kb_id = self.vocab.strings.add(ent_kb_id)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def whitespace_(self):
|
def whitespace_(self):
|
||||||
|
@ -840,16 +855,17 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return self.vocab.strings[self.c.lex.lower]
|
return self.vocab.strings[self.c.lex.lower]
|
||||||
|
|
||||||
property norm_:
|
@property
|
||||||
|
def norm_(self):
|
||||||
"""RETURNS (str): The token's norm, i.e. a normalised form of the
|
"""RETURNS (str): The token's norm, i.e. a normalised form of the
|
||||||
token text. Usually set in the language's tokenizer exceptions or
|
token text. Usually set in the language's tokenizer exceptions or
|
||||||
norm exceptions.
|
norm exceptions.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.norm]
|
||||||
return self.vocab.strings[self.norm]
|
|
||||||
|
|
||||||
def __set__(self, str norm_):
|
@norm_.setter
|
||||||
self.c.norm = self.vocab.strings.add(norm_)
|
def norm_(self, str norm_):
|
||||||
|
self.c.norm = self.vocab.strings.add(norm_)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def shape_(self):
|
def shape_(self):
|
||||||
|
@ -879,33 +895,36 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return self.vocab.strings[self.c.lex.lang]
|
return self.vocab.strings[self.c.lex.lang]
|
||||||
|
|
||||||
property lemma_:
|
@property
|
||||||
|
def lemma_(self):
|
||||||
"""RETURNS (str): The token lemma, i.e. the base form of the word,
|
"""RETURNS (str): The token lemma, i.e. the base form of the word,
|
||||||
with no inflectional suffixes.
|
with no inflectional suffixes.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.lemma]
|
||||||
return self.vocab.strings[self.c.lemma]
|
|
||||||
|
|
||||||
def __set__(self, str lemma_):
|
@lemma_.setter
|
||||||
self.c.lemma = self.vocab.strings.add(lemma_)
|
def lemma_(self, str lemma_):
|
||||||
|
self.c.lemma = self.vocab.strings.add(lemma_)
|
||||||
|
|
||||||
property pos_:
|
@property
|
||||||
|
def pos_(self):
|
||||||
"""RETURNS (str): Coarse-grained part-of-speech tag."""
|
"""RETURNS (str): Coarse-grained part-of-speech tag."""
|
||||||
def __get__(self):
|
return parts_of_speech.NAMES[self.c.pos]
|
||||||
return parts_of_speech.NAMES[self.c.pos]
|
|
||||||
|
|
||||||
def __set__(self, pos_name):
|
@pos_.setter
|
||||||
if pos_name not in parts_of_speech.IDS:
|
def pos_(self, pos_name):
|
||||||
raise ValueError(Errors.E1021.format(pp=pos_name))
|
if pos_name not in parts_of_speech.IDS:
|
||||||
self.c.pos = parts_of_speech.IDS[pos_name]
|
raise ValueError(Errors.E1021.format(pp=pos_name))
|
||||||
|
self.c.pos = parts_of_speech.IDS[pos_name]
|
||||||
|
|
||||||
property tag_:
|
@property
|
||||||
|
def tag_(self):
|
||||||
"""RETURNS (str): Fine-grained part-of-speech tag."""
|
"""RETURNS (str): Fine-grained part-of-speech tag."""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.tag]
|
||||||
return self.vocab.strings[self.c.tag]
|
|
||||||
|
|
||||||
def __set__(self, tag):
|
@tag_.setter
|
||||||
self.tag = self.vocab.strings.add(tag)
|
def tag_(self, tag):
|
||||||
|
self.tag = self.vocab.strings.add(tag)
|
||||||
|
|
||||||
def has_dep(self):
|
def has_dep(self):
|
||||||
"""Check whether the token has annotated dep information.
|
"""Check whether the token has annotated dep information.
|
||||||
|
@ -915,13 +934,14 @@ cdef class Token:
|
||||||
"""
|
"""
|
||||||
return not Token.missing_dep(self.c)
|
return not Token.missing_dep(self.c)
|
||||||
|
|
||||||
property dep_:
|
@property
|
||||||
|
def dep_(self):
|
||||||
"""RETURNS (str): The syntactic dependency label."""
|
"""RETURNS (str): The syntactic dependency label."""
|
||||||
def __get__(self):
|
return self.vocab.strings[self.c.dep]
|
||||||
return self.vocab.strings[self.c.dep]
|
|
||||||
|
|
||||||
def __set__(self, str label):
|
@dep_.setter
|
||||||
self.c.dep = self.vocab.strings.add(label)
|
def dep_(self, str label):
|
||||||
|
self.c.dep = self.vocab.strings.add(label)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_oov(self):
|
def is_oov(self):
|
||||||
|
|
|
@ -88,23 +88,25 @@ cdef class Example:
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.predicted)
|
return len(self.predicted)
|
||||||
|
|
||||||
property predicted:
|
@property
|
||||||
def __get__(self):
|
def predicted(self):
|
||||||
return self.x
|
return self.x
|
||||||
|
|
||||||
def __set__(self, doc):
|
@predicted.setter
|
||||||
self.x = doc
|
def predicted(self, doc):
|
||||||
self._cached_alignment = None
|
self.x = doc
|
||||||
self._cached_words_x = [t.text for t in doc]
|
self._cached_alignment = None
|
||||||
|
self._cached_words_x = [t.text for t in doc]
|
||||||
|
|
||||||
property reference:
|
@property
|
||||||
def __get__(self):
|
def reference(self):
|
||||||
return self.y
|
return self.y
|
||||||
|
|
||||||
def __set__(self, doc):
|
@reference.setter
|
||||||
self.y = doc
|
def reference(self, doc):
|
||||||
self._cached_alignment = None
|
self.y = doc
|
||||||
self._cached_words_y = [t.text for t in doc]
|
self._cached_alignment = None
|
||||||
|
self._cached_words_y = [t.text for t in doc]
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
return Example(
|
return Example(
|
||||||
|
@ -420,9 +422,9 @@ cdef class Example:
|
||||||
seen_indices.update(indices)
|
seen_indices.update(indices)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
property text:
|
@property
|
||||||
def __get__(self):
|
def text(self):
|
||||||
return self.x.text
|
return self.x.text
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return str(self.to_dict())
|
return str(self.to_dict())
|
||||||
|
|
|
@ -88,16 +88,17 @@ cdef class Vocab:
|
||||||
self.writing_system = writing_system
|
self.writing_system = writing_system
|
||||||
self.get_noun_chunks = get_noun_chunks
|
self.get_noun_chunks = get_noun_chunks
|
||||||
|
|
||||||
property vectors:
|
@property
|
||||||
def __get__(self):
|
def vectors(self):
|
||||||
return self._vectors
|
return self._vectors
|
||||||
|
|
||||||
def __set__(self, vectors):
|
@vectors.setter
|
||||||
if hasattr(vectors, "strings"):
|
def vectors(self, vectors):
|
||||||
for s in vectors.strings:
|
if hasattr(vectors, "strings"):
|
||||||
self.strings.add(s)
|
for s in vectors.strings:
|
||||||
self._vectors = vectors
|
self.strings.add(s)
|
||||||
self._vectors.strings = self.strings
|
self._vectors = vectors
|
||||||
|
self._vectors.strings = self.strings
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def lang(self):
|
def lang(self):
|
||||||
|
@ -464,17 +465,18 @@ cdef class Vocab:
|
||||||
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
|
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
|
||||||
return key in self.vectors
|
return key in self.vectors
|
||||||
|
|
||||||
property lookups:
|
@property
|
||||||
def __get__(self):
|
def lookups(self):
|
||||||
return self._lookups
|
return self._lookups
|
||||||
|
|
||||||
def __set__(self, lookups):
|
@lookups.setter
|
||||||
self._lookups = lookups
|
def lookups(self, lookups):
|
||||||
if lookups.has_table("lexeme_norm"):
|
self._lookups = lookups
|
||||||
self.lex_attr_getters[NORM] = util.add_lookups(
|
if lookups.has_table("lexeme_norm"):
|
||||||
self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM]),
|
self.lex_attr_getters[NORM] = util.add_lookups(
|
||||||
self.lookups.get_table("lexeme_norm"),
|
self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM]),
|
||||||
)
|
self.lookups.get_table("lexeme_norm"),
|
||||||
|
)
|
||||||
|
|
||||||
def to_disk(self, path, *, exclude=tuple()):
|
def to_disk(self, path, *, exclude=tuple()):
|
||||||
"""Save the current state to a directory.
|
"""Save the current state to a directory.
|
||||||
|
|
Loading…
Reference in New Issue