Fix polarity of Token.is_oov and Lexeme.is_oov (#5634)

Fix `Token.is_oov` and `Lexeme.is_oov` so they return `True` when the
lexeme does **not** have a vector.
This commit is contained in:
Adriane Boyd 2020-06-23 13:29:51 +02:00 committed by GitHub
parent 0ef78bad93
commit d94e961f14
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 5 deletions

View File

@@ -349,7 +349,7 @@ cdef class Lexeme:
@property @property
def is_oov(self): def is_oov(self):
"""RETURNS (bool): Whether the lexeme is out-of-vocabulary.""" """RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
return self.orth in self.vocab.vectors return self.orth not in self.vocab.vectors
property is_stop: property is_stop:
"""RETURNS (bool): Whether the lexeme is a stop word.""" """RETURNS (bool): Whether the lexeme is a stop word."""

View File

@@ -376,6 +376,6 @@ def test_vector_is_oov():
data[1] = 2.0 data[1] = 2.0
vocab.set_vector("cat", data[0]) vocab.set_vector("cat", data[0])
vocab.set_vector("dog", data[1]) vocab.set_vector("dog", data[1])
assert vocab["cat"].is_oov is True assert vocab["cat"].is_oov is False
assert vocab["dog"].is_oov is True assert vocab["dog"].is_oov is False
assert vocab["hamster"].is_oov is False assert vocab["hamster"].is_oov is True

View File

@@ -923,7 +923,7 @@ cdef class Token:
@property @property
def is_oov(self): def is_oov(self):
"""RETURNS (bool): Whether the token is out-of-vocabulary.""" """RETURNS (bool): Whether the token is out-of-vocabulary."""
return self.c.lex.orth in self.vocab.vectors return self.c.lex.orth not in self.vocab.vectors
@property @property
def is_stop(self): def is_stop(self):