From d94e961f14af61dba4f01e0e2821217f38b85fbf Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 23 Jun 2020 13:29:51 +0200 Subject: [PATCH] Fix polarity of Token.is_oov and Lexeme.is_oov (#5634) Fix `Token.is_oov` and `Lexeme.is_oov` so they return `True` when the lexeme does **not** have a vector. --- spacy/lexeme.pyx | 2 +- spacy/tests/vocab_vectors/test_vectors.py | 6 +++--- spacy/tokens/token.pyx | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 1df516dcb..8042098d7 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -349,7 +349,7 @@ cdef class Lexeme: @property def is_oov(self): """RETURNS (bool): Whether the lexeme is out-of-vocabulary.""" - return self.orth in self.vocab.vectors + return self.orth not in self.vocab.vectors property is_stop: """RETURNS (bool): Whether the lexeme is a stop word.""" diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py index 576ca93d2..b31cef1f2 100644 --- a/spacy/tests/vocab_vectors/test_vectors.py +++ b/spacy/tests/vocab_vectors/test_vectors.py @@ -376,6 +376,6 @@ def test_vector_is_oov(): data[1] = 2.0 vocab.set_vector("cat", data[0]) vocab.set_vector("dog", data[1]) - assert vocab["cat"].is_oov is True - assert vocab["dog"].is_oov is True - assert vocab["hamster"].is_oov is False + assert vocab["cat"].is_oov is False + assert vocab["dog"].is_oov is False + assert vocab["hamster"].is_oov is True diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 45deebc93..8d3406bae 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -923,7 +923,7 @@ cdef class Token: @property def is_oov(self): """RETURNS (bool): Whether the token is out-of-vocabulary.""" - return self.c.lex.orth in self.vocab.vectors + return self.c.lex.orth not in self.vocab.vectors @property def is_stop(self):