Support Any comparisons for Token and Span (#13058)

* Support Any comparisons for Token and Span

* Preserve previous behavior for None
This commit is contained in:
Adriane Boyd 2023-10-12 11:53:33 +02:00 committed by GitHub
parent d72029d9c8
commit ea1befa8ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 35 additions and 6 deletions

View File

@ -731,3 +731,12 @@ def test_for_no_ent_sents():
sents = list(doc.ents[0].sents) sents = list(doc.ents[0].sents)
assert len(sents) == 1 assert len(sents) == 1
assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY" assert str(sents[0]) == str(doc.ents[0].sent) == "ENTITY"
def test_span_api_richcmp_other(en_tokenizer):
    """Check that ``==`` between a Span and a non-matching object is falsy.

    The right-hand operands cover a Token from the same doc, a Token from a
    different doc, a Span from a different doc, and a whole Doc.
    """
    first_doc = en_tokenizer("a b")
    second_doc = en_tokenizer("b c")

    # Span on the left, compared against objects that must not equal it.
    candidates = (
        first_doc[1],     # Token taken from the same doc
        second_doc[0],    # Token taken from the other doc
        second_doc[0:1],  # Span taken from the other doc
    )
    for candidate in candidates:
        assert not first_doc[1:2] == candidate

    # A Span compared against an entire Doc.
    assert not first_doc[0:1] == second_doc

View File

@ -294,3 +294,12 @@ def test_missing_head_dep(en_vocab):
assert aligned_heads[0] == ref_heads[0] assert aligned_heads[0] == ref_heads[0]
assert aligned_deps[5] == ref_deps[5] assert aligned_deps[5] == ref_deps[5]
assert aligned_heads[5] == ref_heads[5] assert aligned_heads[5] == ref_heads[5]
def test_token_api_richcmp_other(en_tokenizer):
    """Check that ``==`` between a Token and a non-matching object is falsy.

    The right-hand operands cover a Span from the same doc, a Span from a
    different doc, a Token from a different doc, and a whole Doc.
    """
    first_doc = en_tokenizer("a b")
    second_doc = en_tokenizer("b c")

    # Token on the left, compared against objects that must not equal it.
    candidates = (
        first_doc[0:1],   # Span taken from the same doc
        second_doc[1:2],  # Span taken from the other doc
        second_doc[0],    # Token taken from the other doc
    )
    for candidate in candidates:
        assert not first_doc[1] == candidate

    # A Token compared against an entire Doc.
    assert not first_doc[0] == second_doc

View File

@ -127,14 +127,17 @@ cdef class Span:
self._vector = vector self._vector = vector
self._vector_norm = vector_norm self._vector_norm = vector_norm
def __richcmp__(self, Span other, int op): def __richcmp__(self, object other, int op):
if other is None: if other is None:
if op == 0 or op == 1 or op == 2: if op == 0 or op == 1 or op == 2:
return False return False
else: else:
return True return True
if not isinstance(other, Span):
return False
cdef Span other_span = other
self_tuple = (self.c.start_char, self.c.end_char, self.c.label, self.c.kb_id, self.id, self.doc) self_tuple = (self.c.start_char, self.c.end_char, self.c.label, self.c.kb_id, self.id, self.doc)
other_tuple = (other.c.start_char, other.c.end_char, other.c.label, other.c.kb_id, other.id, other.doc) other_tuple = (other_span.c.start_char, other_span.c.end_char, other_span.c.label, other_span.c.kb_id, other_span.id, other_span.doc)
# < # <
if op == 0: if op == 0:
return self_tuple < other_tuple return self_tuple < other_tuple

View File

@ -53,7 +53,12 @@ class Token:
def __bytes__(self) -> bytes: ... def __bytes__(self) -> bytes: ...
def __str__(self) -> str: ... def __str__(self) -> str: ...
def __repr__(self) -> str: ... def __repr__(self) -> str: ...
def __richcmp__(self, other: Token, op: int) -> bool: ... def __lt__(self, other: Any) -> bool: ...
def __le__(self, other: Any) -> bool: ...
def __eq__(self, other: Any) -> bool: ...
def __ne__(self, other: Any) -> bool: ...
def __gt__(self, other: Any) -> bool: ...
def __ge__(self, other: Any) -> bool: ...
@property @property
def _(self) -> Underscore: ... def _(self) -> Underscore: ...
def nbor(self, i: int = ...) -> Token: ... def nbor(self, i: int = ...) -> Token: ...

View File

@ -139,17 +139,20 @@ cdef class Token:
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
def __richcmp__(self, Token other, int op): def __richcmp__(self, object other, int op):
# http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html # http://cython.readthedocs.io/en/latest/src/userguide/special_methods.html
if other is None: if other is None:
if op in (0, 1, 2): if op in (0, 1, 2):
return False return False
else: else:
return True return True
if not isinstance(other, Token):
return False
cdef Token other_token = other
cdef Doc my_doc = self.doc cdef Doc my_doc = self.doc
cdef Doc other_doc = other.doc cdef Doc other_doc = other_token.doc
my = self.idx my = self.idx
their = other.idx their = other_token.idx
if op == 0: if op == 0:
return my < their return my < their
elif op == 2: elif op == 2: