From b40b4c2c31c7e43f7cee1f491e57d444bf1fd6d1 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 11 Jul 2019 12:55:11 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=AB=20Fix=20issue=20#3839:=20Incorrect?= =?UTF-8?q?=20entity=20IDs=20from=20Matcher=20with=20operators=20(#3949)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add regression test for issue #3541 * Add comment on bugfix * Remove incorrect test * Un-xfail test --- spacy/matcher/matcher.pyx | 8 ++++---- spacy/tests/regression/test_issue3839.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 2dd8c2940..86658ce99 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -262,13 +262,13 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc, extensions=None, cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil: + # There have been a few bugs here. # The code was originally designed to always have pattern[1].attrs.value # be the ent_id when we get to the end of a pattern. However, Issue #2671 # showed this wasn't the case when we had a reject-and-continue before a - # match. I still don't really understand what's going on here, but this - # workaround does resolve the issue. - while pattern.attrs.attr != ID and \ - (pattern.nr_attr > 0 or pattern.nr_extra_attr > 0 or pattern.nr_py > 0): + # match. + # The patch to #2671 was wrong though, which came up in #3839. + while pattern.attrs.attr != ID: pattern += 1 return pattern.attrs.value diff --git a/spacy/tests/regression/test_issue3839.py b/spacy/tests/regression/test_issue3839.py index fa915faf0..34d6bb46e 100644 --- a/spacy/tests/regression/test_issue3839.py +++ b/spacy/tests/regression/test_issue3839.py @@ -6,7 +6,6 @@ from spacy.matcher import Matcher from spacy.tokens import Doc -@pytest.mark.xfail def test_issue3839(en_vocab): """Test that match IDs returned by the matcher are correct, are in the string """ doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])