From cfa406214715548a1e605ce1c5cfd27947c44523 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 12 Nov 2015 04:56:07 +1100
Subject: [PATCH] * Add test for matcher end-point problem

---
 spacy/tests/parser/test_ner.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 935aab4cf..182225758 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -1,6 +1,10 @@
 from __future__ import unicode_literals, print_function
 import pytest
 
+from spacy.attrs import LOWER
+from spacy.matcher import Matcher
+
+
 @pytest.mark.models
 def test_simple_types(EN):
     tokens = EN(u'Mr. Best flew to New York on Saturday morning.')
@@ -21,3 +25,35 @@ def test_consistency_bug(EN):
     tokens = EN(u'''Charity and other short-term aid have buoyed them so far, and a tax-relief bill working its way through Congress would help. But the September 11 Victim Compensation Fund, enacted by Congress to discourage people from filing lawsuits, will determine the shape of their lives for years to come.\n\n''', entity=False)
     ents = EN.matcher(tokens)
     EN.entity(tokens)
+
+
+@pytest.mark.models
+def test_unit_end_gazetteer(EN):
+    '''Test a bug in the interaction between the NER model and the gazetteer.
+
+    'cal' is both a complete one-token pattern and the start of the
+    two-token pattern 'cal henderson'. The matcher must report exactly one
+    match, and after EN.entity(doc) the first entity must be 'cal'.
+    '''
+    matcher = Matcher(EN.vocab,
+                      {'MemberNames':
+                          ('PERSON', {},
+                           [
+                               [{LOWER: 'cal'}],
+                               [{LOWER: 'cal'}, {LOWER: 'henderson'}],
+                           ]
+                          )
+                      }
+                     )
+
+    doc = EN(u'who is cal the manager of?')
+    # The checks below only run when EN() itself produced no entities here.
+    if len(list(doc.ents)) == 0:
+        ents = matcher(doc)
+        assert len(ents) == 1
+        # Go through the EN fixture, as test_consistency_bug does, and read
+        # the entities from the doc.
+        EN.entity(doc)
+        assert list(doc.ents)[0].text == 'cal'
+
+
+