mirror of https://github.com/explosion/spaCy.git
* Add test for Issue #242: Overlapping matches not well recognised.
This commit is contained in:
parent
b98cc3266d
commit
f12b043308
|
@ -1,8 +1,11 @@
|
||||||
import pytest
|
import pytest
|
||||||
import numpy
|
import numpy
|
||||||
|
import os
|
||||||
|
|
||||||
|
import spacy
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import ORTH, LOWER, ENT_IOB, ENT_TYPE
|
from spacy.attrs import ORTH, LOWER, ENT_IOB, ENT_TYPE
|
||||||
|
from spacy.attrs import ORTH, TAG, LOWER, IS_ALPHA, FLAG63
|
||||||
from spacy.symbols import DATE
|
from spacy.symbols import DATE
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,6 +34,30 @@ def test_overlap_issue118(EN):
|
||||||
assert ents[0].end == 11
|
assert ents[0].end == 11
|
||||||
|
|
||||||
|
|
||||||
|
def test_overlap_issue242():
    '''Regression test for Issue #242: overlapping multi-word matches.

    Two two-token patterns that share the token "safety" are registered
    under a single matcher entry; the test expects the document to expose
    exactly two entities, "food safety" and "safety standards".
    '''
    # An unset or empty SPACY_DATA both mean "no explicit data directory".
    data_dir = os.environ.get('SPACY_DATA') or None

    # Tagger, parser and entity recogniser are switched off for this pipeline.
    nlp = spacy.en.English(
        data_dir=data_dir,
        tagger=False,
        parser=False,
        entity=False,
    )

    # The two phrases overlap on the middle token "safety".
    overlapping_phrases = [
        [{LOWER: 'food'}, {LOWER: 'safety'}],
        [{LOWER: 'safety'}, {LOWER: 'standards'}],
    ]
    nlp.matcher.add('FOOD', 'FOOD', {}, overlapping_phrases)

    parsed = nlp(u'There are different food safety standards in different countries.')

    # Unpacking doubles as a check that exactly two entities were produced.
    first_ent, second_ent = parsed.ents
    assert first_ent.text == u'food safety'
    assert second_ent.text == u'safety standards'
|
||||||
|
|
||||||
|
|
||||||
def test_overlap_reorder(EN):
|
def test_overlap_reorder(EN):
|
||||||
'''Test order dependence'''
|
'''Test order dependence'''
|
||||||
doc = EN.tokenizer(u'how many points did lebron james score against the boston celtics last night')
|
doc = EN.tokenizer(u'how many points did lebron james score against the boston celtics last night')
|
||||||
|
|
Loading…
Reference in New Issue