2020-01-06 13:57:34 +00:00
|
|
|
# coding: utf8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from spacy.lang.en import English
|
|
|
|
from spacy.pipeline import EntityRuler
|
|
|
|
|
|
|
|
|
|
|
|
def test_issue4849():
    """Regression test for issue 4849.

    Builds an English pipeline with an EntityRuler holding two PERSON
    phrase patterns (each carrying an "id"), then checks that running the
    same text through ``nlp.pipe`` yields two entities with ``ent_id > 0``
    both with ``n_process=1`` and with ``n_process=2``.
    """
    nlp = English()
    person_patterns = [
        {"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
        {"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
    ]
    # Patterns are lowercase while the text is capitalised; the ruler is
    # configured with phrase_matcher_attr="LOWER".
    ruler = EntityRuler(nlp, patterns=person_patterns, phrase_matcher_attr="LOWER")
    nlp.add_pipe(ruler)

    text = """
    The left is starting to take aim at Democratic front-runner Joe Biden.
    Sen. Bernie Sanders joined in her criticism: "There is no 'middle ground' when it comes to climate policy."
    """

    def count_ents_with_id(n_process):
        # Number of entities whose ent_id is positive across all piped docs
        # (presumably: entities to which the ruler attached a pattern id).
        return sum(
            1
            for doc in nlp.pipe([text], n_process=n_process)
            for ent in doc.ents
            if ent.ent_id > 0
        )

    # USING 1 PROCESS
    assert count_ents_with_id(1) == 2

    # USING 2 PROCESSES
    assert count_ents_with_id(2) == 2
|