spaCy/spacy/tests/regression/test_issue1547.py

18 lines
492 B
Python

# coding: utf8
from __future__ import unicode_literals
import pytest
from ...vocab import Vocab
from ...tokens import Doc, Span
@pytest.mark.xfail
def test_issue1547():
    """Check that a doc still reports entities after an overlapping merge.

    A single PRODUCT entity is set over tokens 6-7 ("Biosphere", "2"), then
    tokens 5-6 ("-", "Biosphere") are merged into one token. The test only
    requires that ``doc.ents`` is non-empty afterwards. It is marked as an
    expected failure.
    """
    tokens = ['\n', 'worda', '.', '\n', 'wordb', '-', 'Biosphere', '2', '-', ' \n']
    vocab = Vocab()
    doc = Doc(vocab, words=tokens)

    # Register one entity covering tokens 6 and 7.
    product_label = doc.vocab.strings['PRODUCT']
    product = Span(doc, 6, 8, label=product_label)
    doc.ents = [product]

    # Merge a slice that straddles the start of the entity.
    doc[5:7].merge()

    entity_texts = [ent.text for ent in doc.ents]
    assert entity_texts