mirror of https://github.com/explosion/spaCy.git
12 lines
376 B
Python
12 lines
376 B
Python
|
# coding: utf-8
|
|||
|
from __future__ import unicode_literals
|
|||
|
|
|||
|
|
|||
|
def test_issue3277(es_tokenizer):
    """Regression test for issue 3277: dashes must become separate tokens.

    The sample sentence opens with an em dash (U+2014) glued to the first
    word and wraps an interjection in en dashes (U+2013). The tokenizer is
    expected to produce 14 tokens, with the dashes standing alone at
    positions 0, 5 and 9.

    es_tokenizer: tokenizer callable supplied as a pytest fixture
        (presumably the Spanish tokenizer from the suite's conftest).
    """
    em_dash = "\u2014"
    en_dash = "\u2013"
    # (token index, expected token text) for every dash in the sentence.
    expected_dashes = ((0, em_dash), (5, en_dash), (9, en_dash))

    tokens = es_tokenizer("—Yo me llamo... –murmuró el niño– Emilio Sánchez Pérez.")

    assert len(tokens) == 14
    for position, dash in expected_dashes:
        assert tokens[position].text == dash
|