mirror of https://github.com/explosion/spaCy.git
22 lines
626 B
Python
22 lines
626 B
Python
|
from spacy.util import detokenize
|
||
|
|
||
|
def test_punct():
    """Comma and full stop should each be grouped with the preceding token."""
    words = 'Pierre Vinken , 61 years old .'.split()
    rules = ('<SEP>,', '<SEP>.')
    # Expected groups of token indices: "Vinken ," -> (1, 2), "old ." -> (5, 6).
    expected = [(0,), (1, 2), (3,), (4,), (5, 6)]
    assert detokenize(rules, words) == expected
|
||
|
|
||
|
|
||
|
def test_contractions():
    """The split contraction "ca n't" should be grouped into one unit."""
    words = "I ca n't even".split()
    rules = ("ca<SEP>n't",)
    # Expected groups of token indices: "ca" and "n't" -> (1, 2).
    expected = [(0,), (1, 2), (3,)]
    assert detokenize(rules, words) == expected
|
||
|
|
||
|
|
||
|
def test_contractions_punct():
    """A contraction followed by '!' should collapse into a single group."""
    words = "I ca n't !".split()
    rules = ("ca<SEP>n't", '<SEP>!')
    # Expected groups of token indices: "ca", "n't" and "!" -> (1, 2, 3).
    expected = [(0,), (1, 2, 3)]
    assert detokenize(rules, words) == expected
|