2014-08-30 17:00:10 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from spacy.orth import word_shape as ws
|
|
|
|
|
|
|
|
|
|
|
|
def test_capitalized():
    """The shape of 'Nasa' is expected to be 'Xxxx'."""
    shape = ws('Nasa')
    assert shape == 'Xxxx'
|
|
|
|
|
|
|
|
def test_truncate():
    """The 11-letter lowercase word 'capitalized' is expected to give 'xxxxx'."""
    shape = ws('capitalized')
    assert shape == 'xxxxx'
|
2014-08-30 17:00:10 +00:00
|
|
|
|
|
|
|
def test_digits():
    """A run of nine '9' characters is expected to give 'ddddd'."""
    shape = ws('999999999')
    assert shape == 'ddddd'
|
2014-08-30 17:00:10 +00:00
|
|
|
|
|
|
|
def test_mix():
    """Alternating uppercase letters and digits ('C3P0') should give 'XdXd'."""
    shape = ws('C3P0')
    assert shape == 'XdXd'
|
|
|
|
|
|
|
|
def test_punct():
    """A single comma is expected to come back unchanged."""
    shape = ws(',')
    assert shape == ','
|
|
|
|
|
|
|
|
def test_space():
    """A lone newline character is expected to come back unchanged."""
    shape = ws('\n')
    assert shape == '\n'
|
|
|
|
|
|
|
|
def test_punct_seq():
    """A sequence of punctuation characters is expected to come back unchanged."""
    shape = ws('``,-')
    assert shape == '``,-'
|