2014-08-30 17:00:10 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from spacy.orth import word_shape as ws
|
|
|
|
|
|
|
|
|
|
|
|
def test_capitalized():
    """Check that ``ws('Nasa')`` yields the shape string ``'Xxxx'``."""
    observed = ws('Nasa')
    expected = 'Xxxx'
    assert observed == expected
|
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2014-08-30 17:00:10 +00:00
|
|
|
def test_truncate():
    """Check that the 11-letter lowercase input maps to just ``'xxxx'``.

    The expected shape is shorter than the input word, which is the
    behaviour this test pins down.
    """
    observed = ws('capitalized')
    expected = 'xxxx'
    assert observed == expected
|
2014-08-30 17:00:10 +00:00
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2014-08-30 17:00:10 +00:00
|
|
|
def test_digits():
    """Check that a nine-digit string maps to the shape ``'dddd'``."""
    observed = ws('999999999')
    expected = 'dddd'
    assert observed == expected
|
2014-08-30 17:00:10 +00:00
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2014-08-30 17:00:10 +00:00
|
|
|
def test_mix():
    """Check that alternating capitals and digits map to ``'XdXd'``."""
    observed = ws('C3P0')
    expected = 'XdXd'
    assert observed == expected
|
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2014-08-30 17:00:10 +00:00
|
|
|
def test_punct():
    """Check that a lone comma comes back unchanged as its own shape."""
    token = ','
    observed = ws(token)
    assert observed == ','
|
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2014-08-30 17:00:10 +00:00
|
|
|
def test_space():
    """Check that a newline character comes back unchanged as its shape."""
    token = '\n'
    observed = ws(token)
    assert observed == '\n'
|
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2014-08-30 17:00:10 +00:00
|
|
|
def test_punct_seq():
    """Check that the punctuation run ``'``,-'`` comes back unchanged."""
    token = '``,-'
    observed = ws(token)
    assert observed == '``,-'
|