* Update tokenizer with i.e. tests

This commit is contained in:
Matthew Honnibal 2015-02-18 06:37:04 -05:00
parent ba1d3ddd7f
commit 47a4371fea
1 changed file with 7 additions and 0 deletions

View File

@ -119,6 +119,13 @@ def test_bracket_period(EN):
tokens = EN(text) tokens = EN(text)
assert tokens[len(tokens) - 1].orth_ == u'.' assert tokens[len(tokens) - 1].orth_ == u'.'
def test_ie(EN):
    """Check that the abbreviation "i.e." survives tokenization as one token.

    EN is called with the text and must return an indexable, sized sequence
    of tokens exposing ``orth_`` (presumably the English tokenizer fixture
    supplied by the test suite -- confirm against the fixture definition).
    """
    text = u"It's mediocre i.e. bad."
    tokens = EN(text)
    # Six tokens expected; presumably It | 's | mediocre | i.e. | bad | .
    assert len(tokens) == 6
    # Unicode literal, like the other literals in this file, so the comparison
    # never depends on implicit bytes/unicode coercion under Python 2.
    assert tokens[3].orth_ == u"i.e."
#def test_cnts7():
# text = 'But then the 6,000-year ice age came...'
# tokens = EN.tokenize(text)