# Recovered from a git format-patch that had been collapsed onto one line:
#   commit 99b5cefa88a96f8a98cf61793132e75801144a3f
#   From: Matthew Honnibal
#   Date: Sun, 2 Nov 2014 13:22:14 +1100
#   Subject: [PATCH] * Add tests for emoticon tokenization
#   Creates tests/test_emoticons.py (1 file changed, 29 insertions).
from __future__ import unicode_literals

from spacy.en import EN


def test_tweebo_challenge():
    """Check that ``EN.tokenize`` returns each sample emoticon as one token.

    The leading tokens of the tokenized sample are compared, in order,
    against the expected emoticon strings through their ``.string``
    attribute. Only the positions listed in ``expected_strings`` are
    inspected; nothing is asserted about any tokens that may follow.
    """
    text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
    # One entry per expected token, in the order they appear in ``text``.
    expected_strings = (
        ":o",
        ":/",
        ":'(",
        ">:o",
        "(:",
        ":)",
        ">.<",
        "XD",
        "-__-",
        "o.O",
        ";D",
        ":-)",
        "@_@",
        ":P",
        "8D",
        ":1",
        ">:(",
        ":D",
        "=|",
        '")',
        ":>",
        "....",
    )
    tokens = EN.tokenize(text)
    # Index explicitly (rather than zip) so a too-short result still fails
    # loudly at the first missing position.
    for position, emoticon in enumerate(expected_strings):
        assert tokens[position].string == emoticon