diff --git a/tests/test_group_by.py b/tests/test_group_by.py
new file mode 100644
index 000000000..2f9dd6ce0
--- /dev/null
+++ b/tests/test_group_by.py
@@ -0,0 +1,49 @@
+from __future__ import unicode_literals
+
+import pytest
+
+from spacy import en
+from spacy.lexeme import lex_of
+
+from spacy import SIC, LEX, NORM, SHAPE, LAST3
+
+
+def test_group_by_lex():
+    """Group tokens by LEX and check the sizes and names of the first groups.
+
+    The indices below assume groups come back in order of first occurrence
+    ("I", "like", "the", "red") -- TODO confirm against group_by.
+    """
+    tokens = en.tokenize("I like the red one and I like the blue one")
+    names, hashes, groups = tokens.group_by(LEX)
+
+    # "I" and "like" each appear twice in the sentence.
+    assert len(groups[0]) == 2
+    assert en.unhash(lex_of(groups[0][0])) == 'I'
+    assert names[0] == 'I'
+    assert len(groups[1]) == 2
+    assert en.unhash(lex_of(groups[1][0])) == 'like'
+    assert names[1] == "like"
+    # "the" appears twice, "red" once.
+    assert len(groups[2]) == 2
+    assert len(groups[3]) == 1
+
+
+def test_group_by_last3():
+    """Group tokens by LAST3 and check the sizes of the first five groups.
+
+    LAST3 presumably keys on a word's final three characters, so "the" and
+    "blithe", "swarthy" and "filthy", and "mate" and "ate" share groups.
+    """
+    tokens = en.tokenize("I the blithe swarthy mate ate on the filthy deck")
+    names, hashes, groups = tokens.group_by(LAST3)
+
+    assert len(groups[0]) == 1
+    assert en.unhash(lex_of(groups[0][0])) == 'I'
+    # "the", "blithe", "the"
+    assert len(groups[1]) == 3
+    assert en.unhash(lex_of(groups[1][0])) == 'the'
+    # "swarthy"/"filthy", then "mate"/"ate", then "on"
+    assert len(groups[2]) == 2
+    assert len(groups[3]) == 2
+    assert len(groups[4]) == 1
diff --git a/tests/test_orth.py b/tests/test_orth.py
new file mode 100644
index 000000000..8d9939f4c
--- /dev/null
+++ b/tests/test_orth.py
@@ -0,0 +1,19 @@
+from __future__ import unicode_literals
+
+import pytest
+
+from spacy.en import lookup, unhash
+
+from spacy.lexeme import sic_of, lex_of, norm_of, shape_of, first_of
+from spacy.lexeme import shape_of
+
+@pytest.fixture
+def C3P0():
+    """Return the result of looking up the string "C3P0"."""
+    return lookup("C3P0")
+
+
+def test_shape(C3P0):
+    """The shape of "C3P0" must unhash to "XdXd"."""
+    # Presumably X marks an uppercase letter and d a digit.
+    assert unhash(shape_of(C3P0)) == "XdXd"