# Provenance: tests/test_indices.py as added by commit
# 613a195f9243272c33d31580e42c50388a24d88c
# Author: Matthew Honnibal
# Date: Fri, 30 Jan 2015 16:44:29 +1100
# Subject: * Add test for indices
"""Test that token.idx correctly computes index into the original string."""

from __future__ import unicode_literals

import pytest
from spacy.en import English


@pytest.fixture
def nlp():
    """Return the tokenizer of a freshly built ``English`` object.

    The fixture keeps the name ``nlp``, but the tests receive only the
    tokenizer and call it directly on raw text.
    """
    return English().tokenizer


def test_simple_punct(nlp):
    """Check the start offset of each token in a short comma-separated phrase."""
    tokens = nlp('to walk, do foo')
    # Character offsets into the input at which tokens 0..4 must start.
    expected_starts = (0, 3, 7, 9, 12)
    for position, start in enumerate(expected_starts):
        assert tokens[position].idx == start


def test_complex_punct(nlp):
    """Check offsets (and, where pinned, lengths) around brackets and periods."""
    tokens = nlp('Tom (D., Ill.)!')
    # One (start offset, length) pair per token. A length of None marks the
    # trailing tokens for which only the offset is asserted.
    expected = (
        (0, 3),      # Tom
        (4, 1),      # (
        (5, 2),      # D.
        (7, 1),      # ,
        (9, 4),      # Ill.
        (13, None),  # )
        (14, None),  # !
    )
    for position, (start, length) in enumerate(expected):
        assert tokens[position].idx == start
        if length is not None:
            assert len(tokens[position]) == length