* Commit outstanding tests

2014-11-12 23:24:32 +11:00 · 2014-11-12 23:24:32 +11:00 · 60c1e78596
parent 7e0a9077dd
commit 60c1e78596
2 changed files with 44 additions and 0 deletions
--- a/tests/test_align.py
+++ b/tests/test_align.py
@ -0,0 +1,35 @@
 from spacy.util import align_tokens
 def test_perfect_align():
    """Each reference token should align with exactly one matching span.

    The spans are laid out back-to-back with no gaps, one per token, so
    every aligned entry is expected to carry a single (start, end) pair.
    """
    ref = ['I', 'align', 'perfectly']
    # Build contiguous (start, end) spans, one for each reference token.
    indices = []
    start = 0
    for word in ref:
        end = start + len(word)
        indices.append((start, end))
        start = end
    aligned = list(align_tokens(ref, indices))
    expected = [
        ('I', [(0, 1)]),
        ('align', [(1, 6)]),
        ('perfectly', [(6, 15)]),
    ]
    # Check entries one at a time, in order, by position.
    for position, pair in enumerate(expected):
        assert aligned[position] == pair
 def test_hyphen_align():
    """A hyphenated reference token should collect all of its sub-spans.

    're-align' is covered by three spans here ((5, 7), (7, 8), (8, 13)),
    and all three are expected to be grouped under that one token.
    """
    ref = ['I', 'must', 're-align']
    indices = [(0, 1), (1, 5), (5, 7), (7, 8), (8, 13)]
    aligned = list(align_tokens(ref, indices))
    expected = [
        ('I', [(0, 1)]),
        ('must', [(1, 5)]),
        ('re-align', [(5, 7), (7, 8), (8, 13)]),
    ]
    # Check entries one at a time, in order, by position.
    for position, pair in enumerate(expected):
        assert aligned[position] == pair
 def test_align_continue():
    """Alignment should resume one-to-one after a multi-span token.

    Only the entries from 're-align' onwards are checked: the token that
    spans three pieces, followed by two tokens with a single span each.
    """
    ref = ['I', 'must', 're-align', 'and', 'continue']
    indices = [(0, 1), (1, 5), (5, 7), (7, 8), (8, 13), (13, 16), (16, 24)]
    aligned = list(align_tokens(ref, indices))
    expected_tail = [
        ('re-align', [(5, 7), (7, 8), (8, 13)]),
        ('and', [(13, 16)]),
        ('continue', [(16, 24)]),
    ]
    # The checked entries start at position 2 of the aligned output.
    for position, pair in enumerate(expected_tail, 2):
        assert aligned[position] == pair
--- a/tests/test_tokens_from_list.py
+++ b/tests/test_tokens_from_list.py
@ -0,0 +1,9 @@
 from __future__ import unicode_literals
 from spacy.en import EN
 def test1():
    """Tokens built from a word list keep their count and first string."""
    tokens = EN.tokens_from_list(['JAPAN', 'GET', 'LUCKY'])
    assert len(tokens) == 3
    # Look up the first token only after the length check, as before.
    first = tokens[0]
    assert first.string == 'JAPAN'