mirror of https://github.com/explosion/spaCy.git
* Commit outstanding tests
This commit is contained in:
parent
7e0a9077dd
commit
60c1e78596
|
@ -0,0 +1,35 @@
|
||||||
|
from spacy.util import align_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_perfect_align():
|
||||||
|
ref = ['I', 'align', 'perfectly']
|
||||||
|
indices = []
|
||||||
|
i = 0
|
||||||
|
for token in ref:
|
||||||
|
indices.append((i, i + len(token)))
|
||||||
|
i += len(token)
|
||||||
|
aligned = list(align_tokens(ref, indices))
|
||||||
|
assert aligned[0] == ('I', [(0, 1)])
|
||||||
|
assert aligned[1] == ('align', [(1, 6)])
|
||||||
|
assert aligned[2] == ('perfectly', [(6, 15)])
|
||||||
|
|
||||||
|
|
||||||
|
def test_hyphen_align():
|
||||||
|
ref = ['I', 'must', 're-align']
|
||||||
|
indices = [(0, 1), (1, 5), (5, 7), (7, 8), (8, 13)]
|
||||||
|
aligned = list(align_tokens(ref, indices))
|
||||||
|
assert aligned[0] == ('I', [(0, 1)])
|
||||||
|
assert aligned[1] == ('must', [(1, 5)])
|
||||||
|
assert aligned[2] == ('re-align', [(5, 7), (7, 8), (8, 13)])
|
||||||
|
|
||||||
|
|
||||||
|
def test_align_continue():
|
||||||
|
ref = ['I', 'must', 're-align', 'and', 'continue']
|
||||||
|
indices = [(0, 1), (1, 5), (5, 7), (7, 8), (8, 13), (13, 16), (16, 24)]
|
||||||
|
aligned = list(align_tokens(ref, indices))
|
||||||
|
assert aligned[2] == ('re-align', [(5, 7), (7, 8), (8, 13)])
|
||||||
|
assert aligned[3] == ('and', [(13, 16)])
|
||||||
|
assert aligned[4] == ('continue', [(16, 24)])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from spacy.en import EN
|
||||||
|
|
||||||
|
def test1():
|
||||||
|
words = ['JAPAN', 'GET', 'LUCKY']
|
||||||
|
tokens = EN.tokens_from_list(words)
|
||||||
|
assert len(tokens) == 3
|
||||||
|
assert tokens[0].string == 'JAPAN'
|
Loading…
Reference in New Issue