mirror of https://github.com/explosion/spaCy.git
Modernise Doc parse tree navigation tests and don't depend on models
This commit is contained in:
parent
7262421bb2
commit
55d151aa61
|
@ -1,35 +1,74 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from os import path
|
||||
import io
|
||||
|
||||
from ..util import get_doc
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sun_text():
|
||||
with io.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r',
|
||||
encoding='utf8') as file_:
|
||||
text = file_.read()
|
||||
return text
|
||||
def text():
    """Return the English prose passage used as input by the tests below.

    The literal begins and ends with a newline and keeps one blank line
    between its two paragraphs. The whitespace is part of the value and
    must not be reflowed.
    NOTE(review): presumably the `heads` fixture is aligned token-for-token
    with this text, so any edit here would need a matching edit there.
    """
    passage = u"""
It was a bright cold day in April, and the clocks were striking thirteen.
Winston Smith, his chin nuzzled into his breast in an effort to escape the
vile wind, slipped quickly through the glass doors of Victory Mansions,
though not quickly enough to prevent a swirl of gritty dust from entering
along with him.

The hallway smelt of boiled cabbage and old rag mats. At one end of it a
coloured poster, too large for indoor display, had been tacked to the wall.
It depicted simply an enormous face, more than a metre wide: the face of a
man of about forty-five, with a heavy black moustache and ruggedly handsome
features. Winston made for the stairs. It was no use trying the lift. Even at
the best of times it was seldom working, and at present the electric current
was cut off during daylight hours. It was part of the economy drive in
preparation for Hate Week. The flat was seven flights up, and Winston, who
was thirty-nine and had a varicose ulcer above his right ankle, went slowly,
resting several times on the way. On each landing, opposite the lift-shaft,
the poster with the enormous face gazed from the wall. It was one of those
pictures which are so contrived that the eyes follow you about when you move.
BIG BROTHER IS WATCHING YOU, the caption beneath it ran.
"""
    return passage
|
||||
|
||||
|
||||
@pytest.mark.models
|
||||
def test_consistency(EN, sun_text):
|
||||
tokens = EN(sun_text)
|
||||
for head in tokens:
|
||||
@pytest.fixture
def heads():
    """Return the dependency-head annotation passed to `get_doc` as `heads`.

    NOTE(review): the values look like one offset per token, relative to the
    token's own position, with 0 marking a root. Confirm against `get_doc`.
    """
    head_offsets = [
        1, 1, 0, 3, 2, 1, -4, -1, -1, -7, -8, 1, -10, 2, 1, -3, -1, -15,
        -1, 1, 4, -1, 1, -3, 0, -1, 1, -2, -4, 1, -2, 1, -2, 3, -1, 1,
        -4, -13, -14, -1, -2, 2, 1, -3, -1, 1, -2, -9, -1, 3, 1, 1, -14,
        1, -2, 1, -2, -1, 1, -2, -6, -1, -1, -2, -1, -1, -42, -1, 2, 1,
        0, -1, 1, -2, -1, 2, 1, -4, -8, 0, 1, -2, -1, -1, 3, -1, 1, -6,
        9, 1, 7, -1, 1, -2, 3, 2, 1, -10, -1, 1, -2, -22, -1, 1, 0, -1,
        2, 1, -4, -1, -2, -1, 1, -2, -6, -7, 1, -9, -1, 2, -1, -3, -1,
        3, 2, 1, -4, -19, -24, 3, 2, 1, -4, -1, 1, 2, -1, -5, -34, 1, 0,
        -1, 1, -2, -4, 1, 0, 1, -2, -1, 1, -2, -6, 1, 9, -1, 1, -3, -1,
        -1, 3, 2, 1, 0, -1, -2, 7, -1, 5, 1, 3, -1, 1, -10, -1, -2, 1,
        -2, -15, 1, 0, -1, -1, 2, 1, -3, -1, -1, -2, -1, 1, -2, -12, 1,
        1, 0, 1, -2, -1, -2, -3, 9, -1, 2, -1, -4, 2, 1, -3, -4, -15, 2,
        1, -3, -1, 2, 1, -3, -8, -9, -1, -2, -1, -4, 1, -2, -3, 1, -2,
        -19, 17, 1, -2, 14, 13, 3, 2, 1, -4, 8, -1, 1, 5, -1, 2, 1, -3,
        0, -1, 1, -2, -4, 1, 0, -1, -1, 2, -1, -3, 1, -2, 1, -2, 3, 1,
        1, -4, -1, -2, 2, 1, -5, -19, -1, 1, 1, 0, 1, 6, -1, 1, -3, -1,
        -1, -8, -9, -1]
    return head_offsets
|
||||
|
||||
|
||||
def test_parser_parse_navigate_consistency(en_tokenizer, text, heads):
    """Check that every left/right child of a token names that token as head.

    The document is assembled with `get_doc` from the tokenizer's vocab, the
    token texts and the `heads` fixture, so no statistical model is loaded.
    """
    tokenized = en_tokenizer(text)
    words = [tok.text for tok in tokenized]
    doc = get_doc(tokenized.vocab, words, heads=heads)
    for parent in doc:
        # Children on either side must point back at the very same object.
        for dependent in parent.lefts:
            assert dependent.head is parent
        for dependent in parent.rights:
            assert dependent.head is parent
|
||||
|
||||
|
||||
@pytest.mark.models
|
||||
def test_child_consistency(EN, sun_text):
|
||||
tokens = EN(sun_text)
|
||||
def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads):
|
||||
tokens = en_tokenizer(text)
|
||||
doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
|
||||
|
||||
lefts = {}
|
||||
rights = {}
|
||||
for head in tokens:
|
||||
for head in doc:
|
||||
assert head.i not in lefts
|
||||
lefts[head.i] = set()
|
||||
for left in head.lefts:
|
||||
|
@ -38,10 +77,10 @@ def test_child_consistency(EN, sun_text):
|
|||
rights[head.i] = set()
|
||||
for right in head.rights:
|
||||
rights[head.i].add(right.i)
|
||||
for head in tokens:
|
||||
for head in doc:
|
||||
assert head.n_rights == len(rights[head.i])
|
||||
assert head.n_lefts == len(lefts[head.i])
|
||||
for child in tokens:
|
||||
for child in doc:
|
||||
if child.i < child.head.i:
|
||||
assert child.i in lefts[child.head.i]
|
||||
assert child.i not in rights[child.head.i]
|
||||
|
@ -56,12 +95,12 @@ def test_child_consistency(EN, sun_text):
|
|||
assert not children
|
||||
|
||||
|
||||
@pytest.mark.models
|
||||
def test_edges(EN, sun_text):
|
||||
tokens = EN(sun_text)
|
||||
for token in tokens:
|
||||
def test_parser_parse_navigate_edges(en_tokenizer, text, heads):
    """Check that each token's left/right edge matches the ends of its subtree.

    For every token in the document built by `get_doc`, `left_edge` must equal
    the first element of `token.subtree` and `right_edge` the last one. The
    tab-separated `debug` string is only shown when an assertion fails.
    """
    tokens = en_tokenizer(text)
    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
    for token in doc:
        subtree = list(token.subtree)
        # The superseded `orth_`-based debug strings were dead stores,
        # overwritten by the `.text` versions before use; only these remain.
        debug = '\t'.join((token.text, token.left_edge.text, subtree[0].text))
        assert token.left_edge == subtree[0], debug
        debug = '\t'.join((token.text, token.right_edge.text,
                           subtree[-1].text, token.right_edge.head.text))
        assert token.right_edge == subtree[-1], debug
|
||||
|
|
Loading…
Reference in New Issue