From 55d151aa61764c62ac3f516c7d0218fc2f31ad00 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 11 Jan 2017 21:14:15 +0100 Subject: [PATCH] Modernise Doc parse tree navigation tests and don't depend on models --- spacy/tests/parser/test_parse_navigate.py | 85 +++++++++++++++++------ 1 file changed, 62 insertions(+), 23 deletions(-) diff --git a/spacy/tests/parser/test_parse_navigate.py b/spacy/tests/parser/test_parse_navigate.py index a0944c286..4d909f0d6 100644 --- a/spacy/tests/parser/test_parse_navigate.py +++ b/spacy/tests/parser/test_parse_navigate.py @@ -1,35 +1,74 @@ +# coding: utf-8 from __future__ import unicode_literals -from os import path -import io + +from ..util import get_doc import pytest @pytest.fixture -def sun_text(): - with io.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', - encoding='utf8') as file_: - text = file_.read() - return text +def text(): + return u""" +It was a bright cold day in April, and the clocks were striking thirteen. +Winston Smith, his chin nuzzled into his breast in an effort to escape the +vile wind, slipped quickly through the glass doors of Victory Mansions, +though not quickly enough to prevent a swirl of gritty dust from entering +along with him. + +The hallway smelt of boiled cabbage and old rag mats. At one end of it a +coloured poster, too large for indoor display, had been tacked to the wall. +It depicted simply an enormous face, more than a metre wide: the face of a +man of about forty-five, with a heavy black moustache and ruggedly handsome +features. Winston made for the stairs. It was no use trying the lift. Even at +the best of times it was seldom working, and at present the electric current +was cut off during daylight hours. It was part of the economy drive in +preparation for Hate Week. The flat was seven flights up, and Winston, who +was thirty-nine and had a varicose ulcer above his right ankle, went slowly, +resting several times on the way. On each landing, opposite the lift-shaft, +the poster with the enormous face gazed from the wall. It was one of those +pictures which are so contrived that the eyes follow you about when you move. +BIG BROTHER IS WATCHING YOU, the caption beneath it ran. +""" -@pytest.mark.models -def test_consistency(EN, sun_text): - tokens = EN(sun_text) - for head in tokens: +@pytest.fixture +def heads(): + return [1, 1, 0, 3, 2, 1, -4, -1, -1, -7, -8, 1, -10, 2, 1, -3, -1, -15, + -1, 1, 4, -1, 1, -3, 0, -1, 1, -2, -4, 1, -2, 1, -2, 3, -1, 1, + -4, -13, -14, -1, -2, 2, 1, -3, -1, 1, -2, -9, -1, 3, 1, 1, -14, + 1, -2, 1, -2, -1, 1, -2, -6, -1, -1, -2, -1, -1, -42, -1, 2, 1, + 0, -1, 1, -2, -1, 2, 1, -4, -8, 0, 1, -2, -1, -1, 3, -1, 1, -6, + 9, 1, 7, -1, 1, -2, 3, 2, 1, -10, -1, 1, -2, -22, -1, 1, 0, -1, + 2, 1, -4, -1, -2, -1, 1, -2, -6, -7, 1, -9, -1, 2, -1, -3, -1, + 3, 2, 1, -4, -19, -24, 3, 2, 1, -4, -1, 1, 2, -1, -5, -34, 1, 0, + -1, 1, -2, -4, 1, 0, 1, -2, -1, 1, -2, -6, 1, 9, -1, 1, -3, -1, + -1, 3, 2, 1, 0, -1, -2, 7, -1, 5, 1, 3, -1, 1, -10, -1, -2, 1, + -2, -15, 1, 0, -1, -1, 2, 1, -3, -1, -1, -2, -1, 1, -2, -12, 1, + 1, 0, 1, -2, -1, -2, -3, 9, -1, 2, -1, -4, 2, 1, -3, -4, -15, 2, + 1, -3, -1, 2, 1, -3, -8, -9, -1, -2, -1, -4, 1, -2, -3, 1, -2, + -19, 17, 1, -2, 14, 13, 3, 2, 1, -4, 8, -1, 1, 5, -1, 2, 1, -3, + 0, -1, 1, -2, -4, 1, 0, -1, -1, 2, -1, -3, 1, -2, 1, -2, 3, 1, + 1, -4, -1, -2, 2, 1, -5, -19, -1, 1, 1, 0, 1, 6, -1, 1, -3, -1, + -1, -8, -9, -1] + + +def test_parser_parse_navigate_consistency(en_tokenizer, text, heads): + tokens = en_tokenizer(text) + doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) + for head in doc: for child in head.lefts: assert child.head is head for child in head.rights: assert child.head is head -@pytest.mark.models -def test_child_consistency(EN, sun_text): - tokens = EN(sun_text) +def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads): + tokens = en_tokenizer(text) + doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) lefts = {} rights = {} - for head in tokens: + for head in doc: assert head.i not in lefts lefts[head.i] = set() for left in head.lefts: @@ -38,10 +77,10 @@ def test_child_consistency(EN, sun_text): rights[head.i] = set() for right in head.rights: rights[head.i].add(right.i) - for head in tokens: + for head in doc: assert head.n_rights == len(rights[head.i]) assert head.n_lefts == len(lefts[head.i]) - for child in tokens: + for child in doc: if child.i < child.head.i: assert child.i in lefts[child.head.i] assert child.i not in rights[child.head.i] @@ -56,12 +95,12 @@ def test_child_consistency(EN, sun_text): assert not children -@pytest.mark.models -def test_edges(EN, sun_text): - tokens = EN(sun_text) - for token in tokens: +def test_parser_parse_navigate_edges(en_tokenizer, text, heads): + tokens = en_tokenizer(text) + doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) + for token in doc: subtree = list(token.subtree) - debug = '\t'.join((token.orth_, token.left_edge.orth_, subtree[0].orth_)) + debug = '\t'.join((token.text, token.left_edge.text, subtree[0].text)) assert token.left_edge == subtree[0], debug - debug = '\t'.join((token.orth_, token.right_edge.orth_, subtree[-1].orth_, token.right_edge.head.orth_)) + debug = '\t'.join((token.text, token.right_edge.text, subtree[-1].text, token.right_edge.head.text)) assert token.right_edge == subtree[-1], debug