diff --git a/spacy/tests/parser/test_sbd.py b/spacy/tests/parser/test_sbd.py
index 37f9f4061..86bbe88d0 100644
--- a/spacy/tests/parser/test_sbd.py
+++ b/spacy/tests/parser/test_sbd.py
@@ -123,24 +123,21 @@ def test_sentence_breaks_with_space(EN):
 
 
-@pytest.fixture
+def apply_transition_sequence(model, doc, sequence):
+    with model.parser.step_through(doc) as stepwise:
+        for transition in sequence:
+            stepwise.transition(transition)
+
+
 @pytest.mark.models
-def example(EN):
-    def apply_transition_sequence(model, doc, sequence):
-        with model.parser.step_through(doc) as stepwise:
-            for transition in sequence:
-                stepwise.transition(transition)
-    doc = EN.tokenizer.tokens_from_list(u"I bought a couch from IKEA. It was n't very comfortable .".split(' '))
-    EN.tagger(doc)
-    apply_transition_sequence(EN, doc, ['L-nsubj','S','L-det','R-dobj','D','R-prep','R-pobj','D','D','S','L-nsubj','R-ROOT','R-neg','D','S','L-advmod','R-acomp','D','R-punct'])
-    return doc
-
-
-def test_sbd_for_root_label_dependents(example):
+def test_sbd_for_root_label_dependents(EN):
     """
     make sure that the parser properly introduces a sentence boundary without
     the break transition by checking for dependents with the root label
     """
+    example = EN.tokenizer.tokens_from_list(u"I bought a couch from IKEA. It was n't very comfortable .".split(' '))
+    EN.tagger(example)
+    apply_transition_sequence(EN, example, ['L-nsubj','S','L-det','R-dobj','D','R-prep','R-pobj','D','D','S','L-nsubj','R-ROOT','R-neg','D','S','L-advmod','R-acomp','D','R-punct'])
 
     assert example[1].head.i == 1
     assert example[7].head.i == 7
 
@@ -152,7 +149,7 @@ def test_sbd_for_root_label_dependents(example):
 
 @pytest.mark.models
-def test_sbd_serialization(EN, example):
+def test_sbd_serialization(EN):
     """
     test that before and after serialization, the sentence boundaries are
     the same even if the parser predicted two roots for the sentence that were
     made into two sentences
@@ -168,6 +165,10 @@ def test_sbd_serialization(EN, example):
 
     it introduced the second root node.
     """
+    example = EN.tokenizer.tokens_from_list(u"I bought a couch from IKEA. It was n't very comfortable .".split(' '))
+    EN.tagger(example)
+    apply_transition_sequence(EN, example, ['L-nsubj','S','L-det','R-dobj','D','R-prep','R-pobj','D','D','S','L-nsubj','R-ROOT','R-neg','D','S','L-advmod','R-acomp','D','R-punct'])
+
     example_serialized = Doc(EN.vocab).from_bytes(example.to_bytes())
 
     assert example.to_bytes() == example_serialized.to_bytes()