import pytest
import numpy
from spacy.vocab import Vocab
from spacy.language import Language
from spacy.ml.models.defaults import default_parser
from spacy.pipeline import DependencyParser
from spacy.syntax.arc_eager import ArcEager
from spacy.tokens import Doc
from spacy.syntax._beam_utils import ParserBeam
from spacy.syntax.stateclass import StateClass
from spacy.gold import GoldParse


@pytest.fixture
def vocab():
    return Vocab()


@pytest.fixture
def moves(vocab):
    # Minimal ArcEager transition system with a few labelled actions.
    aeager = ArcEager(vocab.strings, {})
    aeager.add_action(2, "nsubj")
    aeager.add_action(3, "dobj")
    aeager.add_action(2, "aux")
    return aeager


@pytest.fixture
def docs(vocab):
    return [Doc(vocab, words=["Rats", "bite", "things"])]


@pytest.fixture
def states(docs):
    return [StateClass(doc) for doc in docs]


@pytest.fixture
def tokvecs(docs, vector_size):
    # Random token vectors: one (n_tokens, vector_size) array per doc.
    output = []
    for doc in docs:
        vec = numpy.random.uniform(-0.1, 0.1, (len(doc), vector_size))
        output.append(numpy.asarray(vec))
    return output


@pytest.fixture
def golds(docs):
    return [GoldParse(doc) for doc in docs]


@pytest.fixture
def batch_size(docs):
    return len(docs)


@pytest.fixture
def beam_width():
    return 4


@pytest.fixture
def vector_size():
    return 6


@pytest.fixture
def beam(moves, states, golds, beam_width):
    return ParserBeam(moves, states, golds, width=beam_width, density=0.0)


@pytest.fixture
def scores(moves, batch_size, beam_width):
    # Random (batch_size, n_moves) score arrays, one per doc in the batch.
    return [
        numpy.asarray(
            numpy.random.uniform(-0.1, 0.1, (batch_size, moves.n_moves)), dtype="f"
        )
        for _ in range(batch_size)
    ]


def test_create_beam(beam):
    # Constructing the ParserBeam fixture should not raise.
    pass


def test_beam_advance(beam, scores):
    beam.advance(scores)


def test_beam_advance_too_few_scores(beam, scores):
    # Advancing with fewer score arrays than states should fail loudly.
    with pytest.raises(IndexError):
        beam.advance(scores[:-1])


def test_beam_parse():
    nlp = Language()
    nlp.add_pipe(DependencyParser(nlp.vocab, default_parser()), name="parser")
    nlp.parser.add_label("nsubj")
    nlp.parser.begin_training([], token_vector_width=8, hidden_width=8)
    doc = nlp.make_doc("Australia is a country")
    nlp.parser(doc, beam_width=2)