spaCy/spacy/tests/regression/test_issue5048.py

33 lines
960 B
Python

import numpy
from spacy.tokens import Doc
from spacy.attrs import DEP, POS, TAG
from ..util import get_doc
def test_issue5048(en_vocab):
words = ["This", "is", "a", "sentence"]
pos_s = ["DET", "VERB", "DET", "NOUN"]
spaces = [" ", " ", " ", ""]
deps_s = ["dep", "adj", "nn", "atm"]
tags_s = ["DT", "VBZ", "DT", "NN"]
strings = en_vocab.strings
for w in words:
strings.add(w)
deps = [strings.add(d) for d in deps_s]
pos = [strings.add(p) for p in pos_s]
tags = [strings.add(t) for t in tags_s]
attrs = [POS, DEP, TAG]
array = numpy.array(list(zip(pos, deps, tags)), dtype="uint64")
doc = Doc(en_vocab, words=words, spaces=spaces)
doc.from_array(attrs, array)
v1 = [(token.text, token.pos_, token.tag_) for token in doc]
doc2 = get_doc(en_vocab, words=words, pos=pos_s, deps=deps_s, tags=tags_s)
v2 = [(token.text, token.pos_, token.tag_) for token in doc2]
assert v1 == v2