spaCy/spacy/tests/regression/test_issue3288.py

19 lines
650 B
Python
Raw Normal View History

2019-02-18 09:59:31 +00:00
# coding: utf-8
from __future__ import unicode_literals
import numpy
from spacy import displacy
from ..util import get_doc
def test_issue3288(en_vocab):
    """Regression test for issue #3288.

    Check that retokenization via displaCy works when punctuation is merged
    onto the preceding token and the doc's tensor has to be resized.
    """
    tokens = ["Hello", "World", "!", "When", "is", "this", "breaking", "?"]
    head_offsets = [1, 0, -1, 1, 0, 1, -2, -3]
    dep_labels = [
        "intj",
        "ROOT",
        "punct",
        "advmod",
        "ROOT",
        "det",
        "nsubj",
        "punct",
    ]
    doc = get_doc(en_vocab, words=tokens, heads=head_offsets, deps=dep_labels)

    # Attach a tensor with one row per token so that rendering has a tensor
    # to keep in sync with the doc.
    tensor_shape = (len(tokens), 96)
    doc.tensor = numpy.zeros(tensor_shape, dtype="float32")

    # No explicit assertion: the test passes as long as rendering does not
    # raise.
    displacy.render(doc)