spaCy/spacy/tests/test_misc.py

103 lines
3.4 KiB
Python
Raw Normal View History

2017-04-23 19:06:46 +00:00
# coding: utf-8
from __future__ import unicode_literals
import pytest
💫 Refactor test suite (#2568) ## Description Related issues: #2379 (should be fixed by separating model tests) * **total execution time down from > 300 seconds to under 60 seconds** 🎉 * removed all model-specific tests that could only really be run manually anyway – those will now live in a separate test suite in the [`spacy-models`](https://github.com/explosion/spacy-models) repository and are already integrated into our new model training infrastructure * changed all relative imports to absolute imports to prepare for moving the test suite from `/spacy/tests` to `/tests` (it'll now always test against the installed version) * merged old regression tests into collections, e.g. `test_issue1001-1500.py` (about 90% of the regression tests are very short anyways) * tidied up and rewrote existing tests wherever possible ### Todo - [ ] move tests to `/tests` and adjust CI commands accordingly - [x] move model test suite from internal repo to `spacy-models` - [x] ~~investigate why `pipeline/test_textcat.py` is flakey~~ - [x] review old regression tests (leftover files) and see if they can be merged, simplified or deleted - [ ] update documentation on how to run tests ### Types of change enhancement, tests ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information.
2018-07-24 21:38:44 +00:00
from pathlib import Path
from spacy import util
from spacy import displacy
from spacy.tokens import Span
from spacy._ml import PrecomputableAffine
from .util import get_doc
2017-04-23 19:06:46 +00:00
@pytest.mark.parametrize("text", ["hello/world", "hello world"])
def test_util_ensure_path_succeeds(text):
    """Check that util.ensure_path returns a pathlib.Path for a string."""
    assert isinstance(util.ensure_path(text), Path)
2017-05-28 23:37:57 +00:00
@pytest.mark.parametrize("package", ["numpy"])
def test_util_is_package(package):
    """Check that util.is_package recognises a package installed via pip."""
    recognised = util.is_package(package)
    assert recognised
@pytest.mark.parametrize("package", ["thinc"])
def test_util_get_package_path(package):
    """Check that util.get_package_path returns a Path for a package name."""
    assert isinstance(util.get_package_path(package), Path)
def test_displacy_parse_ents(en_vocab):
    """Check displaCy's entity format for a Doc carrying a single ORG entity."""
    words = ["But", "Google", "is", "starting", "from", "behind"]
    doc = get_doc(en_vocab, words=words)
    org_label = doc.vocab.strings["ORG"]
    # Mark the token "Google" (token index 1) as the only entity.
    doc.ents = [Span(doc, 1, 2, label=org_label)]

    parsed = displacy.parse_ents(doc)

    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    # "start"/"end" are the character offsets of "Google" in the text above.
    assert parsed["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
def test_displacy_parse_deps(en_vocab):
    """Check displaCy's dependency format for a small annotated Doc."""
    doc = get_doc(
        en_vocab,
        words=["This", "is", "a", "sentence"],
        heads=[1, 0, 1, -2],
        pos=["DET", "VERB", "DET", "NOUN"],
        tags=["DT", "VBZ", "DT", "NN"],
        deps=["nsubj", "ROOT", "det", "attr"],
    )

    parsed = displacy.parse_deps(doc)
    assert isinstance(parsed, dict)

    # The expected "tag" entries match the values passed as `pos` above,
    # not the fine-grained `tags`.
    expected_words = [
        {"text": "This", "tag": "DET"},
        {"text": "is", "tag": "VERB"},
        {"text": "a", "tag": "DET"},
        {"text": "sentence", "tag": "NOUN"},
    ]
    assert parsed["words"] == expected_words

    expected_arcs = [
        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
        {"start": 2, "end": 3, "label": "det", "dir": "left"},
        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
    ]
    assert parsed["arcs"] == expected_arcs
2017-11-02 23:48:54 +00:00
def test_displacy_spans(en_vocab):
    """Check that displacy.render accepts a Span sliced from a Doc."""
    words = ["But", "Google", "is", "starting", "from", "behind"]
    doc = get_doc(en_vocab, words=words)
    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]

    # Render a slice of the Doc rather than the Doc itself.
    span = doc[1:4]
    markup = displacy.render(span, style="ent")

    assert markup.startswith("<div")
def test_displacy_raises_for_wrong_type(en_vocab):
    """Check that displacy.render raises ValueError for a plain string."""
    with pytest.raises(ValueError):
        # No result is bound: the call is expected to raise before returning.
        displacy.render("hello world")
2017-11-02 23:48:54 +00:00
def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
    """Check PrecomputableAffine's array shapes and its padding gradient.

    Builds a model from the given dimensions, checks the shapes of the
    weights, the forward output and `d_pad`, then calls `_backprop_padding`
    twice and checks the value it leaves in `d_pad[0, 2, 0, 0]`.
    """
    model = PrecomputableAffine(nO=nO, nI=nI, nF=nF, nP=nP)
    assert model.W.shape == (nF, nO, nP, nI)

    inputs = model.ops.allocate((10, nI))
    Y, _ = model.begin_update(inputs)
    # The output is expected to have one more row than the input.
    assert Y.shape == (inputs.shape[0] + 1, nF, nO, nP)
    assert model.d_pad.shape == (1, nF, nO, nP)

    grads = model.ops.allocate((15, nO, nP))
    ids = model.ops.allocate((15, nF))

    # Case 1: a single negative id in row 1, feature slot 2.
    ids[1, 2] = -1
    grads[1] = 1
    assert model.d_pad[0, 2, 0, 0] == 0.0
    model._backprop_padding(grads, ids)
    assert model.d_pad[0, 2, 0, 0] == 1.0

    # Zero all buffers so the second case starts from a clean state.
    for buffer in (model.d_pad, ids, grads):
        buffer.fill(0.0)

    # Case 2: negative ids in feature slots 2, 1 and 0 of row 1.
    for slot in (2, 1, 0):
        ids[1, slot] = -1
    grads[1] = 1
    assert model.d_pad[0, 2, 0, 0] == 0.0
    model._backprop_padding(grads, ids)
    assert model.d_pad[0, 2, 0, 0] == 3.0