Update Tokenizer.explain tests

2019-11-20 13:14:11 +01:00 · 2019-11-20 13:14:11 +01:00 · 2e7c896fe5
parent 2c876eb672
commit 2e7c896fe5
1 changed files with 52 additions and 19 deletions
--- a/spacy/tests/tokenizer/test_explain.py
+++ b/spacy/tests/tokenizer/test_explain.py
@ -1,30 +1,63 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import importlib
 import pytest
 from spacy.util import get_lang_class

-
-# fmt: off
 # Only include languages with no external dependencies
-# excluded: ja, ru, th, uk, vi, zh
-LANGUAGES = ["af", "ar", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es",
-             "et", "fa", "fi", "fr", "ga", "he", "hi", "hr", "hu", "id", "is",
-             "it", "kn", "lt", "lv", "nb", "nl", "pl", "pt", "ro", "si", "sk",
-             "sl", "sq", "sr", "sv", "ta", "te", "tl", "tr", "tt", "ur"]
-# fmt: on
+# "is" seems to confuse importlib, so we're also excluding it for now
+# excluded: ja, ru, th, uk, vi, zh, is
+LANGUAGES = [
+    "af",
+    "ar",
+    "bg",
+    "bn",
+    pytest.param("ca", marks=pytest.mark.xfail()),
+    "cs",
+    "da",
+    "de",
+    "el",
+    "en",
+    "es",
+    "et",
+    "fa",
+    "fi",
+    pytest.param("fr", marks=pytest.mark.xfail()),
+    "ga",
+    "he",
+    "hi",
+    "hr",
+    pytest.param("hu", marks=pytest.mark.xfail()),
+    "id",
+    "it",
+    "kn",
+    "lt",
+    "lv",
+    "nb",
+    "nl",
+    pytest.param("pl", marks=pytest.mark.xfail()),
+    "pt",
+    "ro",
+    "si",
+    "sk",
+    "sl",
+    "sq",
+    "sr",
+    "sv",
+    "ta",
+    "te",
+    "tl",
+    "tr",
+    "tt",
+    "ur",
+]

-
-@pytest.mark.slow
+# @pytest.mark.slow
@pytest.mark.parametrize("lang", LANGUAGES)
 def test_tokenizer_explain(lang):
    nlp = get_lang_class(lang)()
-    try:
-        examples = importlib.import_module("spacy.lang." + lang + ".examples")
-        for sentence in examples.sentences:
-            tokens = [t.text for t in nlp.tokenizer(sentence) if not t.is_space]
-            debug_tokens = [t[1] for t in nlp.tokenizer.explain(sentence)]
-            assert tokens == debug_tokens
-    except:
-        pass
+    examples = pytest.importorskip("spacy.lang.{}.examples".format(lang))
+    for sentence in examples.sentences:
+        tokens = [t.text for t in nlp.tokenizer(sentence) if not t.is_space]
+        debug_tokens = [t[1] for t in nlp.tokenizer.explain(sentence)]
+        assert tokens == debug_tokens