From 8de59ce3b96a1a5d94e8bf72963e6d632b96564f Mon Sep 17 00:00:00 2001
From: luvogels
Date: Wed, 26 Apr 2017 19:10:18 +0200
Subject: [PATCH] Added tokenizer tests

---
 spacy/__init__.py                | 40 +-------------------------------
 spacy/tests/nb/__init__.py       |  0
 spacy/tests/nb/test_tokenizer.py | 17 ++++++++++++++
 3 files changed, 18 insertions(+), 39 deletions(-)
 create mode 100644 spacy/tests/nb/__init__.py
 create mode 100644 spacy/tests/nb/test_tokenizer.py

diff --git a/spacy/__init__.py b/spacy/__init__.py
index 19cc61c06..9bbbd8f3a 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 from . import util
 from .deprecated import resolve_model_name
 from .cli.info import info
-<<<<<<< HEAD
 from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he, nb
 
 
@@ -18,39 +17,6 @@ _languages = (en.English, de.German, es.Spanish, pt.Portuguese, fr.French,
 for _lang in _languages:
     util.set_lang_class(_lang.lang, _lang)
 
-from . import en
-from . import de
-from . import zh
-from . import es
-from . import it
-from . import hu
-from . import fr
-from . import pt
-from . import nl
-from . import sv
-from . import fi
-from . import bn
-from . import he
-from . import nb
-
-from .about import *
-
-
-set_lang_class(en.English.lang, en.English)
-set_lang_class(de.German.lang, de.German)
-set_lang_class(es.Spanish.lang, es.Spanish)
-set_lang_class(pt.Portuguese.lang, pt.Portuguese)
-set_lang_class(fr.French.lang, fr.French)
-set_lang_class(it.Italian.lang, it.Italian)
-set_lang_class(hu.Hungarian.lang, hu.Hungarian)
-set_lang_class(zh.Chinese.lang, zh.Chinese)
-set_lang_class(nl.Dutch.lang, nl.Dutch)
-set_lang_class(sv.Swedish.lang, sv.Swedish)
-set_lang_class(fi.Finnish.lang, fi.Finnish)
-set_lang_class(bn.Bengali.lang, bn.Bengali)
-set_lang_class(he.Hebrew.lang, he.Hebrew)
-set_lang_class(nb.Norwegian.lang, nb.Norwegian)
-
 
 def load(name, **overrides):
     if overrides.get('path') in (None, False, True):
@@ -72,8 +38,4 @@ def load(name, **overrides):
     cls = util.get_lang_class(lang)
     overrides['meta'] = meta
     overrides['path'] = model_path
-    return cls(**overrides)
-
-
-def info(name, markdown):
-    info(name, markdown)
+    return cls(**overrides)
\ No newline at end of file
diff --git a/spacy/tests/nb/__init__.py b/spacy/tests/nb/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tests/nb/test_tokenizer.py b/spacy/tests/nb/test_tokenizer.py
new file mode 100644
index 000000000..b55901339
--- /dev/null
+++ b/spacy/tests/nb/test_tokenizer.py
@@ -0,0 +1,17 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+
+
+NB_TOKEN_EXCEPTION_TESTS = [
+    ('Smørsausen brukes bl.a. til fisk', ['Smørsausen', 'brukes', 'bl.a.', 'til', 'fisk']),
+    ('Jeg kommer først kl. 13 pga. diverse forsinkelser', ['Jeg', 'kommer', 'først', 'kl.', '13', 'pga.', 'diverse', 'forsinkelser'])
+]
+
+
+@pytest.mark.parametrize('text,expected_tokens', NB_TOKEN_EXCEPTION_TESTS)
+def test_tokenizer_handles_exception_cases(nb_tokenizer, text, expected_tokens):
+    tokens = nb_tokenizer(text)
+    token_list = [token.text for token in tokens if not token.is_space]
+    assert expected_tokens == token_list
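
Note: the new test relies on an nb_tokenizer pytest fixture that is not part of this patch; spaCy's test suite provides per-language tokenizer fixtures from spacy/tests/conftest.py. A minimal sketch of what that fixture could look like, assuming the nb.Norwegian class registered above and the Defaults.create_tokenizer() helper used by the other language fixtures:

    # spacy/tests/conftest.py (sketch only, not part of this patch)
    import pytest

    from .. import util


    @pytest.fixture
    def nb_tokenizer():
        # Assumes 'nb' was registered via util.set_lang_class() in
        # spacy/__init__.py, as the patched module does for all _languages.
        return util.get_lang_class('nb').Defaults.create_tokenizer()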