From 6652f2a135a2712381ed1238abefde08353a0b31 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 25 Nov 2016 12:44:13 +0100
Subject: [PATCH] Test #656, #624: special case rules for tokenizer with
 attributes.

---
 spacy/tests/unit/test_tokenizer.py | 48 ++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 spacy/tests/unit/test_tokenizer.py

diff --git a/spacy/tests/unit/test_tokenizer.py b/spacy/tests/unit/test_tokenizer.py
new file mode 100644
index 000000000..b6fbfd855
--- /dev/null
+++ b/spacy/tests/unit/test_tokenizer.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+import pytest
+import re
+
+from ...vocab import Vocab
+from ...tokenizer import Tokenizer
+
+
+@pytest.fixture
+def vocab():
+    return Vocab(tag_map={'NN': {'pos': 'NOUN'}})
+
+@pytest.fixture
+def rules():
+    return {}
+
+@pytest.fixture
+def prefix_search():
+    return None
+
+@pytest.fixture
+def suffix_search():
+    return None
+
+@pytest.fixture
+def infix_finditer():
+    return None
+
+
+@pytest.fixture
+def tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer):
+    return Tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer)
+
+
+def test_add_special_case(tokenizer):
+    tokenizer.add_special_case('dog', [{'orth': 'd'}, {'orth': 'og'}])
+    doc = tokenizer('dog')
+    assert doc[0].text == 'd'
+    assert doc[1].text == 'og'
+
+
+def test_special_case_tag(tokenizer):
+    tokenizer.add_special_case('dog', [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}])
+    doc = tokenizer('dog')
+    assert doc[0].text == 'd'
+    assert doc[0].tag_ == 'NN'
+    assert doc[0].pos_ == 'NOUN'
+    assert doc[1].text == 'og'
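
For context, a minimal standalone sketch of the behaviour these tests pin down, assuming the spaCy v1-era constructor signature exercised by the patch (`Tokenizer(vocab, rules, prefix_search, suffix_search, infix_finditer)`); the `'dog'` split is the same illustrative example used in the tests, not a real-world rule:

```python
from __future__ import unicode_literals

from spacy.vocab import Vocab
from spacy.tokenizer import Tokenizer

# A vocab whose tag map projects the fine-grained 'NN' tag onto the
# coarse-grained 'NOUN' part of speech, mirroring the vocab fixture.
vocab = Vocab(tag_map={'NN': {'pos': 'NOUN'}})

# No prefix/suffix/infix rules, so only special-case rules can split text.
tokenizer = Tokenizer(vocab, {}, None, None, None)

# Register a special case: the exact string 'dog' becomes two tokens,
# and the first token carries a tag attribute up front.
tokenizer.add_special_case('dog', [{'orth': 'd', 'tag': 'NN'}, {'orth': 'og'}])

doc = tokenizer('dog')
print([t.text for t in doc])     # ['d', 'og']
print(doc[0].tag_, doc[0].pos_)  # NN NOUN
```

The point of the second test is that the attribute set via the special case (`'tag': 'NN'`) flows through the tag map, so `pos_` comes back as `'NOUN'` without any tagger having run.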