spaCy/spacy/tests/test_basic_create.py

90 lines
2.5 KiB
Python

"""Some quick tests that don't depend on data files or on pytest, for debugging the
MS Windows build issues."""
from __future__ import print_function, unicode_literals
import unittest
import re
from spacy.lemmatizer import Lemmatizer
from spacy.morphology import Morphology
from spacy.strings import StringStore
from spacy.vocab import Vocab
from spacy.tokenizer import Tokenizer
from spacy.syntax.arc_eager import ArcEager
from spacy._ml import Model
from spacy.tagger import Tagger
from spacy.syntax.parser import Parser
from spacy.matcher import Matcher
class TestStringStore(unittest.TestCase):
    """Exercise StringStore lookups in both directions (string <-> id)."""

    def test_encode_decode(self):
        """Two different strings get different ids, and lookups round-trip."""
        store = StringStore()
        # Encode both strings up front, in a fixed order.
        pairs = [(text, store[text]) for text in (u'Hello', u'World')]
        (_, first_id), (_, second_id) = pairs
        self.assertNotEqual(first_id, second_id)
        # id -> string
        for text, key in pairs:
            self.assertEqual(store[key], text)
        # string -> id must return the id handed out earlier
        for text, key in pairs:
            self.assertEqual(store[text], key)
class TestMorphology(unittest.TestCase):
    """Smoke test: a Morphology can be constructed from empty inputs."""

    def test_create(self):
        """Build a Morphology from an empty StringStore, an empty dict and a
        Lemmatizer created from three empty dicts.

        The test passes if construction does not raise.
        """
        strings = StringStore()
        # The Lemmatizer used to be constructed twice in a row; one instance
        # is all the Morphology constructor needs.
        lemmatizer = Lemmatizer({}, {}, {})
        Morphology(strings, {}, lemmatizer)
class TestVocab(unittest.TestCase):
    """Checks against a Vocab created with no arguments."""

    def test_create(self):
        """Constructing a Vocab with no arguments must not raise."""
        empty_vocab = Vocab()

    def test_get_lexeme(self):
        """Indexing the Vocab by a string yields an entry whose ``orth_``
        is that same string."""
        word = u'Hello'
        empty_vocab = Vocab()
        entry = empty_vocab[word]
        self.assertEqual(entry.orth_, word)
class TestTokenizer(unittest.TestCase):
    """Smoke test for building and calling a Tokenizer without data files."""

    def test_create(self):
        """Tokenize a short sentence and check the number of tokens."""
        vocab = Vocab()
        # One placeholder pattern is reused for all three regex arguments.
        placeholder = re.compile(r'sklfb;s')
        tokenize = Tokenizer(vocab, {}, placeholder, placeholder, placeholder)
        tokens = tokenize(u'I am a document.')
        self.assertEqual(len(tokens), 4)
class TestTagger(unittest.TestCase):
    """Smoke test: a Tagger can be constructed from a fresh Vocab and Model."""

    def test_create(self):
        """Build a Model with ``model_loc=None`` and hand it to a Tagger.

        The test passes if construction does not raise.
        """
        vocab = Vocab()
        feature_templates = ((1,),)
        tag_model = Model(
            vocab.morphology.n_tags,
            feature_templates,
            model_loc=None,
        )
        built = Tagger(vocab, tag_model)
class TestParser(unittest.TestCase):
    """Smoke test: a Parser can be constructed from in-memory pieces only."""

    def test_create(self):
        """Build an ArcEager transition system and a Model, then a Parser.

        The test passes if construction does not raise.
        """
        vocab = Vocab()
        feature_templates = ((1,),)
        # Action id -> list of label names.
        action_labels = {
            0: ['One', 'Two'],
            1: ['Two', 'Three'],
        }
        moves = ArcEager(vocab.strings, action_labels)
        parse_model = Model(
            vocab.morphology.n_tags,
            feature_templates,
            model_loc=None,
        )
        built = Parser(vocab.strings, moves, parse_model)
class TestMatcher(unittest.TestCase):
    """Smoke test: a Matcher can be constructed with an empty dict."""

    def test_create(self):
        """Constructing a Matcher from a fresh Vocab and ``{}`` must not raise."""
        fresh_vocab = Vocab()
        built = Matcher(fresh_vocab, {})
if __name__ == '__main__':
    # Run the tests with the stdlib runner when executed directly as a script.
    unittest.main()