spaCy/spacy/tests/test_basic_create.py

"""Some quick tests that don't depend on data files or on pytest, for debugging the
MS windows build issues."""
from __future__ import print_function, unicode_literals

import unittest
import re

from spacy.lemmatizer import Lemmatizer
from spacy.morphology import Morphology
from spacy.strings import StringStore
from spacy.vocab import Vocab
from spacy.tokenizer import Tokenizer
from spacy.syntax.arc_eager import ArcEager
from spacy._ml import Model
from spacy.tagger import Tagger
from spacy.syntax.parser import Parser
from spacy.matcher import Matcher


class TestStringStore(unittest.TestCase):
    def test_encode_decode(self):
        strings = StringStore()
        hello_id = strings[u'Hello']
        world_id = strings[u'World']

        self.assertNotEqual(hello_id, world_id)
        
        self.assertEqual(strings[hello_id], u'Hello')
        self.assertEqual(strings[world_id], u'World')

        self.assertEqual(strings[u'Hello'], hello_id)
        self.assertEqual(strings[u'World'], world_id)


class TestMorphology(unittest.TestCase):
    def test_create(self):
        lemmatizer = Lemmatizer({}, {}, {})
        strings = StringStore()
        lemmatizer = Lemmatizer({}, {}, {})
        morphology = Morphology(strings, {}, lemmatizer)


class TestVocab(unittest.TestCase):
    def test_create(self):
        vocab = Vocab()

    def test_get_lexeme(self):
        vocab = Vocab()
        lexeme = vocab[u'Hello']
        self.assertEqual(lexeme.orth_, u'Hello')


class TestTokenizer(unittest.TestCase):
    def test_create(self):
        vocab = Vocab()
        dummy_re = re.compile(r'sklfb;s')
        tokenizer = Tokenizer(vocab, {}, dummy_re, dummy_re, dummy_re)
        doc = tokenizer(u'I am a document.')
        
        self.assertEqual(len(doc), 4)


class TestTagger(unittest.TestCase):
    def test_create(self):
        vocab = Vocab()
        templates = ((1,),)
        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
        tagger = Tagger(vocab, model)


class TestParser(unittest.TestCase):
    def test_create(self):
        vocab = Vocab()
        templates = ((1,),)
        labels_by_action = {0: ['One', 'Two'], 1: ['Two', 'Three']}
        transition_system = ArcEager(vocab.strings, labels_by_action)
        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
        
        parser = Parser(vocab.strings, transition_system, model)


class TestMatcher(unittest.TestCase):
    def test_create(self):
        vocab = Vocab()
        matcher = Matcher(vocab, {})


if __name__ == '__main__':
    unittest.main()
* Add basic, non-data dependent class creation tests, without depending on pytest. For use in debugging MS build issues, for Issue #132 2015-10-11 03:29:12 +00:00			`"""Some quick tests that don't depend on data files or on pytest, for debugging the`
			`MS windows build issues."""`
			`from __future__ import print_function, unicode_literals`

			`import unittest`
			`import re`

			`from spacy.lemmatizer import Lemmatizer`
			`from spacy.morphology import Morphology`
			`from spacy.strings import StringStore`
			`from spacy.vocab import Vocab`
			`from spacy.tokenizer import Tokenizer`
			`from spacy.syntax.arc_eager import ArcEager`
			`from spacy._ml import Model`
			`from spacy.tagger import Tagger`
			`from spacy.syntax.parser import Parser`
* Fix missing import 2015-10-11 03:38:21 +00:00			`from spacy.matcher import Matcher`
* Add basic, non-data dependent class creation tests, without depending on pytest. For use in debugging MS build issues, for Issue #132 2015-10-11 03:29:12 +00:00

			`class TestStringStore(unittest.TestCase):`
			`def test_encode_decode(self):`
			`strings = StringStore()`
			`hello_id = strings[u'Hello']`
			`world_id = strings[u'World']`

			`self.assertNotEqual(hello_id, world_id)`

			`self.assertEqual(strings[hello_id], u'Hello')`
			`self.assertEqual(strings[world_id], u'World')`

			`self.assertEqual(strings[u'Hello'], hello_id)`
			`self.assertEqual(strings[u'World'], world_id)`


			`class TestMorphology(unittest.TestCase):`
			`def test_create(self):`
			`lemmatizer = Lemmatizer({}, {}, {})`
			`strings = StringStore()`
			`lemmatizer = Lemmatizer({}, {}, {})`
			`morphology = Morphology(strings, {}, lemmatizer)`


			`class TestVocab(unittest.TestCase):`
			`def test_create(self):`
			`vocab = Vocab()`

			`def test_get_lexeme(self):`
			`vocab = Vocab()`
			`lexeme = vocab[u'Hello']`
* Fix assertion in test_basic_create 2015-10-11 13:48:18 +00:00			`self.assertEqual(lexeme.orth_, u'Hello')`
* Add basic, non-data dependent class creation tests, without depending on pytest. For use in debugging MS build issues, for Issue #132 2015-10-11 03:29:12 +00:00

			`class TestTokenizer(unittest.TestCase):`
			`def test_create(self):`
			`vocab = Vocab()`
			`dummy_re = re.compile(r'sklfb;s')`
			`tokenizer = Tokenizer(vocab, {}, dummy_re, dummy_re, dummy_re)`
			`doc = tokenizer(u'I am a document.')`

			`self.assertEqual(len(doc), 4)`


			`class TestTagger(unittest.TestCase):`
			`def test_create(self):`
			`vocab = Vocab()`
			`templates = ((1,),)`
			`model = Model(vocab.morphology.n_tags, templates, model_loc=None)`
			`tagger = Tagger(vocab, model)`


			`class TestParser(unittest.TestCase):`
			`def test_create(self):`
			`vocab = Vocab()`
			`templates = ((1,),)`
			`labels_by_action = {0: ['One', 'Two'], 1: ['Two', 'Three']}`
			`transition_system = ArcEager(vocab.strings, labels_by_action)`
			`model = Model(vocab.morphology.n_tags, templates, model_loc=None)`

			`parser = Parser(vocab.strings, transition_system, model)`


			`class TestMatcher(unittest.TestCase):`
			`def test_create(self):`
			`vocab = Vocab()`
* Fix Matcher test 2015-10-11 03:59:12 +00:00			`matcher = Matcher(vocab, {})`
* Add basic, non-data dependent class creation tests, without depending on pytest. For use in debugging MS build issues, for Issue #132 2015-10-11 03:29:12 +00:00

			`if __name__ == '__main__':`
			`unittest.main()`