diff --git a/spacy/tests/test_basic_create.py b/spacy/tests/test_basic_create.py deleted file mode 100644 index 2e3b39be3..000000000 --- a/spacy/tests/test_basic_create.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Some quick tests that don't depend on data files or on pytest, for debugging the -MS windows build issues.""" -from __future__ import print_function, unicode_literals - -import unittest -import re - -from spacy.lemmatizer import Lemmatizer -from spacy.morphology import Morphology -from spacy.strings import StringStore -from spacy.vocab import Vocab -from spacy.tokenizer import Tokenizer -from spacy.syntax.arc_eager import ArcEager -from spacy.tagger import Tagger, TaggerModel -from spacy.syntax.parser import Parser, ParserModel -from spacy.matcher import Matcher - - -class TestStringStore(unittest.TestCase): - def test_encode_decode(self): - strings = StringStore() - hello_id = strings[u'Hello'] - world_id = strings[u'World'] - - self.assertNotEqual(hello_id, world_id) - - self.assertEqual(strings[hello_id], u'Hello') - self.assertEqual(strings[world_id], u'World') - - self.assertEqual(strings[u'Hello'], hello_id) - self.assertEqual(strings[u'World'], world_id) - - -class TestMorphology(unittest.TestCase): - def test_create(self): - lemmatizer = Lemmatizer({}, {}, {}) - strings = StringStore() - lemmatizer = Lemmatizer({}, {}, {}) - morphology = Morphology(strings, {}, lemmatizer) - - -class TestVocab(unittest.TestCase): - def test_create(self): - vocab = Vocab() - - def test_get_lexeme(self): - vocab = Vocab() - lexeme = vocab[u'Hello'] - self.assertEqual(lexeme.orth_, u'Hello') - - -class TestTokenizer(unittest.TestCase): - def test_create(self): - vocab = Vocab() - dummy_re = re.compile(r'sklfb;s') - tokenizer = Tokenizer(vocab, {}, dummy_re, dummy_re, dummy_re) - doc = tokenizer(u'I am a document.') - - self.assertEqual(len(doc), 4) - - -class TestTagger(unittest.TestCase): - def test_create(self): - vocab = Vocab() - templates = ((1,),) - model = TaggerModel(vocab.morphology.n_tags, templates) - tagger = Tagger(vocab, model) - - -class TestParser(unittest.TestCase): - def test_create(self): - vocab = Vocab() - templates = ((1,),) - labels_by_action = {0: ['One', 'Two'], 1: ['Two', 'Three']} - transition_system = ArcEager(vocab.strings, labels_by_action) - model = ParserModel(transition_system.n_moves, templates) - - parser = Parser(vocab.strings, transition_system, model) - - -class TestMatcher(unittest.TestCase): - def test_create(self): - vocab = Vocab() - matcher = Matcher(vocab, {}) - - -if __name__ == '__main__': - unittest.main() diff --git a/spacy/tests/test_basic_load.py b/spacy/tests/test_basic_load.py deleted file mode 100644 index c70bcb84a..000000000 --- a/spacy/tests/test_basic_load.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Some quick tests that don't depend on data files or on pytest, for debugging the -MS windows build issues.""" -from __future__ import print_function, unicode_literals - -import unittest -import re -from os import path - -from spacy.lemmatizer import Lemmatizer -from spacy.morphology import Morphology -from spacy.strings import StringStore -from spacy.vocab import Vocab -from spacy.tokenizer import Tokenizer -from spacy.syntax.arc_eager import ArcEager -from spacy.tagger import Tagger -from spacy.syntax.parser import Parser, ParserModel -from spacy.matcher import Matcher -from spacy.syntax.parser import get_templates - -from spacy.en import English - - -class TestLoadVocab(unittest.TestCase): - def test_load(self): - if path.exists(path.join(English.default_data_dir(), 'vocab')): - vocab = Vocab.from_dir(path.join(English.default_data_dir(), 'vocab')) - - -class TestLoadTokenizer(unittest.TestCase): - def test_load(self): - data_dir = English.default_data_dir() - if path.exists(path.join(data_dir, 'vocab')): - vocab = Vocab.from_dir(path.join(data_dir, 'vocab')) - tokenizer = Tokenizer.from_dir(vocab, path.join(data_dir, 'tokenizer')) - - -class TestLoadTagger(unittest.TestCase): - def test_load(self): - data_dir = English.default_data_dir() - - if path.exists(path.join(data_dir, 'vocab')): - vocab = Vocab.from_dir(path.join(data_dir, 'vocab')) - tagger = Tagger.from_dir(path.join(data_dir, 'tagger'), vocab) - - -class TestLoadParser(unittest.TestCase): - def test_load(self): - data_dir = English.default_data_dir() - if path.exists(path.join(data_dir, 'vocab')): - vocab = Vocab.from_dir(path.join(data_dir, 'vocab')) - if path.exists(path.join(data_dir, 'deps')): - parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager) - - config_data = {"labels": {"0": {"": True}, "1": {"": True}, "2": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "dobj": True, "neg": True, "csubjpass": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "relcl": True, "quantmod": True, "acomp": True, "compound": True, "pcomp": True, "intj": True, "poss": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "amod": True, "dative": True, "pobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True, "acl": True}, "3": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "acl": True, "poss": True, "neg": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "amod": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "quantmod": True, "acomp": True, "pcomp": True, "intj": True, "relcl": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "dobj": True, "dative": True, "pobj": True, "iobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True}, "4": {"ROOT": True}}, "seed": 0, "features": "basic", "beam_width": 1} - - data_dir = English.default_data_dir() - vocab = Vocab.from_dir(path.join(data_dir, 'vocab')) - - moves = ArcEager(vocab.strings, config_data['labels']) - templates = get_templates(config_data['features']) - - model = ParserModel(moves.n_moves, templates) - model.load(path.join(data_dir, 'deps', 'model')) - - parser = Parser(vocab.strings, moves, model) - - -if __name__ == '__main__': - unittest.main()