* Update contractions test

2014-12-21 20:41:13 +11:00 · 2014-12-21 20:41:13 +11:00 · 199025609f
parent 0d9972f4b0
commit 199025609f
1 changed file with 23 additions and 18 deletions
--- a/tests/test_contractions.py
+++ b/tests/test_contractions.py
@@ -1,32 +1,37 @@
 from __future__ import unicode_literals
 import pytest
-from spacy.en import EN
+from spacy.en import English
+@pytest.fixture
+def EN():
+    return English(pos_tag=False)
-def test_possess():
+def test_possess(EN):
-    tokens = EN.tokenize("Mike's")
+    tokens = EN("Mike's")
-    assert EN.lexicon.strings[tokens[0].sic] == "Mike"
+    assert EN.vocab.strings[tokens[0].sic] == "Mike"
-    assert EN.lexicon.strings[tokens[1].sic] == "'s"
+    assert EN.vocab.strings[tokens[1].sic] == "'s"
    assert len(tokens) == 2
-def test_apostrophe():
+def test_apostrophe(EN):
-    tokens = EN.tokenize("schools'")
+    tokens = EN("schools'")
    assert len(tokens) == 2
    assert tokens[1].string == "'"
    assert tokens[0].string == "schools"
-def test_LL():
+def test_LL(EN):
-    tokens = EN.tokenize("we'll")
+    tokens = EN("we'll")
    assert len(tokens) == 2
    assert tokens[1].string == "'ll"
    assert tokens[1].lemma == "will"
    assert tokens[0].string == "we"
-def test_aint():
+def test_aint(EN):
-    tokens = EN.tokenize("ain't")
+    tokens = EN("ain't")
    assert len(tokens) == 2
    assert tokens[0].string == "ai"
    assert tokens[0].lemma == "be"
@@ -34,19 +39,19 @@ def test_aint():
    assert tokens[1].lemma == "not"
-def test_capitalized():
+def test_capitalized(EN):
-    tokens = EN.tokenize("can't")
+    tokens = EN("can't")
    assert len(tokens) == 2
-    tokens = EN.tokenize("Can't")
+    tokens = EN("Can't")
    assert len(tokens) == 2
-    tokens = EN.tokenize("Ain't")
+    tokens = EN("Ain't")
    assert len(tokens) == 2
    assert tokens[0].string == "Ai"
    assert tokens[0].lemma == "be"
-def test_punct():
+def test_punct(EN):
-    tokens = EN.tokenize("We've")
+    tokens = EN("We've")
    assert len(tokens) == 2
-    tokens = EN.tokenize("``We've")
+    tokens = EN("``We've")
    assert len(tokens) == 3