From 199025609f7c97248002ca29492b4192cefc986d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 21 Dec 2014 20:41:13 +1100 Subject: [PATCH] * Upd contractions test --- tests/test_contractions.py | 41 +++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/tests/test_contractions.py b/tests/test_contractions.py index 1e697afd2..78a56f67f 100644 --- a/tests/test_contractions.py +++ b/tests/test_contractions.py @@ -1,32 +1,37 @@ from __future__ import unicode_literals +import pytest -from spacy.en import EN +from spacy.en import English + +@pytest.fixture +def EN(): + return English(pos_tag=False) -def test_possess(): - tokens = EN.tokenize("Mike's") - assert EN.lexicon.strings[tokens[0].sic] == "Mike" - assert EN.lexicon.strings[tokens[1].sic] == "'s" +def test_possess(EN): + tokens = EN("Mike's") + assert EN.vocab.strings[tokens[0].sic] == "Mike" + assert EN.vocab.strings[tokens[1].sic] == "'s" assert len(tokens) == 2 -def test_apostrophe(): - tokens = EN.tokenize("schools'") +def test_apostrophe(EN): + tokens = EN("schools'") assert len(tokens) == 2 assert tokens[1].string == "'" assert tokens[0].string == "schools" -def test_LL(): - tokens = EN.tokenize("we'll") +def test_LL(EN): + tokens = EN("we'll") assert len(tokens) == 2 assert tokens[1].string == "'ll" assert tokens[1].lemma == "will" assert tokens[0].string == "we" -def test_aint(): - tokens = EN.tokenize("ain't") +def test_aint(EN): + tokens = EN("ain't") assert len(tokens) == 2 assert tokens[0].string == "ai" assert tokens[0].lemma == "be" @@ -34,19 +39,19 @@ def test_aint(): assert tokens[1].lemma == "not" -def test_capitalized(): - tokens = EN.tokenize("can't") +def test_capitalized(EN): + tokens = EN("can't") assert len(tokens) == 2 - tokens = EN.tokenize("Can't") + tokens = EN("Can't") assert len(tokens) == 2 - tokens = EN.tokenize("Ain't") + tokens = EN("Ain't") assert len(tokens) == 2 assert tokens[0].string == "Ai" assert tokens[0].lemma == "be" -def test_punct(): - tokens = EN.tokenize("We've") +def test_punct(EN): + tokens = EN("We've") assert len(tokens) == 2 - tokens = EN.tokenize("``We've") + tokens = EN("``We've") assert len(tokens) == 3