# coding: utf8
from __future__ import unicode_literals

import pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
|
|
|
"word",
|
|
|
|
|
[
|
2019-04-03 11:50:33 +00:00
|
|
|
|
"don't",
|
|
|
|
|
"don’t",
|
|
|
|
|
"I'd",
|
|
|
|
|
"I’d",
|
2019-04-02 11:15:35 +00:00
|
|
|
|
],
|
|
|
|
|
)
|
2019-04-02 11:24:59 +00:00
|
|
|
|
def test_issue3521(en_tokenizer, word):
|
|
|
|
|
tok = en_tokenizer(word)[1]
|
|
|
|
|
# 'not' and 'would' should be stopwords, also in their abbreviated forms
|
2019-04-02 11:15:35 +00:00
|
|
|
|
assert tok.is_stop
|