spaCy/spacy/tests/regression/test_issue3521.py

# coding: utf8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize(
    "word",
    [
"don't",
"dont",
"I'd",
"Id",
    ],
)
def test_issue3521(en_tokenizer, word):
    tok = en_tokenizer(word)[1]
    # 'not' and 'would' should be stopwords, also in their abbreviated forms
    assert tok.is_stop
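

# A minimal standalone sketch of the same check, for illustration only: it
# assumes a recent spaCy install and uses spacy.blank("en") in place of the
# test suite's en_tokenizer fixture. A blank English pipeline still applies
# the English tokenizer exceptions and stop-word list this test exercises.
if __name__ == "__main__":
    import spacy

    nlp = spacy.blank("en")
    for word in ["don't", "don’t", "I'd", "I’d"]:
        # Each contraction splits into two tokens; index 1 is the abbreviated
        # "not" / "would" piece (e.g. "n't" or "'d").
        tok = nlp(word)[1]
        print(word, "->", tok.text, "is_stop =", tok.is_stop)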