spaCy/spacy/tests/regression/test_issue3521.py

# coding: utf8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize("word", ["don't", "dont", "I'd", "Id"])
def test_issue3521(en_tokenizer, word):
    tok = en_tokenizer(word)[1]
    # 'not' and 'would' should be stopwords, also in their abbreviated forms
    assert tok.is_stop
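
For context, a minimal standalone sketch of the behaviour this regression test asserts, assuming spaCy is installed; it uses a blank spacy.lang.en.English pipeline in place of the en_tokenizer pytest fixture that spaCy's test conftest provides:

# Not part of the test file: a rough illustration of the stop-word check.
from spacy.lang.en import English

nlp = English()
for word in ["don't", "dont", "I'd", "Id"]:
    doc = nlp(word)
    # The English tokenizer exceptions split each form into two tokens
    # (e.g. "do" + "n't"); per the test, the second token should be a stop word.
    print([t.text for t in doc], doc[1].is_stop)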