# spaCy/spacy/tests/vocab/test_lexeme_flags.py
#
# 43 lines
# 1.4 KiB
# Python

from __future__ import unicode_literals
import pytest
from spacy.attrs import *
def test_is_alpha(en_vocab):
    """Check the IS_ALPHA bit of ``flags`` for three vocabulary entries.

    Asserts that the bit is set for 'the' and clear for '1999' and
    'hello1'.
    """
    alpha_mask = 1 << IS_ALPHA

    # Entries are fetched one at a time, in the same order as the checks.
    assert en_vocab['the'].flags & alpha_mask
    assert not (en_vocab['1999'].flags & alpha_mask)
    assert not (en_vocab['hello1'].flags & alpha_mask)
def test_is_digit(en_vocab):
    """Check the IS_DIGIT bit of ``flags`` for three vocabulary entries.

    Asserts that the bit is set for '1999' and clear for 'the' and
    'hello1'.
    """
    digit_mask = 1 << IS_DIGIT

    word = en_vocab['the']
    word_is_digit = word.flags & digit_mask
    assert not word_is_digit

    number = en_vocab['1999']
    number_is_digit = number.flags & digit_mask
    assert number_is_digit

    alnum = en_vocab['hello1']
    alnum_is_digit = alnum.flags & digit_mask
    assert not alnum_is_digit
def test_add_flag_auto_id(en_vocab):
    """Register a length-4 predicate without passing a flag id, then check it.

    The id returned by ``add_flag`` is passed to ``check_flag`` on several
    entries. IS_DIGIT is checked alongside it on '1999' and '199' and is
    expected to be true for both.
    """
    is_len4 = en_vocab.add_flag(lambda string: len(string) == 4)

    # Plain truthiness asserts instead of ``== True`` / ``== False``.
    assert en_vocab['1999'].check_flag(is_len4)
    assert en_vocab['1999'].check_flag(IS_DIGIT)
    assert not en_vocab['199'].check_flag(is_len4)
    assert en_vocab['199'].check_flag(IS_DIGIT)
    assert not en_vocab['the'].check_flag(is_len4)
    assert en_vocab['dogs'].check_flag(is_len4)
def test_add_flag_provided_id(en_vocab):
    """Register a length-4 predicate under ``flag_id=IS_DIGIT``, then check it.

    After registration, checking IS_DIGIT on '199' is expected to be false,
    i.e. the supplied predicate is what answers for that flag id.
    """
    # NOTE(review): this registers a new predicate under IS_DIGIT on the
    # fixture vocab; if the fixture is shared between tests, IS_DIGIT checks
    # made afterwards may be affected -- confirm the fixture's scope.
    is_len4 = en_vocab.add_flag(lambda string: len(string) == 4, flag_id=IS_DIGIT)

    # Plain truthiness asserts instead of ``== True`` / ``== False``.
    assert en_vocab['1999'].check_flag(is_len4)
    assert not en_vocab['199'].check_flag(is_len4)
    assert not en_vocab['199'].check_flag(IS_DIGIT)
    assert not en_vocab['the'].check_flag(is_len4)
    assert en_vocab['dogs'].check_flag(is_len4)