* Add attrs file for English

This commit is contained in:
Matthew Honnibal 2014-12-21 11:29:11 +11:00
parent 2a89d70429
commit 9f3f07cab6
2 changed files with 51 additions and 0 deletions

32
spacy/en/attrs.pxd Normal file
View File

@ -0,0 +1,32 @@
from ..lexeme cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
from ..lexeme cimport FLAG8, FLAG9
from ..lexeme cimport ID as _ID
from ..lexeme cimport SIC as _SIC
from ..lexeme cimport SHAPE as _SHAPE
from ..lexeme cimport DENSE as _DENSE
from ..lexeme cimport SHAPE as _SHAPE
from ..lexeme cimport PREFIX as _PREFIX
from ..lexeme cimport SUFFIX as _SUFFIX
from ..lexeme cimport LEMMA as _LEMMA
# Work around the lack of global cpdef variables
cpdef enum:
    # Boolean lexical features. Each public name aliases one of the generic
    # FLAG* constants cimported from ..lexeme.
    IS_ALPHA = FLAG0
    IS_ASCII = FLAG1
    IS_DIGIT = FLAG2
    IS_LOWER = FLAG3
    IS_PUNCT = FLAG4
    IS_SPACE = FLAG5
    IS_TITLE = FLAG6
    IS_UPPER = FLAG7
    LIKE_URL = FLAG8
    LIKE_NUM = FLAG9

    # Attribute IDs re-exported from ..lexeme under their public names.
    # Each NAME must map to the matching _NAME alias; SHAPE and DENSE were
    # previously crossed (SHAPE = _DENSE, DENSE = _SHAPE).
    ID = _ID
    SIC = _SIC
    SHAPE = _SHAPE
    DENSE = _DENSE
    PREFIX = _PREFIX
    SUFFIX = _SUFFIX
    LEMMA = _LEMMA

19
spacy/en/attrs.pyx Normal file
View File

@ -0,0 +1,19 @@
from .. import orth
from ..typedefs cimport flags_t
def get_flags(unicode string):
    """Build the packed flag value for `string`.

    Every orth predicate listed below is called on `string`; its result is
    shifted left by the paired attribute constant and OR-ed into the
    accumulator, which is returned once all predicates have run.
    """
    cdef flags_t bits = 0
    # (predicate, bit position) pairs, applied in the order listed.
    checks = (
        (orth.is_alpha, IS_ALPHA),
        (orth.is_ascii, IS_ASCII),
        (orth.is_digit, IS_DIGIT),
        (orth.is_lower, IS_LOWER),
        (orth.is_punct, IS_PUNCT),
        (orth.is_space, IS_SPACE),
        (orth.is_title, IS_TITLE),
        (orth.is_upper, IS_UPPER),
        (orth.like_url, LIKE_URL),
        (orth.like_number, LIKE_NUM),
    )
    for predicate, position in checks:
        bits |= predicate(string) << position
    return bits