mirror of https://github.com/explosion/spaCy.git
* Add attrs file for English
This commit is contained in:
parent
2a89d70429
commit
9f3f07cab6
|
@ -0,0 +1,32 @@
|
|||
from ..lexeme cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
|
||||
from ..lexeme cimport FLAG8, FLAG9
|
||||
from ..lexeme cimport ID as _ID
|
||||
from ..lexeme cimport SIC as _SIC
|
||||
from ..lexeme cimport SHAPE as _SHAPE
|
||||
from ..lexeme cimport DENSE as _DENSE
|
||||
from ..lexeme cimport SHAPE as _SHAPE
|
||||
from ..lexeme cimport PREFIX as _PREFIX
|
||||
from ..lexeme cimport SUFFIX as _SUFFIX
|
||||
from ..lexeme cimport LEMMA as _LEMMA
|
||||
|
||||
|
||||
# Work around the lack of global cpdef variables
|
||||
cpdef enum:
    # Anonymous cpdef enum: re-exports the generic constants cimported from
    # ..lexeme under English-specific names.

    # Orthographic flags, each aliased to one of the generic FLAG0..FLAG9
    # slots. get_flags() uses these values as left-shift amounts.
    IS_ALPHA = FLAG0
    IS_ASCII = FLAG1
    IS_DIGIT = FLAG2
    IS_LOWER = FLAG3
    IS_PUNCT = FLAG4
    IS_SPACE = FLAG5
    IS_TITLE = FLAG6
    IS_UPPER = FLAG7
    LIKE_URL = FLAG8
    LIKE_NUM = FLAG9

    # Attribute ids, re-exported unchanged from their underscore-prefixed
    # cimport aliases. Each public name must map to the alias of the same
    # name: SHAPE and DENSE were previously cross-wired, so asking for one
    # silently yielded the other attribute's id.
    ID = _ID
    SIC = _SIC
    SHAPE = _SHAPE
    DENSE = _DENSE
    PREFIX = _PREFIX
    SUFFIX = _SUFFIX
    LEMMA = _LEMMA
|
|
@ -0,0 +1,19 @@
|
|||
from .. import orth
|
||||
from ..typedefs cimport flags_t
|
||||
|
||||
|
||||
def get_flags(unicode string):
    """Pack the orthographic checks for `string` into a single bit field.

    Every predicate taken from the `orth` module is called on `string`; its
    result is shifted left by the paired IS_*/LIKE_* constant and OR-ed into
    the accumulator.

    Returns the accumulated flags_t value.
    """
    cdef flags_t bits = 0
    # (predicate, bit position) pairs, listed in evaluation order.
    checks = (
        (orth.is_alpha, IS_ALPHA),
        (orth.is_ascii, IS_ASCII),
        (orth.is_digit, IS_DIGIT),
        (orth.is_lower, IS_LOWER),
        (orth.is_punct, IS_PUNCT),
        (orth.is_space, IS_SPACE),
        (orth.is_title, IS_TITLE),
        (orth.is_upper, IS_UPPER),
        (orth.like_url, LIKE_URL),
        (orth.like_number, LIKE_NUM),
    )
    for predicate, position in checks:
        bits |= predicate(string) << position
    return bits
|
||||
|
||||
|
Loading…
Reference in New Issue