From 9f3f07cab662c401716cd9248509e8067c9becfa Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 21 Dec 2014 11:29:11 +1100
Subject: [PATCH] * Add attrs file for English

---
 spacy/en/attrs.pxd | 37 +++++++++++++++++++++++++++++++++++++
 spacy/en/attrs.pyx | 27 +++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 spacy/en/attrs.pxd
 create mode 100644 spacy/en/attrs.pyx

diff --git a/spacy/en/attrs.pxd b/spacy/en/attrs.pxd
new file mode 100644
index 000000000..3454e3368
--- /dev/null
+++ b/spacy/en/attrs.pxd
@@ -0,0 +1,37 @@
+# English attribute IDs: gives the FLAG0-FLAG9 values from ..lexeme their
+# English feature names and re-exports the other lexeme attribute IDs.
+from ..lexeme cimport FLAG0, FLAG1, FLAG2, FLAG3, FLAG4, FLAG5, FLAG6, FLAG7
+from ..lexeme cimport FLAG8, FLAG9
+from ..lexeme cimport ID as _ID
+from ..lexeme cimport SIC as _SIC
+from ..lexeme cimport SHAPE as _SHAPE
+from ..lexeme cimport DENSE as _DENSE
+from ..lexeme cimport PREFIX as _PREFIX
+from ..lexeme cimport SUFFIX as _SUFFIX
+from ..lexeme cimport LEMMA as _LEMMA
+
+
+# Work around the lack of global cpdef variables: a cpdef enum exposes its
+# members to Python code as well as to Cython code.
+cpdef enum:
+    # Orthographic features, each assigned one of the FLAGn values.
+    IS_ALPHA = FLAG0
+    IS_ASCII = FLAG1
+    IS_DIGIT = FLAG2
+    IS_LOWER = FLAG3
+    IS_PUNCT = FLAG4
+    IS_SPACE = FLAG5
+    IS_TITLE = FLAG6
+    IS_UPPER = FLAG7
+    LIKE_URL = FLAG8
+    LIKE_NUM = FLAG9
+
+    # Lexeme attributes re-exported unchanged. They are cimported under
+    # underscore aliases so the members declared here can reuse the names.
+    ID = _ID
+    SIC = _SIC
+    SHAPE = _SHAPE
+    DENSE = _DENSE
+    PREFIX = _PREFIX
+    SUFFIX = _SUFFIX
+    LEMMA = _LEMMA
diff --git a/spacy/en/attrs.pyx b/spacy/en/attrs.pyx
new file mode 100644
index 000000000..0ff7c43ed
--- /dev/null
+++ b/spacy/en/attrs.pyx
@@ -0,0 +1,27 @@
+from .. import orth
+from ..typedefs cimport flags_t
+
+
+def get_flags(unicode string):
+    """Pack the orthographic features of ``string`` into one flags value.
+
+    Every ``orth`` predicate is called on ``string`` and its result is
+    shifted left by the matching attribute ID, then OR-ed into the result.
+    The IDs (IS_ALPHA ... LIKE_NUM) come from the same-named attrs.pxd,
+    which Cython makes available to this module implicitly.
+
+    NOTE(review): assumes every predicate returns a bool or 0/1; any other
+    integer would spill into neighbouring bits -- confirm against orth.
+    """
+    cdef flags_t flags = 0
+    flags |= orth.is_alpha(string) << IS_ALPHA
+    flags |= orth.is_ascii(string) << IS_ASCII
+    flags |= orth.is_digit(string) << IS_DIGIT
+    flags |= orth.is_lower(string) << IS_LOWER
+    flags |= orth.is_punct(string) << IS_PUNCT
+    flags |= orth.is_space(string) << IS_SPACE
+    flags |= orth.is_title(string) << IS_TITLE
+    flags |= orth.is_upper(string) << IS_UPPER
+    flags |= orth.like_url(string) << LIKE_URL
+    flags |= orth.like_number(string) << LIKE_NUM
+    return flags