2014-12-07 12:52:41 +00:00
|
|
|
from thinc.typedefs cimport atom_t
|
|
|
|
|
|
|
|
from .lang cimport Language
|
|
|
|
from .tokens cimport Tokens
|
|
|
|
from .tokens cimport TokenC
|
|
|
|
|
2014-09-10 16:11:13 +00:00
|
|
|
|
2014-12-08 10:12:15 +00:00
|
|
|
# Person categories (presumably grammatical person for English morphology;
# confirm against the code that assigns these values).
#
# Declared `cpdef`, so the enumerators are usable from C-level code and are
# also exported to Python. No explicit values are given: members are numbered
# consecutively from 0 in declaration order (NO_PERSON == 0), so reordering
# them changes the integer value of every later member.
cpdef enum en_person_t:
    NO_PERSON
    FIRST
    SECOND
    THIRD
    NON_THIRD
|
2014-12-08 10:12:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Number categories (presumably grammatical number for English morphology;
# confirm against the code that assigns these values).
#
# Declared `cpdef`, so the enumerators are usable from C-level code and are
# also exported to Python. Members are implicitly numbered from 0 in
# declaration order (NO_NUMBER == 0); keep the order stable.
cpdef enum en_number_t:
    NO_NUMBER
    SINGULAR
    PLURAL
    MASS
|
|
|
|
|
|
|
|
|
|
|
|
# Gender categories (presumably grammatical gender for English morphology;
# confirm against the code that assigns these values).
#
# Declared `cpdef`, so the enumerators are usable from C-level code and are
# also exported to Python. Members are implicitly numbered from 0 in
# declaration order (NO_GENDER == 0); keep the order stable.
cpdef enum en_gender_t:
    NO_GENDER
    MASCULINE
    FEMININE
    NEUTER
|
|
|
|
|
|
|
|
|
|
|
|
# Case categories (presumably grammatical case for English morphology;
# confirm against the code that assigns these values).
#
# Declared `cpdef`, so the enumerators are usable from C-level code and are
# also exported to Python. Members are implicitly numbered from 0 in
# declaration order (NO_CASE == 0); keep the order stable.
cpdef enum en_case_t:
    NO_CASE
    NOMINATIVE
    GENITIVE
    ACCUSATIVE
    REFLEXIVE
    DEMONYM
|
2014-12-08 10:12:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Verb-form categories (the name suggests combined tense/aspect values;
# confirm against the code that assigns these values).
#
# Declared `cpdef`, so the enumerators are usable from C-level code and are
# also exported to Python. Members are implicitly numbered from 0 in
# declaration order (NO_TENSE == 0); keep the order stable.
cpdef enum en_tenspect_t:
    NO_TENSE
    BASE_VERB
    PRESENT
    PAST
    PASSIVE
    ING
    MODAL
|
|
|
|
|
|
|
|
|
|
|
|
# Miscellaneous categories that do not belong to the person/number/gender/
# case/tense enums (exact usage is not visible here; confirm with callers).
#
# Declared `cpdef`, so the enumerators are usable from C-level code and are
# also exported to Python. Members are implicitly numbered from 0 in
# declaration order (NO_MISC == 0); keep the order stable.
cpdef enum misc_t:
    NO_MISC
    COMPARATIVE
    SUPERLATIVE
    RELATIVE
    NAME
|
|
|
|
|
|
|
|
|
2014-12-03 00:04:00 +00:00
|
|
|
# Flags
#
# Identifiers for per-word flags. Declared `cpdef`, so the enumerators are
# usable from C-level code and are also exported to Python. Members are
# implicitly numbered from 0 in declaration order (IS_ALPHA == 0), so
# reordering them changes the integer value of every later member.
# NOTE(review): presumably each value is used as a bit position in a flags
# field; confirm against the code that sets and checks the flags.
cpdef enum FlagID:
    # IS_*: named after character-class / casing checks.
    IS_ALPHA
    IS_ASCII
    IS_DIGIT
    IS_LOWER
    IS_PUNCT
    IS_SPACE
    IS_TITLE
    IS_UPPER

    # LIKE_*: named after "looks like" checks.
    LIKE_URL
    LIKE_NUMBER

    # OFT_*: presumably "often seen in this casing"; confirm where set.
    OFT_LOWER
    OFT_TITLE
    OFT_UPPER

    # IN_*: presumably membership in external word lists; confirm where set.
    IN_MALES
    IN_FEMALES
    IN_SURNAMES
    IN_PLACES
    IN_GAMES
    IN_CELEBS
    IN_NAMES
|
|
|
|
|
2014-09-10 16:11:13 +00:00
|
|
|
|
2014-12-07 12:52:41 +00:00
|
|
|
# Anonymous enum of context-field indices.
#
# Five groups (P2, P1, W, N1, N2) each declare the same eight fields in the
# same order: sic, cluster, shape, prefix, suffix, pos, lemma, pos_type.
# Members are implicitly numbered from 0 in declaration order, so P2_sic == 0
# and the trailing N_CONTEXT_FIELDS equals the number of fields before it
# (5 * 8 == 40). Add new fields before N_CONTEXT_FIELDS so it remains the
# count.
# NOTE(review): presumably P2/P1 are the two preceding tokens, W the current
# word and N1/N2 the two following tokens, with the values indexing into a
# context array of length N_CONTEXT_FIELDS; confirm against the code that
# fills the context.
cpdef enum:
    P2_sic
    P2_cluster
    P2_shape
    P2_prefix
    P2_suffix
    P2_pos
    P2_lemma
    P2_pos_type

    P1_sic
    P1_cluster
    P1_shape
    P1_prefix
    P1_suffix
    P1_pos
    P1_lemma
    P1_pos_type

    W_sic
    W_cluster
    W_shape
    W_prefix
    W_suffix
    W_pos
    W_lemma
    W_pos_type

    N1_sic
    N1_cluster
    N1_shape
    N1_prefix
    N1_suffix
    N1_pos
    N1_lemma
    N1_pos_type

    N2_sic
    N2_cluster
    N2_shape
    N2_prefix
    N2_suffix
    N2_pos
    N2_lemma
    N2_pos_type

    # Sentinel: must stay last so that its value is the field count.
    N_CONTEXT_FIELDS
|
|
|
|
|
|
|
|
|
2014-08-27 15:15:39 +00:00
|
|
|
cdef class English(Language):
    # Extension type deriving from Language. Only the C-level method
    # signatures are declared here; the implementations are not visible in
    # this file.
    #
    # Both methods take a read-only pointer to a TokenC and return a C int.
    # `except -1` reserves the return value -1 to signal that a Python
    # exception was raised inside the method, so implementations must not
    # return -1 as an ordinary result.
    # NOTE(review): the names suggest the results are used as booleans (0/1)
    # for base noun-phrase boundary detection; confirm in the implementation.
    cdef int is_base_np_end(self, const TokenC* token) except -1
    cdef int is_outside_base_np(self, const TokenC* token) except -1
|