2015-07-16 09:20:08 +00:00
|
|
|
from .typedefs cimport attr_t, hash_t, flags_t, id_t, len_t, tag_t
|
2015-07-15 22:58:51 +00:00
|
|
|
from .attrs cimport attr_id_t
|
2015-07-16 09:20:08 +00:00
|
|
|
from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
2015-07-15 22:58:51 +00:00
|
|
|
|
2015-01-11 23:26:22 +00:00
|
|
|
from .structs cimport LexemeC
|
2014-12-19 19:51:03 +00:00
|
|
|
from .strings cimport StringStore
|
2014-10-09 08:53:30 +00:00
|
|
|
|
2015-01-17 05:21:17 +00:00
|
|
|
from numpy cimport ndarray
|
|
|
|
|
|
|
|
|
2014-10-09 08:53:30 +00:00
|
|
|
|
2015-01-11 23:26:22 +00:00
|
|
|
# Module-level C variable of struct type LexemeC, declared here so that it is
# visible to code that cimports this module.
# NOTE(review): presumably a blank placeholder lexeme -- confirm where and how
# it is initialised in the implementation file.
cdef LexemeC EMPTY_LEXEME
|
2014-09-10 18:41:37 +00:00
|
|
|
|
2014-12-03 00:04:00 +00:00
|
|
|
|
2015-01-17 05:21:17 +00:00
|
|
|
# Forward declaration only; the body is not defined in this file.
#
#   lex        -- LexemeC struct the properties are applied to
#   props      -- Python dict carrying the property values
#   strings    -- StringStore handed through to the implementation
#   empty_vec  -- read-only float buffer
#                 (NOTE(review): presumably the fallback vector for lexemes
#                 without one -- confirm against the implementation)
#
# Returns a C int; -1 is reserved to signal that a Python exception was raised.
cdef int set_lex_struct_props(
    LexemeC* lex,
    dict props,
    StringStore strings,
    const float* empty_vec
) except -1
|
2015-04-19 08:31:31 +00:00
|
|
|
|
2015-01-12 00:23:44 +00:00
|
|
|
cdef class Lexeme:
    # Python-level object whose fields are copied out of a LexemeC struct by
    # `from_ptr`. Every attribute is `readonly`: readable from Python, but only
    # assignable from Cython code.
    #
    # The declaration order below is kept stable on purpose, since it
    # determines the layout of the extension type.

    # Vector copied element by element from LexemeC.repvec.
    cdef readonly ndarray repvec

    # Bit field of boolean flags (see `check`).
    cdef readonly flags_t flags

    # Integer attributes; orth .. suffix are the keys used to look up the
    # matching `*_` strings in the StringStore.
    cdef readonly attr_t id, length
    cdef readonly attr_t orth, lower, norm
    cdef readonly attr_t shape, prefix, suffix

    # Unicode values resolved from the integer attributes of the same name.
    cdef readonly unicode orth_, lower_, norm_
    cdef readonly unicode shape_, prefix_, suffix_

    cdef readonly attr_t cluster

    # Float-valued fields copied straight from the struct.
    cdef readonly float prob, sentiment, l2_norm

    # Workaround for an apparent bug in the way the decorator below is
    # handled by Cython.
    # TODO: post bug report / patch to Cython.
    @staticmethod
    cdef inline Lexeme from_ptr(const LexemeC* ptr, StringStore strings, int repvec_length):
        # Build a new Lexeme from the struct behind `ptr`.
        #
        #   ptr            -- source LexemeC; it is only read, never modified
        #   strings        -- StringStore indexed with the integer attributes
        #                     to obtain their unicode values
        #   repvec_length  -- number of entries copied from ptr.repvec; it is
        #                     also forwarded to the constructor
        #                     (NOTE(review): presumably sizes `repvec` there --
        #                     confirm in the implementation file)
        cdef Lexeme lexeme = Lexeme.__new__(Lexeme, repvec_length)

        # Copy the vector entry by entry.
        for idx in range(repvec_length):
            lexeme.repvec[idx] = ptr.repvec[idx]
        lexeme.l2_norm = ptr.l2_norm

        lexeme.flags = ptr.flags
        lexeme.id = ptr.id
        lexeme.length = ptr.length

        # Each integer attribute is stored next to its resolved string.
        lexeme.orth = ptr.orth
        lexeme.orth_ = strings[ptr.orth]
        lexeme.lower = ptr.lower
        lexeme.lower_ = strings[ptr.lower]
        lexeme.norm = ptr.norm
        lexeme.norm_ = strings[ptr.norm]
        lexeme.shape = ptr.shape
        lexeme.shape_ = strings[ptr.shape]
        lexeme.prefix = ptr.prefix
        lexeme.prefix_ = strings[ptr.prefix]
        lexeme.suffix = ptr.suffix
        lexeme.suffix_ = strings[ptr.suffix]

        lexeme.cluster = ptr.cluster
        lexeme.prob = ptr.prob
        lexeme.sentiment = ptr.sentiment
        return lexeme

    # Declared here, implemented elsewhere. Returns a C boolean; -1 is
    # reserved to signal that a Python exception was raised.
    cpdef bint check(self, attr_id_t flag_id) except -1
|
2015-07-01 18:10:41 +00:00
|
|
|
|
2015-01-12 00:23:44 +00:00
|
|
|
|
2015-01-11 23:26:22 +00:00
|
|
|
cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
    # Return True when bit number `flag_id` is set in `lexeme.flags`.
    #
    #   lexeme   -- struct whose `flags` bit field is inspected (read only)
    #   flag_id  -- zero-based bit position; must be smaller than the number
    #               of bits in flags_t
    #
    # The mask is built from a flags_t-typed 1. A bare literal `1` is a plain
    # C int, so `1 << flag_id` would overflow (undefined behaviour) for bit
    # positions at or beyond the width of int, even though flags_t may be
    # wider and those positions are valid flags.
    cdef flags_t one = 1
    # Compare against zero explicitly so the result is a proper 0/1 boolean
    # and no high bit can be lost when narrowing to bint.
    return (lexeme.flags & (one << flag_id)) != 0
|
2014-12-03 00:04:00 +00:00
|
|
|
|
|
|
|
|
2015-01-11 23:26:22 +00:00
|
|
|
cdef inline attr_t get_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
    # Look up a single attribute of `lex` by its attribute id.
    #
    #   lex        -- struct to read from (never modified)
    #   feat_name  -- attribute id selecting what to return
    #
    # Ids smaller than the bit width of flags_t are treated as boolean flags
    # and answered with check_flag() (0 or 1). The named ids ID, ORTH, LOWER,
    # NORM, SHAPE, PREFIX, SUFFIX, LENGTH and CLUSTER return the struct field
    # of the same name. Any other id yields 0.
    if feat_name < (sizeof(flags_t) * 8):
        return check_flag(lex, feat_name)
    elif feat_name == ID:
        return lex.id
    elif feat_name == ORTH:
        return lex.orth
    elif feat_name == LOWER:
        # Fixed: this branch used to return lex.norm, so LOWER and NORM were
        # indistinguishable to callers.
        return lex.lower
    elif feat_name == NORM:
        return lex.norm
    elif feat_name == SHAPE:
        return lex.shape
    elif feat_name == PREFIX:
        return lex.prefix
    elif feat_name == SUFFIX:
        return lex.suffix
    elif feat_name == LENGTH:
        return lex.length
    elif feat_name == CLUSTER:
        return lex.cluster
    else:
        # Unknown attribute id: fall back to 0 rather than failing.
        return 0
|