2020-03-02 10:48:10 +00:00
|
|
|
from numpy cimport ndarray
|
|
|
|
|
2015-07-18 20:39:57 +00:00
|
|
|
from .typedefs cimport attr_t, hash_t, flags_t, len_t, tag_t
|
2015-07-15 22:58:51 +00:00
|
|
|
from .attrs cimport attr_id_t
|
2016-03-10 12:01:34 +00:00
|
|
|
from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER, LANG
|
2017-05-17 10:04:50 +00:00
|
|
|
from .structs cimport LexemeC, SerializedLexemeC
|
2014-12-19 19:51:03 +00:00
|
|
|
from .strings cimport StringStore
|
2015-09-06 17:45:15 +00:00
|
|
|
from .vocab cimport Vocab
|
2014-10-09 08:53:30 +00:00
|
|
|
|
2015-01-17 05:21:17 +00:00
|
|
|
|
2015-01-11 23:26:22 +00:00
|
|
|
# Module-level LexemeC struct instance, declared here so that modules which
# cimport this file can refer to it.
# NOTE(review): presumably a blank/placeholder lexeme; how it is initialised
# is not visible from this declaration - confirm in the implementation file.
cdef LexemeC EMPTY_LEXEME
|
2014-09-10 18:41:37 +00:00
|
|
|
|
2015-01-12 00:23:44 +00:00
|
|
|
cdef class Lexeme:
    # Extension type wrapping a pointer to a LexemeC struct, together with
    # the Vocab it was created with and its `orth` attribute value.
    #
    # The static helpers below operate directly on LexemeC pointers. All of
    # them except `from_ptr` are declared `nogil`.

    # Pointer to the underlying C struct (C-level only).
    cdef LexemeC* c
    # Vocab handed in at construction; readable from Python.
    cdef readonly Vocab vocab
    # Copy of the struct's `orth` field; readable from Python.
    cdef readonly attr_t orth

    @staticmethod
    cdef inline Lexeme from_ptr(LexemeC* lex, Vocab vocab, int vector_length):
        # Build a Lexeme object around an existing LexemeC pointer.
        #
        # lex:           struct to wrap; the pointer is stored, not copied.
        # vocab:         stored on the new object as `vocab`.
        # vector_length: not used by this function; kept so existing callers
        #                keep compiling.
        #
        # Returns the newly created Lexeme.
        cdef Lexeme self = Lexeme.__new__(Lexeme, vocab, lex.orth)
        self.c = lex
        self.vocab = vocab
        self.orth = lex.orth
        # Without this return the function would fall off the end and hand
        # back None despite its declared Lexeme return type.
        return self

    @staticmethod
    cdef inline SerializedLexemeC c_to_bytes(const LexemeC* lex) nogil:
        # Copy the raw bytes of `lex`, starting at the address of its `flags`
        # field, into a SerializedLexemeC and return it by value.
        #
        # Exactly sizeof(SerializedLexemeC.data) bytes are copied.
        # NOTE(review): this presumably covers the fields from `flags`
        # through `sentiment`; it relies on the size of `data` matching that
        # span - confirm against the struct definitions.
        cdef SerializedLexemeC lex_data
        buff = <const unsigned char*>&lex.flags
        for i in range(sizeof(lex_data.data)):
            lex_data.data[i] = buff[i]
        return lex_data

    @staticmethod
    cdef inline void c_from_bytes(LexemeC* lex, SerializedLexemeC lex_data) nogil:
        # Inverse of c_to_bytes: overwrite the bytes of `lex`, starting at
        # the address of its `flags` field, with the contents of
        # `lex_data.data` (sizeof(lex_data.data) bytes in total).
        buff = <unsigned char*>&lex.flags
        for i in range(sizeof(lex_data.data)):
            buff[i] = lex_data.data[i]

    @staticmethod
    cdef inline void set_struct_attr(LexemeC* lex, attr_id_t name, attr_t value) nogil:
        # Store `value` into the field of `lex` selected by attribute id
        # `name`.
        #
        # Ids smaller than the bit width of flags_t are treated as flag bit
        # positions and routed to c_set_flag (value is coerced to a boolean
        # there). Ids with no branch below - including ORTH and LENGTH, which
        # this setter does not handle - are silently ignored.
        if name < (sizeof(flags_t) * 8):
            Lexeme.c_set_flag(lex, name, value)
        elif name == ID:
            lex.id = value
        elif name == LOWER:
            lex.lower = value
        elif name == NORM:
            lex.norm = value
        elif name == SHAPE:
            lex.shape = value
        elif name == PREFIX:
            lex.prefix = value
        elif name == SUFFIX:
            lex.suffix = value
        elif name == CLUSTER:
            lex.cluster = value
        elif name == LANG:
            lex.lang = value

    @staticmethod
    cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
        # Read the field of `lex` selected by attribute id `feat_name`.
        #
        # Ids smaller than the bit width of flags_t are treated as flag bit
        # positions and yield 1 or 0. Unrecognised ids yield 0.
        if feat_name < (sizeof(flags_t) * 8):
            if Lexeme.c_check_flag(lex, feat_name):
                return 1
            else:
                return 0
        elif feat_name == ID:
            return lex.id
        elif feat_name == ORTH:
            return lex.orth
        elif feat_name == LOWER:
            return lex.lower
        elif feat_name == NORM:
            return lex.norm
        elif feat_name == SHAPE:
            return lex.shape
        elif feat_name == PREFIX:
            return lex.prefix
        elif feat_name == SUFFIX:
            return lex.suffix
        elif feat_name == LENGTH:
            return lex.length
        elif feat_name == CLUSTER:
            return lex.cluster
        elif feat_name == LANG:
            return lex.lang
        else:
            return 0

    @staticmethod
    cdef inline bint c_check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
        # Return True when bit number `flag_id` of `lexeme.flags` is set.
        #
        # `one` is typed as flags_t so that the shift is carried out at the
        # width of flags_t rather than at the width of a plain int literal.
        # No range check is done on flag_id here.
        cdef flags_t one = 1
        return (lexeme.flags & (one << flag_id)) != 0

    @staticmethod
    cdef inline bint c_set_flag(LexemeC* lex, attr_id_t flag_id, bint value) nogil:
        # Set (value true) or clear (value false) bit number `flag_id` of
        # `lex.flags` in place.
        #
        # Declared as returning bint but contains no return statement, so
        # callers should not rely on the result.
        # `one` is typed as flags_t so the shift happens at flags_t width.
        cdef flags_t one = 1
        if value:
            lex.flags |= one << flag_id
        else:
            lex.flags &= ~(one << flag_id)
|