from .typedefs cimport attr_t, hash_t, flags_t, len_t, tag_t
from .attrs cimport attr_id_t
from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER, LANG

from .structs cimport LexemeC, SerializedLexemeC
from .strings cimport StringStore
from .vocab cimport Vocab

from numpy cimport ndarray


cdef LexemeC EMPTY_LEXEME


cdef class Lexeme:
    # Python-visible wrapper around a LexemeC struct pointer.
    #
    # `c` is the wrapped struct pointer; `vocab` and `orth` are exposed to
    # Python as read-only attributes.
    cdef LexemeC* c
    cdef readonly Vocab vocab
    cdef readonly attr_t orth

    @staticmethod
    cdef inline Lexeme from_ptr(LexemeC* lex, Vocab vocab, int vector_length):
        # Build a Lexeme wrapper around an existing LexemeC pointer.
        #
        # `vector_length` is accepted for interface compatibility but is not
        # used by this function.
        cdef Lexeme self = Lexeme.__new__(Lexeme, vocab, lex.orth)
        self.c = lex
        self.vocab = vocab
        self.orth = lex.orth
        # The function is declared to return a Lexeme; without this explicit
        # return the caller would receive None instead of the new wrapper.
        return self

    @staticmethod
    cdef inline SerializedLexemeC c_to_bytes(const LexemeC* lex) nogil:
        # Copy sizeof(lex_data.data) raw bytes, starting at the address of
        # `lex.flags`, into a SerializedLexemeC and return it by value.
        #
        # NOTE(review): this assumes SerializedLexemeC.data is sized to match
        # the span of LexemeC fields starting at `flags` -- confirm against
        # the struct definitions.
        cdef SerializedLexemeC lex_data
        cdef const unsigned char* src = <const unsigned char*>&lex.flags
        cdef size_t i
        for i in range(sizeof(lex_data.data)):
            lex_data.data[i] = src[i]
        return lex_data

    @staticmethod
    cdef inline void c_from_bytes(LexemeC* lex, SerializedLexemeC lex_data) nogil:
        # Inverse of c_to_bytes: copy sizeof(lex_data.data) raw bytes from
        # `lex_data.data` into `lex`, starting at the address of `lex.flags`.
        cdef unsigned char* dest = <unsigned char*>&lex.flags
        cdef size_t i
        for i in range(sizeof(lex_data.data)):
            dest[i] = lex_data.data[i]

    @staticmethod
    cdef inline void set_struct_attr(LexemeC* lex, attr_id_t name, attr_t value) nogil:
        # Set one attribute on `lex`, selected by attribute ID.
        #
        # IDs below the bit width of flags_t are treated as flag positions and
        # routed to c_set_flag. IDs not handled below (including ORTH and
        # LENGTH) are silently ignored.
        if name < (sizeof(flags_t) * 8):
            Lexeme.c_set_flag(lex, name, value)
        elif name == ID:
            lex.id = value
        elif name == LOWER:
            lex.lower = value
        elif name == NORM:
            lex.norm = value
        elif name == SHAPE:
            lex.shape = value
        elif name == PREFIX:
            lex.prefix = value
        elif name == SUFFIX:
            lex.suffix = value
        elif name == CLUSTER:
            lex.cluster = value
        elif name == LANG:
            lex.lang = value

    @staticmethod
    cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
        # Read one attribute from `lex`, selected by attribute ID.
        #
        # IDs below the bit width of flags_t are treated as flag positions and
        # yield 1 or 0. Unrecognised IDs yield 0.
        if feat_name < (sizeof(flags_t) * 8):
            if Lexeme.c_check_flag(lex, feat_name):
                return 1
            else:
                return 0
        elif feat_name == ID:
            return lex.id
        elif feat_name == ORTH:
            return lex.orth
        elif feat_name == LOWER:
            return lex.lower
        elif feat_name == NORM:
            return lex.norm
        elif feat_name == SHAPE:
            return lex.shape
        elif feat_name == PREFIX:
            return lex.prefix
        elif feat_name == SUFFIX:
            return lex.suffix
        elif feat_name == LENGTH:
            return lex.length
        elif feat_name == CLUSTER:
            return lex.cluster
        elif feat_name == LANG:
            return lex.lang
        else:
            return 0

    @staticmethod
    cdef inline bint c_check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
        # Return True when bit `flag_id` of `lexeme.flags` is set.
        #
        # `one` is typed as flags_t so the shift is performed at the full
        # width of the flags field.
        cdef flags_t one = 1
        return (lexeme.flags & (one << flag_id)) != 0

    @staticmethod
    cdef inline bint c_set_flag(LexemeC* lex, attr_id_t flag_id, bint value) nogil:
        # Set (value true) or clear (value false) bit `flag_id` of `lex.flags`.
        #
        # Although the declared return type is bint, no value is explicitly
        # returned; callers should not rely on the result.
        cdef flags_t one = 1
        if value:
            lex.flags |= one << flag_id
        else:
            lex.flags &= ~(one << flag_id)