diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd
index c17994462..6a249bf07 100644
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@@ -19,3 +19,7 @@ cdef LexemeC* lexeme_init(Pool mem, size_t i, unicode string, double prob, size_
 
 cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id)
 cdef unicode lexeme_string_view(LexemeC* lexeme, size_t view_id)
+
+
+cdef dict lexeme_pack(LexemeC* lexeme)
+cdef int lexeme_unpack(LexemeC* lex, dict p) except -1
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index 604ec6181..238a954e7 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -34,3 +34,45 @@ cdef bint lexeme_check_flag(LexemeC* lexeme, size_t flag_id):
 cdef unicode lexeme_string_view(LexemeC* lexeme, size_t view_id):
     cdef bytes byte_string = lexeme.views[view_id]
     return byte_string.decode('utf8')
+
+
+cdef dict lexeme_pack(LexemeC* lexeme):
+    """Copy the fields of a LexemeC struct into a plain Python dict.
+
+    String fields are decoded from UTF-8.  The views array is read up to
+    its first NULL entry, so it must be NULL-terminated.
+    """
+    cdef size_t i = 0
+    cdef list views = []
+    while lexeme.views[i] != NULL:
+        views.append(lexeme.views[i].decode('utf8'))
+        i += 1
+    return {
+        'i': lexeme.i,
+        'length': lexeme.length,
+        'prob': lexeme.prob,
+        'cluster': lexeme.cluster,
+        'string': lexeme.string.decode('utf8'),
+        'views': views,
+        'flags': lexeme.flags,
+    }
+
+
+cdef int lexeme_unpack(LexemeC* lex, dict p) except -1:
+    """Fill a LexemeC struct from a dict with the keys lexeme_pack emits.
+
+    NOTE(review): lex.views is written in place and is neither allocated
+    nor NULL-terminated here; presumably the caller provides a zeroed array
+    with at least len(p['views']) + 1 slots -- confirm at the call site.
+    """
+    cdef size_t length
+    cdef size_t i
+    lex.i = p['i']
+    lex.length = p['length']
+    lex.prob = p['prob']
+    lex.cluster = p['cluster']
+    lex.string = intern_and_encode(p['string'], &length)
+    for i, view in enumerate(p['views']):
+        lex.views[i] = intern_and_encode(view, &length)
+    lex.flags = p['flags']
+    return 0