diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index e51273ecd..1a781d59c 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -26,6 +26,8 @@ cpdef enum LexStrs: LexStr_shape LexStr_unsparse LexStr_asciied + LexStr_pre + LexStr_suff LexStr_N diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index edbc8e027..0d5f0a0f5 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -24,6 +24,8 @@ cpdef dict get_lexeme_dict(size_t i, unicode string): strings[LexStr_shape] = orth.word_shape(string) strings[LexStr_unsparse] = strings[LexStr_shape] strings[LexStr_asciied] = orth.asciied(string) + strings[LexStr_pre] = string[0] + strings[LexStr_suff] = string[-3:] orth_flags = get_orth_flags(string) dist_flags = OOV_DIST_FLAGS @@ -98,7 +100,8 @@ cdef int lexeme_unpack(LexemeC* lex, dict p) except -1: for i, lex_float in enumerate(p['floats']): lex.floats[i] = lex_float cdef size_t _ - for i, lex_string in enumerate(p['strings']): + for i in range(LexStr_N): + lex_string = p['strings'][i] lex.strings[i] = intern_and_encode(lex_string, &_) lex.orth_flags = p['orth_flags'] lex.dist_flags = p['dist_flags']