From 0a0e41f6c8607a6d7ff43247e2c9aeb9434d9579 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 22 Oct 2014 12:56:09 +1100 Subject: [PATCH] * Add prefix and suffix features --- spacy/lexeme.pxd | 2 ++ spacy/lexeme.pyx | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index e51273ecd..1a781d59c 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -26,6 +26,8 @@ cpdef enum LexStrs: LexStr_shape LexStr_unsparse LexStr_asciied + LexStr_pre + LexStr_suff LexStr_N diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index edbc8e027..0d5f0a0f5 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -24,6 +24,8 @@ cpdef dict get_lexeme_dict(size_t i, unicode string): strings[LexStr_shape] = orth.word_shape(string) strings[LexStr_unsparse] = strings[LexStr_shape] strings[LexStr_asciied] = orth.asciied(string) + strings[LexStr_pre] = string[0] + strings[LexStr_suff] = string[-3:] orth_flags = get_orth_flags(string) dist_flags = OOV_DIST_FLAGS @@ -98,7 +100,8 @@ cdef int lexeme_unpack(LexemeC* lex, dict p) except -1: for i, lex_float in enumerate(p['floats']): lex.floats[i] = lex_float cdef size_t _ - for i, lex_string in enumerate(p['strings']): + for i in range(LexStr_N): + lex_string = p['strings'][i] lex.strings[i] = intern_and_encode(lex_string, &_) lex.orth_flags = p['orth_flags'] lex.dist_flags = p['dist_flags']