* Add prefix and suffix features

This commit is contained in:
Matthew Honnibal 2014-10-22 12:56:09 +11:00
parent 7018b53d3a
commit 0a0e41f6c8
2 changed files with 6 additions and 1 deletion

View File

@ -26,6 +26,8 @@ cpdef enum LexStrs:
LexStr_shape
LexStr_unsparse
LexStr_asciied
LexStr_pre
LexStr_suff
LexStr_N

View File

@ -24,6 +24,8 @@ cpdef dict get_lexeme_dict(size_t i, unicode string):
strings[<int>LexStr_shape] = orth.word_shape(string)
strings[<int>LexStr_unsparse] = strings[<int>LexStr_shape]
strings[<int>LexStr_asciied] = orth.asciied(string)
strings[<int>LexStr_pre] = string[0]
strings[<int>LexStr_suff] = string[-3:]
orth_flags = get_orth_flags(string)
dist_flags = OOV_DIST_FLAGS
@ -98,7 +100,8 @@ cdef int lexeme_unpack(LexemeC* lex, dict p) except -1:
for i, lex_float in enumerate(p['floats']):
lex.floats[i] = lex_float
cdef size_t _
for i, lex_string in enumerate(p['strings']):
for i in range(LexStr_N):
lex_string = p['strings'][i]
lex.strings[i] = intern_and_encode(lex_string, &_)
lex.orth_flags = p['orth_flags']
lex.dist_flags = p['dist_flags']