From 6c807aa45feaeef40a3fb2fa24a97c13ddf5a750 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 31 Oct 2014 17:43:00 +1100 Subject: [PATCH] * Restore id attribute to lexeme, and rename pos field to postype, to store clustered tag dictionaries --- spacy/lexeme.pxd | 5 +++-- spacy/lexeme.pyx | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index 066f05b20..76b236e5b 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -24,6 +24,7 @@ cpdef enum: cdef struct Lexeme: flag_t flags + id_t id id_t sic id_t norm id_t shape @@ -36,7 +37,7 @@ cdef struct Lexeme: len_t length tag_t cluster - tag_t pos + tag_t postype tag_t supersense @@ -44,7 +45,7 @@ cdef struct Lexeme: cdef Lexeme EMPTY_LEXEME -cpdef Lexeme init(unicode string, hash_t hashed, +cpdef Lexeme init(id_t i, unicode string, hash_t hashed, StringStore store, dict props) except * diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 98b8decff..6616cda47 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -26,14 +26,15 @@ def get_flags(unicode string, float upper_pc, float title_pc, float lower_pc): return flags -cpdef Lexeme init(unicode string, hash_t hashed, +cpdef Lexeme init(id_t i, unicode string, hash_t hashed, StringStore store, dict props) except *: cdef Lexeme lex + lex.id = i lex.length = len(string) lex.sic = get_string_id(string, store) lex.cluster = props.get('cluster', 0) - lex.pos = props.get('pos', 0) + lex.postype = props.get('postype', 0) lex.supersense = props.get('supersense', 0) lex.prob = props.get('prob', 0) @@ -55,6 +56,7 @@ cpdef Lexeme init(unicode string, hash_t hashed, lex.flags = get_flags(string, upper_pc, title_pc, lower_pc) return lex + cdef id_t get_string_id(unicode string, StringStore store) except 0: cdef bytes byte_string = string.encode('utf8') cdef Utf8Str* orig_str = store.intern(byte_string, len(byte_string))